blob: 56ffc3f9ab53e0be613489e91e9502a4fbe1fa16 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-gemminc-minmax.yaml
//   Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
27 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON_FMA;
29 GemmMicrokernelTester()
30 .mr(1)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(1)
35 .n(8)
36 .k(8)
37 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
38 }
39
40 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_FMA;
42 GemmMicrokernelTester()
43 .mr(1)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(1)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
51 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
52 }
53
54 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
55 TEST_REQUIRES_ARM_NEON_FMA;
56 GemmMicrokernelTester()
57 .mr(1)
58 .nr(8)
59 .kr(1)
60 .sr(1)
61 .m(1)
62 .n(8)
63 .k(8)
64 .a_stride(11)
65 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
66 }
67
68 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
69 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080072 GemmMicrokernelTester()
73 .mr(1)
74 .nr(8)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(8)
80 .iterations(1)
81 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
82 }
83 }
84 }
85
86 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
87 TEST_REQUIRES_ARM_NEON_FMA;
88 for (uint32_t m = 1; m <= 1; m++) {
89 GemmMicrokernelTester()
90 .mr(1)
91 .nr(8)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(8)
97 .iterations(1)
98 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
99 }
100 }
101
102 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
103 TEST_REQUIRES_ARM_NEON_FMA;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(1)
107 .nr(8)
108 .kr(1)
109 .sr(1)
110 .m(1)
111 .n(n)
112 .k(8)
113 .iterations(1)
114 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
115 }
116 }
117
118 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
119 TEST_REQUIRES_ARM_NEON_FMA;
120 GemmMicrokernelTester()
121 .mr(1)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(1)
126 .n(8)
127 .k(16)
128 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
129 }
130
131 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
132 TEST_REQUIRES_ARM_NEON_FMA;
133 GemmMicrokernelTester()
134 .mr(1)
135 .nr(8)
136 .kr(1)
137 .sr(1)
138 .m(1)
139 .n(8)
140 .k(16)
141 .a_stride(19)
142 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
143 }
144
145 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
146 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800147 for (uint32_t n = 1; n <= 8; n++) {
148 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800149 GemmMicrokernelTester()
150 .mr(1)
151 .nr(8)
152 .kr(1)
153 .sr(1)
154 .m(m)
155 .n(n)
156 .k(16)
157 .iterations(1)
158 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
159 }
160 }
161 }
162
163 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
164 TEST_REQUIRES_ARM_NEON_FMA;
165 for (size_t k = 1; k < 16; k++) {
166 GemmMicrokernelTester()
167 .mr(1)
168 .nr(8)
169 .kr(1)
170 .sr(1)
171 .m(1)
172 .n(8)
173 .k(k)
174 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
175 }
176 }
177
178 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
179 TEST_REQUIRES_ARM_NEON_FMA;
180 for (size_t k = 1; k < 16; k++) {
181 GemmMicrokernelTester()
182 .mr(1)
183 .nr(8)
184 .kr(1)
185 .sr(1)
186 .m(1)
187 .n(8)
188 .k(k)
189 .a_stride(19)
190 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
191 }
192 }
193
194 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
195 TEST_REQUIRES_ARM_NEON_FMA;
196 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800197 for (uint32_t n = 1; n <= 8; n++) {
198 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800199 GemmMicrokernelTester()
200 .mr(1)
201 .nr(8)
202 .kr(1)
203 .sr(1)
204 .m(m)
205 .n(n)
206 .k(k)
207 .iterations(1)
208 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
209 }
210 }
211 }
212 }
213
214 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
215 TEST_REQUIRES_ARM_NEON_FMA;
216 for (size_t k = 17; k < 32; k++) {
217 GemmMicrokernelTester()
218 .mr(1)
219 .nr(8)
220 .kr(1)
221 .sr(1)
222 .m(1)
223 .n(8)
224 .k(k)
225 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
226 }
227 }
228
229 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
230 TEST_REQUIRES_ARM_NEON_FMA;
231 for (size_t k = 17; k < 32; k++) {
232 GemmMicrokernelTester()
233 .mr(1)
234 .nr(8)
235 .kr(1)
236 .sr(1)
237 .m(1)
238 .n(8)
239 .k(k)
240 .a_stride(37)
241 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
242 }
243 }
244
245 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
246 TEST_REQUIRES_ARM_NEON_FMA;
247 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800248 for (uint32_t n = 1; n <= 8; n++) {
249 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800250 GemmMicrokernelTester()
251 .mr(1)
252 .nr(8)
253 .kr(1)
254 .sr(1)
255 .m(m)
256 .n(n)
257 .k(k)
258 .iterations(1)
259 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
260 }
261 }
262 }
263 }
264
265 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
266 TEST_REQUIRES_ARM_NEON_FMA;
267 for (size_t k = 24; k <= 80; k += 8) {
268 GemmMicrokernelTester()
269 .mr(1)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(1)
274 .n(8)
275 .k(k)
276 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
277 }
278 }
279
280 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
281 TEST_REQUIRES_ARM_NEON_FMA;
282 for (size_t k = 24; k <= 80; k += 8) {
283 GemmMicrokernelTester()
284 .mr(1)
285 .nr(8)
286 .kr(1)
287 .sr(1)
288 .m(1)
289 .n(8)
290 .k(k)
291 .a_stride(83)
292 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
293 }
294 }
295
296 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
297 TEST_REQUIRES_ARM_NEON_FMA;
298 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800299 for (uint32_t n = 1; n <= 8; n++) {
300 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800301 GemmMicrokernelTester()
302 .mr(1)
303 .nr(8)
304 .kr(1)
305 .sr(1)
306 .m(m)
307 .n(n)
308 .k(k)
309 .iterations(1)
310 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
311 }
312 }
313 }
314 }
315
316 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
317 TEST_REQUIRES_ARM_NEON_FMA;
318 for (uint32_t n = 9; n < 16; n++) {
319 for (size_t k = 1; k <= 40; k += 9) {
320 GemmMicrokernelTester()
321 .mr(1)
322 .nr(8)
323 .kr(1)
324 .sr(1)
325 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800326 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800327 .k(k)
328 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
329 }
330 }
331 }
332
333 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
334 TEST_REQUIRES_ARM_NEON_FMA;
335 for (uint32_t n = 9; n < 16; n++) {
336 for (size_t k = 1; k <= 40; k += 9) {
337 GemmMicrokernelTester()
338 .mr(1)
339 .nr(8)
340 .kr(1)
341 .sr(1)
342 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800343 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800344 .k(k)
345 .cn_stride(11)
346 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
347 }
348 }
349 }
350
351 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
352 TEST_REQUIRES_ARM_NEON_FMA;
353 for (uint32_t n = 9; n < 16; n++) {
354 for (size_t k = 1; k <= 40; k += 9) {
355 GemmMicrokernelTester()
356 .mr(1)
357 .nr(8)
358 .kr(1)
359 .sr(1)
360 .m(1)
361 .n(n)
362 .k(k)
363 .a_stride(43)
364 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
365 }
366 }
367 }
368
369 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
370 TEST_REQUIRES_ARM_NEON_FMA;
371 for (uint32_t n = 9; n < 16; n++) {
372 for (size_t k = 1; k <= 40; k += 9) {
373 for (uint32_t m = 1; m <= 1; m++) {
374 GemmMicrokernelTester()
375 .mr(1)
376 .nr(8)
377 .kr(1)
378 .sr(1)
379 .m(m)
380 .n(n)
381 .k(k)
382 .iterations(1)
383 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
384 }
385 }
386 }
387 }
388
389 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
390 TEST_REQUIRES_ARM_NEON_FMA;
391 for (uint32_t n = 16; n <= 24; n += 8) {
392 for (size_t k = 1; k <= 40; k += 9) {
393 GemmMicrokernelTester()
394 .mr(1)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800399 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800400 .k(k)
401 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
402 }
403 }
404 }
405
406 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
407 TEST_REQUIRES_ARM_NEON_FMA;
408 for (uint32_t n = 16; n <= 24; n += 8) {
409 for (size_t k = 1; k <= 40; k += 9) {
410 GemmMicrokernelTester()
411 .mr(1)
412 .nr(8)
413 .kr(1)
414 .sr(1)
415 .m(1)
416 .n(n)
417 .k(k)
418 .cn_stride(11)
419 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
420 }
421 }
422 }
423
424 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
425 TEST_REQUIRES_ARM_NEON_FMA;
426 for (uint32_t n = 16; n <= 24; n += 8) {
427 for (size_t k = 1; k <= 40; k += 9) {
428 GemmMicrokernelTester()
429 .mr(1)
430 .nr(8)
431 .kr(1)
432 .sr(1)
433 .m(1)
434 .n(n)
435 .k(k)
436 .a_stride(43)
437 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
438 }
439 }
440 }
441
442 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
443 TEST_REQUIRES_ARM_NEON_FMA;
444 for (uint32_t n = 16; n <= 24; n += 8) {
445 for (size_t k = 1; k <= 40; k += 9) {
446 for (uint32_t m = 1; m <= 1; m++) {
447 GemmMicrokernelTester()
448 .mr(1)
449 .nr(8)
450 .kr(1)
451 .sr(1)
452 .m(m)
453 .n(n)
454 .k(k)
455 .iterations(1)
456 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
457 }
458 }
459 }
460 }
461
462 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
463 TEST_REQUIRES_ARM_NEON_FMA;
464 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800465 for (uint32_t n = 1; n <= 8; n++) {
466 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800467 GemmMicrokernelTester()
468 .mr(1)
469 .nr(8)
470 .kr(1)
471 .sr(1)
472 .m(m)
473 .n(n)
474 .k(k)
475 .cm_stride(11)
476 .iterations(1)
477 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
478 }
479 }
480 }
481 }
482
483 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
484 TEST_REQUIRES_ARM_NEON_FMA;
485 GemmMicrokernelTester()
486 .mr(1)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(1)
491 .n(8)
492 .k(8)
493 .qmin(128)
494 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
495 }
496
497 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
498 TEST_REQUIRES_ARM_NEON_FMA;
499 GemmMicrokernelTester()
500 .mr(1)
501 .nr(8)
502 .kr(1)
503 .sr(1)
504 .m(1)
505 .n(8)
506 .k(8)
507 .qmax(128)
508 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
509 }
510
511 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
512 TEST_REQUIRES_ARM_NEON_FMA;
513 GemmMicrokernelTester()
514 .mr(1)
515 .nr(8)
516 .kr(1)
517 .sr(1)
518 .m(1)
519 .n(8)
520 .k(8)
521 .cm_stride(11)
522 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
523 }
524#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
525
526
527#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
528 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
529 TEST_REQUIRES_ARM_NEON_FMA;
530 GemmMicrokernelTester()
531 .mr(4)
532 .nr(8)
533 .kr(1)
534 .sr(1)
535 .m(4)
536 .n(8)
537 .k(4)
538 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
539 }
540
541 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
542 TEST_REQUIRES_ARM_NEON_FMA;
543 GemmMicrokernelTester()
544 .mr(4)
545 .nr(8)
546 .kr(1)
547 .sr(1)
548 .m(4)
549 .n(8)
550 .k(4)
551 .cn_stride(11)
552 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
553 }
554
555 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
556 TEST_REQUIRES_ARM_NEON_FMA;
557 GemmMicrokernelTester()
558 .mr(4)
559 .nr(8)
560 .kr(1)
561 .sr(1)
562 .m(4)
563 .n(8)
564 .k(4)
565 .a_stride(7)
566 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
567 }
568
569 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
570 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800571 for (uint32_t n = 1; n <= 8; n++) {
572 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800573 GemmMicrokernelTester()
574 .mr(4)
575 .nr(8)
576 .kr(1)
577 .sr(1)
578 .m(m)
579 .n(n)
580 .k(4)
581 .iterations(1)
582 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
583 }
584 }
585 }
586
587 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
588 TEST_REQUIRES_ARM_NEON_FMA;
589 for (uint32_t m = 1; m <= 4; m++) {
590 GemmMicrokernelTester()
591 .mr(4)
592 .nr(8)
593 .kr(1)
594 .sr(1)
595 .m(m)
596 .n(8)
597 .k(4)
598 .iterations(1)
599 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
600 }
601 }
602
603 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
604 TEST_REQUIRES_ARM_NEON_FMA;
605 for (uint32_t n = 1; n <= 8; n++) {
606 GemmMicrokernelTester()
607 .mr(4)
608 .nr(8)
609 .kr(1)
610 .sr(1)
611 .m(4)
612 .n(n)
613 .k(4)
614 .iterations(1)
615 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
616 }
617 }
618
619 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
620 TEST_REQUIRES_ARM_NEON_FMA;
621 GemmMicrokernelTester()
622 .mr(4)
623 .nr(8)
624 .kr(1)
625 .sr(1)
626 .m(4)
627 .n(8)
628 .k(8)
629 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
630 }
631
632 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
633 TEST_REQUIRES_ARM_NEON_FMA;
634 GemmMicrokernelTester()
635 .mr(4)
636 .nr(8)
637 .kr(1)
638 .sr(1)
639 .m(4)
640 .n(8)
641 .k(8)
642 .a_stride(11)
643 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
644 }
645
646 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
647 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800648 for (uint32_t n = 1; n <= 8; n++) {
649 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800650 GemmMicrokernelTester()
651 .mr(4)
652 .nr(8)
653 .kr(1)
654 .sr(1)
655 .m(m)
656 .n(n)
657 .k(8)
658 .iterations(1)
659 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
660 }
661 }
662 }
663
664 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
665 TEST_REQUIRES_ARM_NEON_FMA;
666 for (size_t k = 1; k < 8; k++) {
667 GemmMicrokernelTester()
668 .mr(4)
669 .nr(8)
670 .kr(1)
671 .sr(1)
672 .m(4)
673 .n(8)
674 .k(k)
675 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
676 }
677 }
678
679 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
680 TEST_REQUIRES_ARM_NEON_FMA;
681 for (size_t k = 1; k < 8; k++) {
682 GemmMicrokernelTester()
683 .mr(4)
684 .nr(8)
685 .kr(1)
686 .sr(1)
687 .m(4)
688 .n(8)
689 .k(k)
690 .a_stride(11)
691 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
692 }
693 }
694
695 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
696 TEST_REQUIRES_ARM_NEON_FMA;
697 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800698 for (uint32_t n = 1; n <= 8; n++) {
699 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800700 GemmMicrokernelTester()
701 .mr(4)
702 .nr(8)
703 .kr(1)
704 .sr(1)
705 .m(m)
706 .n(n)
707 .k(k)
708 .iterations(1)
709 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
710 }
711 }
712 }
713 }
714
715 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
716 TEST_REQUIRES_ARM_NEON_FMA;
717 for (size_t k = 9; k < 16; k++) {
718 GemmMicrokernelTester()
719 .mr(4)
720 .nr(8)
721 .kr(1)
722 .sr(1)
723 .m(4)
724 .n(8)
725 .k(k)
726 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
727 }
728 }
729
730 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
731 TEST_REQUIRES_ARM_NEON_FMA;
732 for (size_t k = 9; k < 16; k++) {
733 GemmMicrokernelTester()
734 .mr(4)
735 .nr(8)
736 .kr(1)
737 .sr(1)
738 .m(4)
739 .n(8)
740 .k(k)
741 .a_stride(19)
742 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
743 }
744 }
745
746 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
747 TEST_REQUIRES_ARM_NEON_FMA;
748 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800749 for (uint32_t n = 1; n <= 8; n++) {
750 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800751 GemmMicrokernelTester()
752 .mr(4)
753 .nr(8)
754 .kr(1)
755 .sr(1)
756 .m(m)
757 .n(n)
758 .k(k)
759 .iterations(1)
760 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
761 }
762 }
763 }
764 }
765
766 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
767 TEST_REQUIRES_ARM_NEON_FMA;
768 for (size_t k = 12; k <= 40; k += 4) {
769 GemmMicrokernelTester()
770 .mr(4)
771 .nr(8)
772 .kr(1)
773 .sr(1)
774 .m(4)
775 .n(8)
776 .k(k)
777 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
778 }
779 }
780
781 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
782 TEST_REQUIRES_ARM_NEON_FMA;
783 for (size_t k = 12; k <= 40; k += 4) {
784 GemmMicrokernelTester()
785 .mr(4)
786 .nr(8)
787 .kr(1)
788 .sr(1)
789 .m(4)
790 .n(8)
791 .k(k)
792 .a_stride(43)
793 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
794 }
795 }
796
797 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
798 TEST_REQUIRES_ARM_NEON_FMA;
799 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800800 for (uint32_t n = 1; n <= 8; n++) {
801 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800802 GemmMicrokernelTester()
803 .mr(4)
804 .nr(8)
805 .kr(1)
806 .sr(1)
807 .m(m)
808 .n(n)
809 .k(k)
810 .iterations(1)
811 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
812 }
813 }
814 }
815 }
816
817 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
818 TEST_REQUIRES_ARM_NEON_FMA;
819 for (uint32_t n = 9; n < 16; n++) {
820 for (size_t k = 1; k <= 20; k += 5) {
821 GemmMicrokernelTester()
822 .mr(4)
823 .nr(8)
824 .kr(1)
825 .sr(1)
826 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800827 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800828 .k(k)
829 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
830 }
831 }
832 }
833
834 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
835 TEST_REQUIRES_ARM_NEON_FMA;
836 for (uint32_t n = 9; n < 16; n++) {
837 for (size_t k = 1; k <= 20; k += 5) {
838 GemmMicrokernelTester()
839 .mr(4)
840 .nr(8)
841 .kr(1)
842 .sr(1)
843 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800844 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800845 .k(k)
846 .cn_stride(11)
847 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
848 }
849 }
850 }
851
852 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
853 TEST_REQUIRES_ARM_NEON_FMA;
854 for (uint32_t n = 9; n < 16; n++) {
855 for (size_t k = 1; k <= 20; k += 5) {
856 GemmMicrokernelTester()
857 .mr(4)
858 .nr(8)
859 .kr(1)
860 .sr(1)
861 .m(4)
862 .n(n)
863 .k(k)
864 .a_stride(23)
865 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
866 }
867 }
868 }
869
870 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
871 TEST_REQUIRES_ARM_NEON_FMA;
872 for (uint32_t n = 9; n < 16; n++) {
873 for (size_t k = 1; k <= 20; k += 5) {
874 for (uint32_t m = 1; m <= 4; m++) {
875 GemmMicrokernelTester()
876 .mr(4)
877 .nr(8)
878 .kr(1)
879 .sr(1)
880 .m(m)
881 .n(n)
882 .k(k)
883 .iterations(1)
884 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
885 }
886 }
887 }
888 }
889
890 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
891 TEST_REQUIRES_ARM_NEON_FMA;
892 for (uint32_t n = 16; n <= 24; n += 8) {
893 for (size_t k = 1; k <= 20; k += 5) {
894 GemmMicrokernelTester()
895 .mr(4)
896 .nr(8)
897 .kr(1)
898 .sr(1)
899 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800900 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800901 .k(k)
902 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
903 }
904 }
905 }
906
907 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
908 TEST_REQUIRES_ARM_NEON_FMA;
909 for (uint32_t n = 16; n <= 24; n += 8) {
910 for (size_t k = 1; k <= 20; k += 5) {
911 GemmMicrokernelTester()
912 .mr(4)
913 .nr(8)
914 .kr(1)
915 .sr(1)
916 .m(4)
917 .n(n)
918 .k(k)
919 .cn_stride(11)
920 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
921 }
922 }
923 }
924
925 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
926 TEST_REQUIRES_ARM_NEON_FMA;
927 for (uint32_t n = 16; n <= 24; n += 8) {
928 for (size_t k = 1; k <= 20; k += 5) {
929 GemmMicrokernelTester()
930 .mr(4)
931 .nr(8)
932 .kr(1)
933 .sr(1)
934 .m(4)
935 .n(n)
936 .k(k)
937 .a_stride(23)
938 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
939 }
940 }
941 }
942
943 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
944 TEST_REQUIRES_ARM_NEON_FMA;
945 for (uint32_t n = 16; n <= 24; n += 8) {
946 for (size_t k = 1; k <= 20; k += 5) {
947 for (uint32_t m = 1; m <= 4; m++) {
948 GemmMicrokernelTester()
949 .mr(4)
950 .nr(8)
951 .kr(1)
952 .sr(1)
953 .m(m)
954 .n(n)
955 .k(k)
956 .iterations(1)
957 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
958 }
959 }
960 }
961 }
962
963 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
964 TEST_REQUIRES_ARM_NEON_FMA;
965 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800966 for (uint32_t n = 1; n <= 8; n++) {
967 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800968 GemmMicrokernelTester()
969 .mr(4)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(m)
974 .n(n)
975 .k(k)
976 .cm_stride(11)
977 .iterations(1)
978 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
979 }
980 }
981 }
982 }
983
984 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
985 TEST_REQUIRES_ARM_NEON_FMA;
986 GemmMicrokernelTester()
987 .mr(4)
988 .nr(8)
989 .kr(1)
990 .sr(1)
991 .m(4)
992 .n(8)
993 .k(4)
994 .qmin(128)
995 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
996 }
997
998 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
999 TEST_REQUIRES_ARM_NEON_FMA;
1000 GemmMicrokernelTester()
1001 .mr(4)
1002 .nr(8)
1003 .kr(1)
1004 .sr(1)
1005 .m(4)
1006 .n(8)
1007 .k(4)
1008 .qmax(128)
1009 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1010 }
1011
1012 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
1013 TEST_REQUIRES_ARM_NEON_FMA;
1014 GemmMicrokernelTester()
1015 .mr(4)
1016 .nr(8)
1017 .kr(1)
1018 .sr(1)
1019 .m(4)
1020 .n(8)
1021 .k(4)
1022 .cm_stride(11)
1023 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1024 }
1025#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1026
1027
1028#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1029 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
1030 TEST_REQUIRES_ARM_NEON_FMA;
1031 GemmMicrokernelTester()
1032 .mr(5)
1033 .nr(8)
1034 .kr(1)
1035 .sr(1)
1036 .m(5)
1037 .n(8)
1038 .k(8)
1039 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1040 }
1041
1042 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
1043 TEST_REQUIRES_ARM_NEON_FMA;
1044 GemmMicrokernelTester()
1045 .mr(5)
1046 .nr(8)
1047 .kr(1)
1048 .sr(1)
1049 .m(5)
1050 .n(8)
1051 .k(8)
1052 .cn_stride(11)
1053 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1054 }
1055
1056 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
1057 TEST_REQUIRES_ARM_NEON_FMA;
1058 GemmMicrokernelTester()
1059 .mr(5)
1060 .nr(8)
1061 .kr(1)
1062 .sr(1)
1063 .m(5)
1064 .n(8)
1065 .k(8)
1066 .a_stride(11)
1067 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1068 }
1069
1070 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
1071 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001072 for (uint32_t n = 1; n <= 8; n++) {
1073 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001074 GemmMicrokernelTester()
1075 .mr(5)
1076 .nr(8)
1077 .kr(1)
1078 .sr(1)
1079 .m(m)
1080 .n(n)
1081 .k(8)
1082 .iterations(1)
1083 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1084 }
1085 }
1086 }
1087
1088 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
1089 TEST_REQUIRES_ARM_NEON_FMA;
1090 for (uint32_t m = 1; m <= 5; m++) {
1091 GemmMicrokernelTester()
1092 .mr(5)
1093 .nr(8)
1094 .kr(1)
1095 .sr(1)
1096 .m(m)
1097 .n(8)
1098 .k(8)
1099 .iterations(1)
1100 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1101 }
1102 }
1103
1104 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
1105 TEST_REQUIRES_ARM_NEON_FMA;
1106 for (uint32_t n = 1; n <= 8; n++) {
1107 GemmMicrokernelTester()
1108 .mr(5)
1109 .nr(8)
1110 .kr(1)
1111 .sr(1)
1112 .m(5)
1113 .n(n)
1114 .k(8)
1115 .iterations(1)
1116 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1117 }
1118 }
1119
1120 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
1121 TEST_REQUIRES_ARM_NEON_FMA;
1122 GemmMicrokernelTester()
1123 .mr(5)
1124 .nr(8)
1125 .kr(1)
1126 .sr(1)
1127 .m(5)
1128 .n(8)
1129 .k(16)
1130 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1131 }
1132
1133 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
1134 TEST_REQUIRES_ARM_NEON_FMA;
1135 GemmMicrokernelTester()
1136 .mr(5)
1137 .nr(8)
1138 .kr(1)
1139 .sr(1)
1140 .m(5)
1141 .n(8)
1142 .k(16)
1143 .a_stride(19)
1144 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1145 }
1146
1147 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
1148 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001149 for (uint32_t n = 1; n <= 8; n++) {
1150 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001151 GemmMicrokernelTester()
1152 .mr(5)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(16)
1159 .iterations(1)
1160 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1161 }
1162 }
1163 }
1164
1165 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
1166 TEST_REQUIRES_ARM_NEON_FMA;
1167 for (size_t k = 1; k < 16; k++) {
1168 GemmMicrokernelTester()
1169 .mr(5)
1170 .nr(8)
1171 .kr(1)
1172 .sr(1)
1173 .m(5)
1174 .n(8)
1175 .k(k)
1176 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1177 }
1178 }
1179
1180 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
1181 TEST_REQUIRES_ARM_NEON_FMA;
1182 for (size_t k = 1; k < 16; k++) {
1183 GemmMicrokernelTester()
1184 .mr(5)
1185 .nr(8)
1186 .kr(1)
1187 .sr(1)
1188 .m(5)
1189 .n(8)
1190 .k(k)
1191 .a_stride(19)
1192 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1193 }
1194 }
1195
1196 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
1197 TEST_REQUIRES_ARM_NEON_FMA;
1198 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001199 for (uint32_t n = 1; n <= 8; n++) {
1200 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001201 GemmMicrokernelTester()
1202 .mr(5)
1203 .nr(8)
1204 .kr(1)
1205 .sr(1)
1206 .m(m)
1207 .n(n)
1208 .k(k)
1209 .iterations(1)
1210 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1211 }
1212 }
1213 }
1214 }
1215
1216 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
1217 TEST_REQUIRES_ARM_NEON_FMA;
1218 for (size_t k = 17; k < 32; k++) {
1219 GemmMicrokernelTester()
1220 .mr(5)
1221 .nr(8)
1222 .kr(1)
1223 .sr(1)
1224 .m(5)
1225 .n(8)
1226 .k(k)
1227 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1228 }
1229 }
1230
1231 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
1232 TEST_REQUIRES_ARM_NEON_FMA;
1233 for (size_t k = 17; k < 32; k++) {
1234 GemmMicrokernelTester()
1235 .mr(5)
1236 .nr(8)
1237 .kr(1)
1238 .sr(1)
1239 .m(5)
1240 .n(8)
1241 .k(k)
1242 .a_stride(37)
1243 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1244 }
1245 }
1246
1247 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
1248 TEST_REQUIRES_ARM_NEON_FMA;
1249 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001250 for (uint32_t n = 1; n <= 8; n++) {
1251 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001252 GemmMicrokernelTester()
1253 .mr(5)
1254 .nr(8)
1255 .kr(1)
1256 .sr(1)
1257 .m(m)
1258 .n(n)
1259 .k(k)
1260 .iterations(1)
1261 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1262 }
1263 }
1264 }
1265 }
1266
1267 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
1268 TEST_REQUIRES_ARM_NEON_FMA;
1269 for (size_t k = 24; k <= 80; k += 8) {
1270 GemmMicrokernelTester()
1271 .mr(5)
1272 .nr(8)
1273 .kr(1)
1274 .sr(1)
1275 .m(5)
1276 .n(8)
1277 .k(k)
1278 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1279 }
1280 }
1281
1282 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
1283 TEST_REQUIRES_ARM_NEON_FMA;
1284 for (size_t k = 24; k <= 80; k += 8) {
1285 GemmMicrokernelTester()
1286 .mr(5)
1287 .nr(8)
1288 .kr(1)
1289 .sr(1)
1290 .m(5)
1291 .n(8)
1292 .k(k)
1293 .a_stride(83)
1294 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1295 }
1296 }
1297
1298 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
1299 TEST_REQUIRES_ARM_NEON_FMA;
1300 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001301 for (uint32_t n = 1; n <= 8; n++) {
1302 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001303 GemmMicrokernelTester()
1304 .mr(5)
1305 .nr(8)
1306 .kr(1)
1307 .sr(1)
1308 .m(m)
1309 .n(n)
1310 .k(k)
1311 .iterations(1)
1312 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1313 }
1314 }
1315 }
1316 }
1317
1318 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
1319 TEST_REQUIRES_ARM_NEON_FMA;
1320 for (uint32_t n = 9; n < 16; n++) {
1321 for (size_t k = 1; k <= 40; k += 9) {
1322 GemmMicrokernelTester()
1323 .mr(5)
1324 .nr(8)
1325 .kr(1)
1326 .sr(1)
1327 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001328 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001329 .k(k)
1330 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1331 }
1332 }
1333 }
1334
1335 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
1336 TEST_REQUIRES_ARM_NEON_FMA;
1337 for (uint32_t n = 9; n < 16; n++) {
1338 for (size_t k = 1; k <= 40; k += 9) {
1339 GemmMicrokernelTester()
1340 .mr(5)
1341 .nr(8)
1342 .kr(1)
1343 .sr(1)
1344 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001345 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001346 .k(k)
1347 .cn_stride(11)
1348 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1349 }
1350 }
1351 }
1352
1353 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
1354 TEST_REQUIRES_ARM_NEON_FMA;
1355 for (uint32_t n = 9; n < 16; n++) {
1356 for (size_t k = 1; k <= 40; k += 9) {
1357 GemmMicrokernelTester()
1358 .mr(5)
1359 .nr(8)
1360 .kr(1)
1361 .sr(1)
1362 .m(5)
1363 .n(n)
1364 .k(k)
1365 .a_stride(43)
1366 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1367 }
1368 }
1369 }
1370
1371 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
1372 TEST_REQUIRES_ARM_NEON_FMA;
1373 for (uint32_t n = 9; n < 16; n++) {
1374 for (size_t k = 1; k <= 40; k += 9) {
1375 for (uint32_t m = 1; m <= 5; m++) {
1376 GemmMicrokernelTester()
1377 .mr(5)
1378 .nr(8)
1379 .kr(1)
1380 .sr(1)
1381 .m(m)
1382 .n(n)
1383 .k(k)
1384 .iterations(1)
1385 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1386 }
1387 }
1388 }
1389 }
1390
1391 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
1392 TEST_REQUIRES_ARM_NEON_FMA;
1393 for (uint32_t n = 16; n <= 24; n += 8) {
1394 for (size_t k = 1; k <= 40; k += 9) {
1395 GemmMicrokernelTester()
1396 .mr(5)
1397 .nr(8)
1398 .kr(1)
1399 .sr(1)
1400 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001401 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001402 .k(k)
1403 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1404 }
1405 }
1406 }
1407
1408 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
1409 TEST_REQUIRES_ARM_NEON_FMA;
1410 for (uint32_t n = 16; n <= 24; n += 8) {
1411 for (size_t k = 1; k <= 40; k += 9) {
1412 GemmMicrokernelTester()
1413 .mr(5)
1414 .nr(8)
1415 .kr(1)
1416 .sr(1)
1417 .m(5)
1418 .n(n)
1419 .k(k)
1420 .cn_stride(11)
1421 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1422 }
1423 }
1424 }
1425
1426 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
1427 TEST_REQUIRES_ARM_NEON_FMA;
1428 for (uint32_t n = 16; n <= 24; n += 8) {
1429 for (size_t k = 1; k <= 40; k += 9) {
1430 GemmMicrokernelTester()
1431 .mr(5)
1432 .nr(8)
1433 .kr(1)
1434 .sr(1)
1435 .m(5)
1436 .n(n)
1437 .k(k)
1438 .a_stride(43)
1439 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1440 }
1441 }
1442 }
1443
1444 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
1445 TEST_REQUIRES_ARM_NEON_FMA;
1446 for (uint32_t n = 16; n <= 24; n += 8) {
1447 for (size_t k = 1; k <= 40; k += 9) {
1448 for (uint32_t m = 1; m <= 5; m++) {
1449 GemmMicrokernelTester()
1450 .mr(5)
1451 .nr(8)
1452 .kr(1)
1453 .sr(1)
1454 .m(m)
1455 .n(n)
1456 .k(k)
1457 .iterations(1)
1458 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1459 }
1460 }
1461 }
1462 }
1463
1464 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
1465 TEST_REQUIRES_ARM_NEON_FMA;
1466 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001467 for (uint32_t n = 1; n <= 8; n++) {
1468 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001469 GemmMicrokernelTester()
1470 .mr(5)
1471 .nr(8)
1472 .kr(1)
1473 .sr(1)
1474 .m(m)
1475 .n(n)
1476 .k(k)
1477 .cm_stride(11)
1478 .iterations(1)
1479 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1480 }
1481 }
1482 }
1483 }
1484
1485 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
1486 TEST_REQUIRES_ARM_NEON_FMA;
1487 GemmMicrokernelTester()
1488 .mr(5)
1489 .nr(8)
1490 .kr(1)
1491 .sr(1)
1492 .m(5)
1493 .n(8)
1494 .k(8)
1495 .qmin(128)
1496 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1497 }
1498
1499 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
1500 TEST_REQUIRES_ARM_NEON_FMA;
1501 GemmMicrokernelTester()
1502 .mr(5)
1503 .nr(8)
1504 .kr(1)
1505 .sr(1)
1506 .m(5)
1507 .n(8)
1508 .k(8)
1509 .qmax(128)
1510 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1511 }
1512
1513 TEST(F32_GEMMINC_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
1514 TEST_REQUIRES_ARM_NEON_FMA;
1515 GemmMicrokernelTester()
1516 .mr(5)
1517 .nr(8)
1518 .kr(1)
1519 .sr(1)
1520 .m(5)
1521 .n(8)
1522 .k(8)
1523 .cm_stride(11)
1524 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1525 }
1526#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1527
1528
1529#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1530 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
1531 TEST_REQUIRES_ARM_NEON_FMA;
1532 GemmMicrokernelTester()
1533 .mr(6)
1534 .nr(8)
1535 .kr(1)
1536 .sr(1)
1537 .m(6)
1538 .n(8)
1539 .k(4)
1540 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1541 }
1542
1543 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
1544 TEST_REQUIRES_ARM_NEON_FMA;
1545 GemmMicrokernelTester()
1546 .mr(6)
1547 .nr(8)
1548 .kr(1)
1549 .sr(1)
1550 .m(6)
1551 .n(8)
1552 .k(4)
1553 .cn_stride(11)
1554 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1555 }
1556
1557 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
1558 TEST_REQUIRES_ARM_NEON_FMA;
1559 GemmMicrokernelTester()
1560 .mr(6)
1561 .nr(8)
1562 .kr(1)
1563 .sr(1)
1564 .m(6)
1565 .n(8)
1566 .k(4)
1567 .a_stride(7)
1568 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1569 }
1570
1571 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
1572 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001573 for (uint32_t n = 1; n <= 8; n++) {
1574 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001575 GemmMicrokernelTester()
1576 .mr(6)
1577 .nr(8)
1578 .kr(1)
1579 .sr(1)
1580 .m(m)
1581 .n(n)
1582 .k(4)
1583 .iterations(1)
1584 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1585 }
1586 }
1587 }
1588
1589 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
1590 TEST_REQUIRES_ARM_NEON_FMA;
1591 for (uint32_t m = 1; m <= 6; m++) {
1592 GemmMicrokernelTester()
1593 .mr(6)
1594 .nr(8)
1595 .kr(1)
1596 .sr(1)
1597 .m(m)
1598 .n(8)
1599 .k(4)
1600 .iterations(1)
1601 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1602 }
1603 }
1604
1605 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
1606 TEST_REQUIRES_ARM_NEON_FMA;
1607 for (uint32_t n = 1; n <= 8; n++) {
1608 GemmMicrokernelTester()
1609 .mr(6)
1610 .nr(8)
1611 .kr(1)
1612 .sr(1)
1613 .m(6)
1614 .n(n)
1615 .k(4)
1616 .iterations(1)
1617 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1618 }
1619 }
1620
1621 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
1622 TEST_REQUIRES_ARM_NEON_FMA;
1623 GemmMicrokernelTester()
1624 .mr(6)
1625 .nr(8)
1626 .kr(1)
1627 .sr(1)
1628 .m(6)
1629 .n(8)
1630 .k(8)
1631 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1632 }
1633
1634 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
1635 TEST_REQUIRES_ARM_NEON_FMA;
1636 GemmMicrokernelTester()
1637 .mr(6)
1638 .nr(8)
1639 .kr(1)
1640 .sr(1)
1641 .m(6)
1642 .n(8)
1643 .k(8)
1644 .a_stride(11)
1645 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1646 }
1647
1648 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
1649 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001650 for (uint32_t n = 1; n <= 8; n++) {
1651 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001652 GemmMicrokernelTester()
1653 .mr(6)
1654 .nr(8)
1655 .kr(1)
1656 .sr(1)
1657 .m(m)
1658 .n(n)
1659 .k(8)
1660 .iterations(1)
1661 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1662 }
1663 }
1664 }
1665
1666 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
1667 TEST_REQUIRES_ARM_NEON_FMA;
1668 for (size_t k = 1; k < 8; k++) {
1669 GemmMicrokernelTester()
1670 .mr(6)
1671 .nr(8)
1672 .kr(1)
1673 .sr(1)
1674 .m(6)
1675 .n(8)
1676 .k(k)
1677 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1678 }
1679 }
1680
1681 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
1682 TEST_REQUIRES_ARM_NEON_FMA;
1683 for (size_t k = 1; k < 8; k++) {
1684 GemmMicrokernelTester()
1685 .mr(6)
1686 .nr(8)
1687 .kr(1)
1688 .sr(1)
1689 .m(6)
1690 .n(8)
1691 .k(k)
1692 .a_stride(11)
1693 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1694 }
1695 }
1696
1697 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
1698 TEST_REQUIRES_ARM_NEON_FMA;
1699 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001700 for (uint32_t n = 1; n <= 8; n++) {
1701 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001702 GemmMicrokernelTester()
1703 .mr(6)
1704 .nr(8)
1705 .kr(1)
1706 .sr(1)
1707 .m(m)
1708 .n(n)
1709 .k(k)
1710 .iterations(1)
1711 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1712 }
1713 }
1714 }
1715 }
1716
1717 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
1718 TEST_REQUIRES_ARM_NEON_FMA;
1719 for (size_t k = 9; k < 16; k++) {
1720 GemmMicrokernelTester()
1721 .mr(6)
1722 .nr(8)
1723 .kr(1)
1724 .sr(1)
1725 .m(6)
1726 .n(8)
1727 .k(k)
1728 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1729 }
1730 }
1731
1732 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
1733 TEST_REQUIRES_ARM_NEON_FMA;
1734 for (size_t k = 9; k < 16; k++) {
1735 GemmMicrokernelTester()
1736 .mr(6)
1737 .nr(8)
1738 .kr(1)
1739 .sr(1)
1740 .m(6)
1741 .n(8)
1742 .k(k)
1743 .a_stride(19)
1744 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1745 }
1746 }
1747
1748 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
1749 TEST_REQUIRES_ARM_NEON_FMA;
1750 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001751 for (uint32_t n = 1; n <= 8; n++) {
1752 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001753 GemmMicrokernelTester()
1754 .mr(6)
1755 .nr(8)
1756 .kr(1)
1757 .sr(1)
1758 .m(m)
1759 .n(n)
1760 .k(k)
1761 .iterations(1)
1762 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1763 }
1764 }
1765 }
1766 }
1767
1768 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
1769 TEST_REQUIRES_ARM_NEON_FMA;
1770 for (size_t k = 12; k <= 40; k += 4) {
1771 GemmMicrokernelTester()
1772 .mr(6)
1773 .nr(8)
1774 .kr(1)
1775 .sr(1)
1776 .m(6)
1777 .n(8)
1778 .k(k)
1779 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1780 }
1781 }
1782
1783 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
1784 TEST_REQUIRES_ARM_NEON_FMA;
1785 for (size_t k = 12; k <= 40; k += 4) {
1786 GemmMicrokernelTester()
1787 .mr(6)
1788 .nr(8)
1789 .kr(1)
1790 .sr(1)
1791 .m(6)
1792 .n(8)
1793 .k(k)
1794 .a_stride(43)
1795 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1796 }
1797 }
1798
1799 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
1800 TEST_REQUIRES_ARM_NEON_FMA;
1801 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001802 for (uint32_t n = 1; n <= 8; n++) {
1803 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001804 GemmMicrokernelTester()
1805 .mr(6)
1806 .nr(8)
1807 .kr(1)
1808 .sr(1)
1809 .m(m)
1810 .n(n)
1811 .k(k)
1812 .iterations(1)
1813 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1814 }
1815 }
1816 }
1817 }
1818
1819 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
1820 TEST_REQUIRES_ARM_NEON_FMA;
1821 for (uint32_t n = 9; n < 16; n++) {
1822 for (size_t k = 1; k <= 20; k += 5) {
1823 GemmMicrokernelTester()
1824 .mr(6)
1825 .nr(8)
1826 .kr(1)
1827 .sr(1)
1828 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001829 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001830 .k(k)
1831 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1832 }
1833 }
1834 }
1835
1836 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
1837 TEST_REQUIRES_ARM_NEON_FMA;
1838 for (uint32_t n = 9; n < 16; n++) {
1839 for (size_t k = 1; k <= 20; k += 5) {
1840 GemmMicrokernelTester()
1841 .mr(6)
1842 .nr(8)
1843 .kr(1)
1844 .sr(1)
1845 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001846 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001847 .k(k)
1848 .cn_stride(11)
1849 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1850 }
1851 }
1852 }
1853
1854 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
1855 TEST_REQUIRES_ARM_NEON_FMA;
1856 for (uint32_t n = 9; n < 16; n++) {
1857 for (size_t k = 1; k <= 20; k += 5) {
1858 GemmMicrokernelTester()
1859 .mr(6)
1860 .nr(8)
1861 .kr(1)
1862 .sr(1)
1863 .m(6)
1864 .n(n)
1865 .k(k)
1866 .a_stride(23)
1867 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1868 }
1869 }
1870 }
1871
1872 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
1873 TEST_REQUIRES_ARM_NEON_FMA;
1874 for (uint32_t n = 9; n < 16; n++) {
1875 for (size_t k = 1; k <= 20; k += 5) {
1876 for (uint32_t m = 1; m <= 6; m++) {
1877 GemmMicrokernelTester()
1878 .mr(6)
1879 .nr(8)
1880 .kr(1)
1881 .sr(1)
1882 .m(m)
1883 .n(n)
1884 .k(k)
1885 .iterations(1)
1886 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1887 }
1888 }
1889 }
1890 }
1891
1892 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
1893 TEST_REQUIRES_ARM_NEON_FMA;
1894 for (uint32_t n = 16; n <= 24; n += 8) {
1895 for (size_t k = 1; k <= 20; k += 5) {
1896 GemmMicrokernelTester()
1897 .mr(6)
1898 .nr(8)
1899 .kr(1)
1900 .sr(1)
1901 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001902 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001903 .k(k)
1904 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1905 }
1906 }
1907 }
1908
1909 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
1910 TEST_REQUIRES_ARM_NEON_FMA;
1911 for (uint32_t n = 16; n <= 24; n += 8) {
1912 for (size_t k = 1; k <= 20; k += 5) {
1913 GemmMicrokernelTester()
1914 .mr(6)
1915 .nr(8)
1916 .kr(1)
1917 .sr(1)
1918 .m(6)
1919 .n(n)
1920 .k(k)
1921 .cn_stride(11)
1922 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1923 }
1924 }
1925 }
1926
1927 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
1928 TEST_REQUIRES_ARM_NEON_FMA;
1929 for (uint32_t n = 16; n <= 24; n += 8) {
1930 for (size_t k = 1; k <= 20; k += 5) {
1931 GemmMicrokernelTester()
1932 .mr(6)
1933 .nr(8)
1934 .kr(1)
1935 .sr(1)
1936 .m(6)
1937 .n(n)
1938 .k(k)
1939 .a_stride(23)
1940 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1941 }
1942 }
1943 }
1944
1945 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
1946 TEST_REQUIRES_ARM_NEON_FMA;
1947 for (uint32_t n = 16; n <= 24; n += 8) {
1948 for (size_t k = 1; k <= 20; k += 5) {
1949 for (uint32_t m = 1; m <= 6; m++) {
1950 GemmMicrokernelTester()
1951 .mr(6)
1952 .nr(8)
1953 .kr(1)
1954 .sr(1)
1955 .m(m)
1956 .n(n)
1957 .k(k)
1958 .iterations(1)
1959 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1960 }
1961 }
1962 }
1963 }
1964
1965 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
1966 TEST_REQUIRES_ARM_NEON_FMA;
1967 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001968 for (uint32_t n = 1; n <= 8; n++) {
1969 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001970 GemmMicrokernelTester()
1971 .mr(6)
1972 .nr(8)
1973 .kr(1)
1974 .sr(1)
1975 .m(m)
1976 .n(n)
1977 .k(k)
1978 .cm_stride(11)
1979 .iterations(1)
1980 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1981 }
1982 }
1983 }
1984 }
1985
1986 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
1987 TEST_REQUIRES_ARM_NEON_FMA;
1988 GemmMicrokernelTester()
1989 .mr(6)
1990 .nr(8)
1991 .kr(1)
1992 .sr(1)
1993 .m(6)
1994 .n(8)
1995 .k(4)
1996 .qmin(128)
1997 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1998 }
1999
2000 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
2001 TEST_REQUIRES_ARM_NEON_FMA;
2002 GemmMicrokernelTester()
2003 .mr(6)
2004 .nr(8)
2005 .kr(1)
2006 .sr(1)
2007 .m(6)
2008 .n(8)
2009 .k(4)
2010 .qmax(128)
2011 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
2012 }
2013
2014 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
2015 TEST_REQUIRES_ARM_NEON_FMA;
2016 GemmMicrokernelTester()
2017 .mr(6)
2018 .nr(8)
2019 .kr(1)
2020 .sr(1)
2021 .m(6)
2022 .n(8)
2023 .k(4)
2024 .cm_stride(11)
2025 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
2026 }
2027#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2028
2029
2030#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2031 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
2032 TEST_REQUIRES_ARM_NEON_FMA;
2033 GemmMicrokernelTester()
2034 .mr(6)
2035 .nr(8)
2036 .kr(1)
2037 .sr(1)
2038 .m(6)
2039 .n(8)
2040 .k(8)
2041 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2042 }
2043
2044 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cn) {
2045 TEST_REQUIRES_ARM_NEON_FMA;
2046 GemmMicrokernelTester()
2047 .mr(6)
2048 .nr(8)
2049 .kr(1)
2050 .sr(1)
2051 .m(6)
2052 .n(8)
2053 .k(8)
2054 .cn_stride(11)
2055 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2056 }
2057
2058 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_strided_a) {
2059 TEST_REQUIRES_ARM_NEON_FMA;
2060 GemmMicrokernelTester()
2061 .mr(6)
2062 .nr(8)
2063 .kr(1)
2064 .sr(1)
2065 .m(6)
2066 .n(8)
2067 .k(8)
2068 .a_stride(11)
2069 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2070 }
2071
2072 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile) {
2073 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002074 for (uint32_t n = 1; n <= 8; n++) {
2075 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002076 GemmMicrokernelTester()
2077 .mr(6)
2078 .nr(8)
2079 .kr(1)
2080 .sr(1)
2081 .m(m)
2082 .n(n)
2083 .k(8)
2084 .iterations(1)
2085 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2086 }
2087 }
2088 }
2089
2090 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_m) {
2091 TEST_REQUIRES_ARM_NEON_FMA;
2092 for (uint32_t m = 1; m <= 6; m++) {
2093 GemmMicrokernelTester()
2094 .mr(6)
2095 .nr(8)
2096 .kr(1)
2097 .sr(1)
2098 .m(m)
2099 .n(8)
2100 .k(8)
2101 .iterations(1)
2102 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2103 }
2104 }
2105
2106 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_n) {
2107 TEST_REQUIRES_ARM_NEON_FMA;
2108 for (uint32_t n = 1; n <= 8; n++) {
2109 GemmMicrokernelTester()
2110 .mr(6)
2111 .nr(8)
2112 .kr(1)
2113 .sr(1)
2114 .m(6)
2115 .n(n)
2116 .k(8)
2117 .iterations(1)
2118 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2119 }
2120 }
2121
2122 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16) {
2123 TEST_REQUIRES_ARM_NEON_FMA;
2124 GemmMicrokernelTester()
2125 .mr(6)
2126 .nr(8)
2127 .kr(1)
2128 .sr(1)
2129 .m(6)
2130 .n(8)
2131 .k(16)
2132 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2133 }
2134
2135 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_strided_a) {
2136 TEST_REQUIRES_ARM_NEON_FMA;
2137 GemmMicrokernelTester()
2138 .mr(6)
2139 .nr(8)
2140 .kr(1)
2141 .sr(1)
2142 .m(6)
2143 .n(8)
2144 .k(16)
2145 .a_stride(19)
2146 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2147 }
2148
2149 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_subtile) {
2150 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002151 for (uint32_t n = 1; n <= 8; n++) {
2152 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002153 GemmMicrokernelTester()
2154 .mr(6)
2155 .nr(8)
2156 .kr(1)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(16)
2161 .iterations(1)
2162 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2163 }
2164 }
2165 }
2166
2167 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16) {
2168 TEST_REQUIRES_ARM_NEON_FMA;
2169 for (size_t k = 1; k < 16; k++) {
2170 GemmMicrokernelTester()
2171 .mr(6)
2172 .nr(8)
2173 .kr(1)
2174 .sr(1)
2175 .m(6)
2176 .n(8)
2177 .k(k)
2178 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2179 }
2180 }
2181
2182 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_strided_a) {
2183 TEST_REQUIRES_ARM_NEON_FMA;
2184 for (size_t k = 1; k < 16; k++) {
2185 GemmMicrokernelTester()
2186 .mr(6)
2187 .nr(8)
2188 .kr(1)
2189 .sr(1)
2190 .m(6)
2191 .n(8)
2192 .k(k)
2193 .a_stride(19)
2194 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2195 }
2196 }
2197
2198 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_subtile) {
2199 TEST_REQUIRES_ARM_NEON_FMA;
2200 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002201 for (uint32_t n = 1; n <= 8; n++) {
2202 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002203 GemmMicrokernelTester()
2204 .mr(6)
2205 .nr(8)
2206 .kr(1)
2207 .sr(1)
2208 .m(m)
2209 .n(n)
2210 .k(k)
2211 .iterations(1)
2212 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2213 }
2214 }
2215 }
2216 }
2217
2218 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16) {
2219 TEST_REQUIRES_ARM_NEON_FMA;
2220 for (size_t k = 17; k < 32; k++) {
2221 GemmMicrokernelTester()
2222 .mr(6)
2223 .nr(8)
2224 .kr(1)
2225 .sr(1)
2226 .m(6)
2227 .n(8)
2228 .k(k)
2229 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2230 }
2231 }
2232
2233 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_strided_a) {
2234 TEST_REQUIRES_ARM_NEON_FMA;
2235 for (size_t k = 17; k < 32; k++) {
2236 GemmMicrokernelTester()
2237 .mr(6)
2238 .nr(8)
2239 .kr(1)
2240 .sr(1)
2241 .m(6)
2242 .n(8)
2243 .k(k)
2244 .a_stride(37)
2245 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2246 }
2247 }
2248
2249 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_subtile) {
2250 TEST_REQUIRES_ARM_NEON_FMA;
2251 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002252 for (uint32_t n = 1; n <= 8; n++) {
2253 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002254 GemmMicrokernelTester()
2255 .mr(6)
2256 .nr(8)
2257 .kr(1)
2258 .sr(1)
2259 .m(m)
2260 .n(n)
2261 .k(k)
2262 .iterations(1)
2263 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2264 }
2265 }
2266 }
2267 }
2268
2269 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8) {
2270 TEST_REQUIRES_ARM_NEON_FMA;
2271 for (size_t k = 24; k <= 80; k += 8) {
2272 GemmMicrokernelTester()
2273 .mr(6)
2274 .nr(8)
2275 .kr(1)
2276 .sr(1)
2277 .m(6)
2278 .n(8)
2279 .k(k)
2280 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2281 }
2282 }
2283
2284 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_strided_a) {
2285 TEST_REQUIRES_ARM_NEON_FMA;
2286 for (size_t k = 24; k <= 80; k += 8) {
2287 GemmMicrokernelTester()
2288 .mr(6)
2289 .nr(8)
2290 .kr(1)
2291 .sr(1)
2292 .m(6)
2293 .n(8)
2294 .k(k)
2295 .a_stride(83)
2296 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2297 }
2298 }
2299
2300 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_subtile) {
2301 TEST_REQUIRES_ARM_NEON_FMA;
2302 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002303 for (uint32_t n = 1; n <= 8; n++) {
2304 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002305 GemmMicrokernelTester()
2306 .mr(6)
2307 .nr(8)
2308 .kr(1)
2309 .sr(1)
2310 .m(m)
2311 .n(n)
2312 .k(k)
2313 .iterations(1)
2314 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2315 }
2316 }
2317 }
2318 }
2319
2320 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8) {
2321 TEST_REQUIRES_ARM_NEON_FMA;
2322 for (uint32_t n = 9; n < 16; n++) {
2323 for (size_t k = 1; k <= 40; k += 9) {
2324 GemmMicrokernelTester()
2325 .mr(6)
2326 .nr(8)
2327 .kr(1)
2328 .sr(1)
2329 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002331 .k(k)
2332 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2333 }
2334 }
2335 }
2336
2337 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_cn) {
2338 TEST_REQUIRES_ARM_NEON_FMA;
2339 for (uint32_t n = 9; n < 16; n++) {
2340 for (size_t k = 1; k <= 40; k += 9) {
2341 GemmMicrokernelTester()
2342 .mr(6)
2343 .nr(8)
2344 .kr(1)
2345 .sr(1)
2346 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002347 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002348 .k(k)
2349 .cn_stride(11)
2350 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2351 }
2352 }
2353 }
2354
2355 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_a) {
2356 TEST_REQUIRES_ARM_NEON_FMA;
2357 for (uint32_t n = 9; n < 16; n++) {
2358 for (size_t k = 1; k <= 40; k += 9) {
2359 GemmMicrokernelTester()
2360 .mr(6)
2361 .nr(8)
2362 .kr(1)
2363 .sr(1)
2364 .m(6)
2365 .n(n)
2366 .k(k)
2367 .a_stride(43)
2368 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2369 }
2370 }
2371 }
2372
2373 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_subtile) {
2374 TEST_REQUIRES_ARM_NEON_FMA;
2375 for (uint32_t n = 9; n < 16; n++) {
2376 for (size_t k = 1; k <= 40; k += 9) {
2377 for (uint32_t m = 1; m <= 6; m++) {
2378 GemmMicrokernelTester()
2379 .mr(6)
2380 .nr(8)
2381 .kr(1)
2382 .sr(1)
2383 .m(m)
2384 .n(n)
2385 .k(k)
2386 .iterations(1)
2387 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2388 }
2389 }
2390 }
2391 }
2392
2393 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8) {
2394 TEST_REQUIRES_ARM_NEON_FMA;
2395 for (uint32_t n = 16; n <= 24; n += 8) {
2396 for (size_t k = 1; k <= 40; k += 9) {
2397 GemmMicrokernelTester()
2398 .mr(6)
2399 .nr(8)
2400 .kr(1)
2401 .sr(1)
2402 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002403 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002404 .k(k)
2405 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2406 }
2407 }
2408 }
2409
2410 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_cn) {
2411 TEST_REQUIRES_ARM_NEON_FMA;
2412 for (uint32_t n = 16; n <= 24; n += 8) {
2413 for (size_t k = 1; k <= 40; k += 9) {
2414 GemmMicrokernelTester()
2415 .mr(6)
2416 .nr(8)
2417 .kr(1)
2418 .sr(1)
2419 .m(6)
2420 .n(n)
2421 .k(k)
2422 .cn_stride(11)
2423 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2424 }
2425 }
2426 }
2427
2428 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_a) {
2429 TEST_REQUIRES_ARM_NEON_FMA;
2430 for (uint32_t n = 16; n <= 24; n += 8) {
2431 for (size_t k = 1; k <= 40; k += 9) {
2432 GemmMicrokernelTester()
2433 .mr(6)
2434 .nr(8)
2435 .kr(1)
2436 .sr(1)
2437 .m(6)
2438 .n(n)
2439 .k(k)
2440 .a_stride(43)
2441 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2442 }
2443 }
2444 }
2445
2446 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_subtile) {
2447 TEST_REQUIRES_ARM_NEON_FMA;
2448 for (uint32_t n = 16; n <= 24; n += 8) {
2449 for (size_t k = 1; k <= 40; k += 9) {
2450 for (uint32_t m = 1; m <= 6; m++) {
2451 GemmMicrokernelTester()
2452 .mr(6)
2453 .nr(8)
2454 .kr(1)
2455 .sr(1)
2456 .m(m)
2457 .n(n)
2458 .k(k)
2459 .iterations(1)
2460 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2461 }
2462 }
2463 }
2464 }
2465
2466 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm_subtile) {
2467 TEST_REQUIRES_ARM_NEON_FMA;
2468 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002469 for (uint32_t n = 1; n <= 8; n++) {
2470 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002471 GemmMicrokernelTester()
2472 .mr(6)
2473 .nr(8)
2474 .kr(1)
2475 .sr(1)
2476 .m(m)
2477 .n(n)
2478 .k(k)
2479 .cm_stride(11)
2480 .iterations(1)
2481 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2482 }
2483 }
2484 }
2485 }
2486
2487 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmin) {
2488 TEST_REQUIRES_ARM_NEON_FMA;
2489 GemmMicrokernelTester()
2490 .mr(6)
2491 .nr(8)
2492 .kr(1)
2493 .sr(1)
2494 .m(6)
2495 .n(8)
2496 .k(8)
2497 .qmin(128)
2498 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2499 }
2500
2501 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmax) {
2502 TEST_REQUIRES_ARM_NEON_FMA;
2503 GemmMicrokernelTester()
2504 .mr(6)
2505 .nr(8)
2506 .kr(1)
2507 .sr(1)
2508 .m(6)
2509 .n(8)
2510 .k(8)
2511 .qmax(128)
2512 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2513 }
2514
2515 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm) {
2516 TEST_REQUIRES_ARM_NEON_FMA;
2517 GemmMicrokernelTester()
2518 .mr(6)
2519 .nr(8)
2520 .kr(1)
2521 .sr(1)
2522 .m(6)
2523 .n(8)
2524 .k(8)
2525 .cm_stride(11)
2526 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
2527 }
2528#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2529
2530
2531#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2532 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
2533 TEST_REQUIRES_ARM_NEON_FMA;
2534 GemmMicrokernelTester()
2535 .mr(6)
2536 .nr(8)
2537 .kr(1)
2538 .sr(1)
2539 .m(6)
2540 .n(8)
2541 .k(8)
2542 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2543 }
2544
2545 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
2546 TEST_REQUIRES_ARM_NEON_FMA;
2547 GemmMicrokernelTester()
2548 .mr(6)
2549 .nr(8)
2550 .kr(1)
2551 .sr(1)
2552 .m(6)
2553 .n(8)
2554 .k(8)
2555 .cn_stride(11)
2556 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2557 }
2558
2559 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
2560 TEST_REQUIRES_ARM_NEON_FMA;
2561 GemmMicrokernelTester()
2562 .mr(6)
2563 .nr(8)
2564 .kr(1)
2565 .sr(1)
2566 .m(6)
2567 .n(8)
2568 .k(8)
2569 .a_stride(11)
2570 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2571 }
2572
2573 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
2574 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002575 for (uint32_t n = 1; n <= 8; n++) {
2576 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002577 GemmMicrokernelTester()
2578 .mr(6)
2579 .nr(8)
2580 .kr(1)
2581 .sr(1)
2582 .m(m)
2583 .n(n)
2584 .k(8)
2585 .iterations(1)
2586 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2587 }
2588 }
2589 }
2590
2591 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
2592 TEST_REQUIRES_ARM_NEON_FMA;
2593 for (uint32_t m = 1; m <= 6; m++) {
2594 GemmMicrokernelTester()
2595 .mr(6)
2596 .nr(8)
2597 .kr(1)
2598 .sr(1)
2599 .m(m)
2600 .n(8)
2601 .k(8)
2602 .iterations(1)
2603 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2604 }
2605 }
2606
2607 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
2608 TEST_REQUIRES_ARM_NEON_FMA;
2609 for (uint32_t n = 1; n <= 8; n++) {
2610 GemmMicrokernelTester()
2611 .mr(6)
2612 .nr(8)
2613 .kr(1)
2614 .sr(1)
2615 .m(6)
2616 .n(n)
2617 .k(8)
2618 .iterations(1)
2619 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2620 }
2621 }
2622
2623 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
2624 TEST_REQUIRES_ARM_NEON_FMA;
2625 GemmMicrokernelTester()
2626 .mr(6)
2627 .nr(8)
2628 .kr(1)
2629 .sr(1)
2630 .m(6)
2631 .n(8)
2632 .k(16)
2633 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2634 }
2635
2636 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
2637 TEST_REQUIRES_ARM_NEON_FMA;
2638 GemmMicrokernelTester()
2639 .mr(6)
2640 .nr(8)
2641 .kr(1)
2642 .sr(1)
2643 .m(6)
2644 .n(8)
2645 .k(16)
2646 .a_stride(19)
2647 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2648 }
2649
2650 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
2651 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002652 for (uint32_t n = 1; n <= 8; n++) {
2653 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002654 GemmMicrokernelTester()
2655 .mr(6)
2656 .nr(8)
2657 .kr(1)
2658 .sr(1)
2659 .m(m)
2660 .n(n)
2661 .k(16)
2662 .iterations(1)
2663 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2664 }
2665 }
2666 }
2667
2668 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
2669 TEST_REQUIRES_ARM_NEON_FMA;
2670 for (size_t k = 1; k < 16; k++) {
2671 GemmMicrokernelTester()
2672 .mr(6)
2673 .nr(8)
2674 .kr(1)
2675 .sr(1)
2676 .m(6)
2677 .n(8)
2678 .k(k)
2679 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2680 }
2681 }
2682
2683 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
2684 TEST_REQUIRES_ARM_NEON_FMA;
2685 for (size_t k = 1; k < 16; k++) {
2686 GemmMicrokernelTester()
2687 .mr(6)
2688 .nr(8)
2689 .kr(1)
2690 .sr(1)
2691 .m(6)
2692 .n(8)
2693 .k(k)
2694 .a_stride(19)
2695 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2696 }
2697 }
2698
2699 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
2700 TEST_REQUIRES_ARM_NEON_FMA;
2701 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002702 for (uint32_t n = 1; n <= 8; n++) {
2703 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002704 GemmMicrokernelTester()
2705 .mr(6)
2706 .nr(8)
2707 .kr(1)
2708 .sr(1)
2709 .m(m)
2710 .n(n)
2711 .k(k)
2712 .iterations(1)
2713 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2714 }
2715 }
2716 }
2717 }
2718
2719 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
2720 TEST_REQUIRES_ARM_NEON_FMA;
2721 for (size_t k = 17; k < 32; k++) {
2722 GemmMicrokernelTester()
2723 .mr(6)
2724 .nr(8)
2725 .kr(1)
2726 .sr(1)
2727 .m(6)
2728 .n(8)
2729 .k(k)
2730 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2731 }
2732 }
2733
2734 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
2735 TEST_REQUIRES_ARM_NEON_FMA;
2736 for (size_t k = 17; k < 32; k++) {
2737 GemmMicrokernelTester()
2738 .mr(6)
2739 .nr(8)
2740 .kr(1)
2741 .sr(1)
2742 .m(6)
2743 .n(8)
2744 .k(k)
2745 .a_stride(37)
2746 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2747 }
2748 }
2749
2750 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
2751 TEST_REQUIRES_ARM_NEON_FMA;
2752 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002753 for (uint32_t n = 1; n <= 8; n++) {
2754 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002755 GemmMicrokernelTester()
2756 .mr(6)
2757 .nr(8)
2758 .kr(1)
2759 .sr(1)
2760 .m(m)
2761 .n(n)
2762 .k(k)
2763 .iterations(1)
2764 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2765 }
2766 }
2767 }
2768 }
2769
2770 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
2771 TEST_REQUIRES_ARM_NEON_FMA;
2772 for (size_t k = 24; k <= 80; k += 8) {
2773 GemmMicrokernelTester()
2774 .mr(6)
2775 .nr(8)
2776 .kr(1)
2777 .sr(1)
2778 .m(6)
2779 .n(8)
2780 .k(k)
2781 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2782 }
2783 }
2784
2785 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
2786 TEST_REQUIRES_ARM_NEON_FMA;
2787 for (size_t k = 24; k <= 80; k += 8) {
2788 GemmMicrokernelTester()
2789 .mr(6)
2790 .nr(8)
2791 .kr(1)
2792 .sr(1)
2793 .m(6)
2794 .n(8)
2795 .k(k)
2796 .a_stride(83)
2797 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2798 }
2799 }
2800
2801 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
2802 TEST_REQUIRES_ARM_NEON_FMA;
2803 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002804 for (uint32_t n = 1; n <= 8; n++) {
2805 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002806 GemmMicrokernelTester()
2807 .mr(6)
2808 .nr(8)
2809 .kr(1)
2810 .sr(1)
2811 .m(m)
2812 .n(n)
2813 .k(k)
2814 .iterations(1)
2815 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2816 }
2817 }
2818 }
2819 }
2820
2821 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
2822 TEST_REQUIRES_ARM_NEON_FMA;
2823 for (uint32_t n = 9; n < 16; n++) {
2824 for (size_t k = 1; k <= 40; k += 9) {
2825 GemmMicrokernelTester()
2826 .mr(6)
2827 .nr(8)
2828 .kr(1)
2829 .sr(1)
2830 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002831 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002832 .k(k)
2833 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2834 }
2835 }
2836 }
2837
2838 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
2839 TEST_REQUIRES_ARM_NEON_FMA;
2840 for (uint32_t n = 9; n < 16; n++) {
2841 for (size_t k = 1; k <= 40; k += 9) {
2842 GemmMicrokernelTester()
2843 .mr(6)
2844 .nr(8)
2845 .kr(1)
2846 .sr(1)
2847 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002848 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002849 .k(k)
2850 .cn_stride(11)
2851 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2852 }
2853 }
2854 }
2855
2856 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
2857 TEST_REQUIRES_ARM_NEON_FMA;
2858 for (uint32_t n = 9; n < 16; n++) {
2859 for (size_t k = 1; k <= 40; k += 9) {
2860 GemmMicrokernelTester()
2861 .mr(6)
2862 .nr(8)
2863 .kr(1)
2864 .sr(1)
2865 .m(6)
2866 .n(n)
2867 .k(k)
2868 .a_stride(43)
2869 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2870 }
2871 }
2872 }
2873
2874 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
2875 TEST_REQUIRES_ARM_NEON_FMA;
2876 for (uint32_t n = 9; n < 16; n++) {
2877 for (size_t k = 1; k <= 40; k += 9) {
2878 for (uint32_t m = 1; m <= 6; m++) {
2879 GemmMicrokernelTester()
2880 .mr(6)
2881 .nr(8)
2882 .kr(1)
2883 .sr(1)
2884 .m(m)
2885 .n(n)
2886 .k(k)
2887 .iterations(1)
2888 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2889 }
2890 }
2891 }
2892 }
2893
2894 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
2895 TEST_REQUIRES_ARM_NEON_FMA;
2896 for (uint32_t n = 16; n <= 24; n += 8) {
2897 for (size_t k = 1; k <= 40; k += 9) {
2898 GemmMicrokernelTester()
2899 .mr(6)
2900 .nr(8)
2901 .kr(1)
2902 .sr(1)
2903 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002904 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002905 .k(k)
2906 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2907 }
2908 }
2909 }
2910
2911 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
2912 TEST_REQUIRES_ARM_NEON_FMA;
2913 for (uint32_t n = 16; n <= 24; n += 8) {
2914 for (size_t k = 1; k <= 40; k += 9) {
2915 GemmMicrokernelTester()
2916 .mr(6)
2917 .nr(8)
2918 .kr(1)
2919 .sr(1)
2920 .m(6)
2921 .n(n)
2922 .k(k)
2923 .cn_stride(11)
2924 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2925 }
2926 }
2927 }
2928
2929 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
2930 TEST_REQUIRES_ARM_NEON_FMA;
2931 for (uint32_t n = 16; n <= 24; n += 8) {
2932 for (size_t k = 1; k <= 40; k += 9) {
2933 GemmMicrokernelTester()
2934 .mr(6)
2935 .nr(8)
2936 .kr(1)
2937 .sr(1)
2938 .m(6)
2939 .n(n)
2940 .k(k)
2941 .a_stride(43)
2942 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2943 }
2944 }
2945 }
2946
2947 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
2948 TEST_REQUIRES_ARM_NEON_FMA;
2949 for (uint32_t n = 16; n <= 24; n += 8) {
2950 for (size_t k = 1; k <= 40; k += 9) {
2951 for (uint32_t m = 1; m <= 6; m++) {
2952 GemmMicrokernelTester()
2953 .mr(6)
2954 .nr(8)
2955 .kr(1)
2956 .sr(1)
2957 .m(m)
2958 .n(n)
2959 .k(k)
2960 .iterations(1)
2961 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2962 }
2963 }
2964 }
2965 }
2966
2967 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
2968 TEST_REQUIRES_ARM_NEON_FMA;
2969 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002970 for (uint32_t n = 1; n <= 8; n++) {
2971 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002972 GemmMicrokernelTester()
2973 .mr(6)
2974 .nr(8)
2975 .kr(1)
2976 .sr(1)
2977 .m(m)
2978 .n(n)
2979 .k(k)
2980 .cm_stride(11)
2981 .iterations(1)
2982 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2983 }
2984 }
2985 }
2986 }
2987
2988 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
2989 TEST_REQUIRES_ARM_NEON_FMA;
2990 GemmMicrokernelTester()
2991 .mr(6)
2992 .nr(8)
2993 .kr(1)
2994 .sr(1)
2995 .m(6)
2996 .n(8)
2997 .k(8)
2998 .qmin(128)
2999 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3000 }
3001
3002 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
3003 TEST_REQUIRES_ARM_NEON_FMA;
3004 GemmMicrokernelTester()
3005 .mr(6)
3006 .nr(8)
3007 .kr(1)
3008 .sr(1)
3009 .m(6)
3010 .n(8)
3011 .k(8)
3012 .qmax(128)
3013 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3014 }
3015
3016 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
3017 TEST_REQUIRES_ARM_NEON_FMA;
3018 GemmMicrokernelTester()
3019 .mr(6)
3020 .nr(8)
3021 .kr(1)
3022 .sr(1)
3023 .m(6)
3024 .n(8)
3025 .k(8)
3026 .cm_stride(11)
3027 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3028 }
3029#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3030
3031
3032#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3033 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2) {
3034 TEST_REQUIRES_ARM_NEON_FMA;
3035 GemmMicrokernelTester()
3036 .mr(1)
3037 .nr(8)
3038 .kr(1)
3039 .sr(1)
3040 .m(1)
3041 .n(8)
3042 .k(2)
3043 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3044 }
3045
3046 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cn) {
3047 TEST_REQUIRES_ARM_NEON_FMA;
3048 GemmMicrokernelTester()
3049 .mr(1)
3050 .nr(8)
3051 .kr(1)
3052 .sr(1)
3053 .m(1)
3054 .n(8)
3055 .k(2)
3056 .cn_stride(11)
3057 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3058 }
3059
3060 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
3061 TEST_REQUIRES_ARM_NEON_FMA;
3062 GemmMicrokernelTester()
3063 .mr(1)
3064 .nr(8)
3065 .kr(1)
3066 .sr(1)
3067 .m(1)
3068 .n(8)
3069 .k(2)
3070 .a_stride(5)
3071 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3072 }
3073
3074 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
3075 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003076 for (uint32_t n = 1; n <= 8; n++) {
3077 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003078 GemmMicrokernelTester()
3079 .mr(1)
3080 .nr(8)
3081 .kr(1)
3082 .sr(1)
3083 .m(m)
3084 .n(n)
3085 .k(2)
3086 .iterations(1)
3087 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3088 }
3089 }
3090 }
3091
3092 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
3093 TEST_REQUIRES_ARM_NEON_FMA;
3094 for (uint32_t m = 1; m <= 1; m++) {
3095 GemmMicrokernelTester()
3096 .mr(1)
3097 .nr(8)
3098 .kr(1)
3099 .sr(1)
3100 .m(m)
3101 .n(8)
3102 .k(2)
3103 .iterations(1)
3104 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3105 }
3106 }
3107
3108 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
3109 TEST_REQUIRES_ARM_NEON_FMA;
3110 for (uint32_t n = 1; n <= 8; n++) {
3111 GemmMicrokernelTester()
3112 .mr(1)
3113 .nr(8)
3114 .kr(1)
3115 .sr(1)
3116 .m(1)
3117 .n(n)
3118 .k(2)
3119 .iterations(1)
3120 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3121 }
3122 }
3123
3124 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2) {
3125 TEST_REQUIRES_ARM_NEON_FMA;
3126 for (size_t k = 1; k < 2; k++) {
3127 GemmMicrokernelTester()
3128 .mr(1)
3129 .nr(8)
3130 .kr(1)
3131 .sr(1)
3132 .m(1)
3133 .n(8)
3134 .k(k)
3135 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3136 }
3137 }
3138
3139 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
3140 TEST_REQUIRES_ARM_NEON_FMA;
3141 for (size_t k = 1; k < 2; k++) {
3142 GemmMicrokernelTester()
3143 .mr(1)
3144 .nr(8)
3145 .kr(1)
3146 .sr(1)
3147 .m(1)
3148 .n(8)
3149 .k(k)
3150 .a_stride(5)
3151 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3152 }
3153 }
3154
3155 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
3156 TEST_REQUIRES_ARM_NEON_FMA;
3157 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003158 for (uint32_t n = 1; n <= 8; n++) {
3159 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003160 GemmMicrokernelTester()
3161 .mr(1)
3162 .nr(8)
3163 .kr(1)
3164 .sr(1)
3165 .m(m)
3166 .n(n)
3167 .k(k)
3168 .iterations(1)
3169 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3170 }
3171 }
3172 }
3173 }
3174
3175 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2) {
3176 TEST_REQUIRES_ARM_NEON_FMA;
3177 for (size_t k = 3; k < 4; k++) {
3178 GemmMicrokernelTester()
3179 .mr(1)
3180 .nr(8)
3181 .kr(1)
3182 .sr(1)
3183 .m(1)
3184 .n(8)
3185 .k(k)
3186 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3187 }
3188 }
3189
3190 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
3191 TEST_REQUIRES_ARM_NEON_FMA;
3192 for (size_t k = 3; k < 4; k++) {
3193 GemmMicrokernelTester()
3194 .mr(1)
3195 .nr(8)
3196 .kr(1)
3197 .sr(1)
3198 .m(1)
3199 .n(8)
3200 .k(k)
3201 .a_stride(7)
3202 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3203 }
3204 }
3205
3206 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
3207 TEST_REQUIRES_ARM_NEON_FMA;
3208 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003209 for (uint32_t n = 1; n <= 8; n++) {
3210 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003211 GemmMicrokernelTester()
3212 .mr(1)
3213 .nr(8)
3214 .kr(1)
3215 .sr(1)
3216 .m(m)
3217 .n(n)
3218 .k(k)
3219 .iterations(1)
3220 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3221 }
3222 }
3223 }
3224 }
3225
3226 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2) {
3227 TEST_REQUIRES_ARM_NEON_FMA;
3228 for (size_t k = 4; k <= 20; k += 2) {
3229 GemmMicrokernelTester()
3230 .mr(1)
3231 .nr(8)
3232 .kr(1)
3233 .sr(1)
3234 .m(1)
3235 .n(8)
3236 .k(k)
3237 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3238 }
3239 }
3240
3241 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
3242 TEST_REQUIRES_ARM_NEON_FMA;
3243 for (size_t k = 4; k <= 20; k += 2) {
3244 GemmMicrokernelTester()
3245 .mr(1)
3246 .nr(8)
3247 .kr(1)
3248 .sr(1)
3249 .m(1)
3250 .n(8)
3251 .k(k)
3252 .a_stride(23)
3253 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3254 }
3255 }
3256
3257 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
3258 TEST_REQUIRES_ARM_NEON_FMA;
3259 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003260 for (uint32_t n = 1; n <= 8; n++) {
3261 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003262 GemmMicrokernelTester()
3263 .mr(1)
3264 .nr(8)
3265 .kr(1)
3266 .sr(1)
3267 .m(m)
3268 .n(n)
3269 .k(k)
3270 .iterations(1)
3271 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3272 }
3273 }
3274 }
3275 }
3276
3277 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8) {
3278 TEST_REQUIRES_ARM_NEON_FMA;
3279 for (uint32_t n = 9; n < 16; n++) {
3280 for (size_t k = 1; k <= 10; k += 3) {
3281 GemmMicrokernelTester()
3282 .mr(1)
3283 .nr(8)
3284 .kr(1)
3285 .sr(1)
3286 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003287 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003288 .k(k)
3289 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3290 }
3291 }
3292 }
3293
3294 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
3295 TEST_REQUIRES_ARM_NEON_FMA;
3296 for (uint32_t n = 9; n < 16; n++) {
3297 for (size_t k = 1; k <= 10; k += 3) {
3298 GemmMicrokernelTester()
3299 .mr(1)
3300 .nr(8)
3301 .kr(1)
3302 .sr(1)
3303 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003304 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003305 .k(k)
3306 .cn_stride(11)
3307 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3308 }
3309 }
3310 }
3311
3312 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
3313 TEST_REQUIRES_ARM_NEON_FMA;
3314 for (uint32_t n = 9; n < 16; n++) {
3315 for (size_t k = 1; k <= 10; k += 3) {
3316 GemmMicrokernelTester()
3317 .mr(1)
3318 .nr(8)
3319 .kr(1)
3320 .sr(1)
3321 .m(1)
3322 .n(n)
3323 .k(k)
3324 .a_stride(13)
3325 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3326 }
3327 }
3328 }
3329
3330 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
3331 TEST_REQUIRES_ARM_NEON_FMA;
3332 for (uint32_t n = 9; n < 16; n++) {
3333 for (size_t k = 1; k <= 10; k += 3) {
3334 for (uint32_t m = 1; m <= 1; m++) {
3335 GemmMicrokernelTester()
3336 .mr(1)
3337 .nr(8)
3338 .kr(1)
3339 .sr(1)
3340 .m(m)
3341 .n(n)
3342 .k(k)
3343 .iterations(1)
3344 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3345 }
3346 }
3347 }
3348 }
3349
3350 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8) {
3351 TEST_REQUIRES_ARM_NEON_FMA;
3352 for (uint32_t n = 16; n <= 24; n += 8) {
3353 for (size_t k = 1; k <= 10; k += 3) {
3354 GemmMicrokernelTester()
3355 .mr(1)
3356 .nr(8)
3357 .kr(1)
3358 .sr(1)
3359 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003360 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003361 .k(k)
3362 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3363 }
3364 }
3365 }
3366
3367 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
3368 TEST_REQUIRES_ARM_NEON_FMA;
3369 for (uint32_t n = 16; n <= 24; n += 8) {
3370 for (size_t k = 1; k <= 10; k += 3) {
3371 GemmMicrokernelTester()
3372 .mr(1)
3373 .nr(8)
3374 .kr(1)
3375 .sr(1)
3376 .m(1)
3377 .n(n)
3378 .k(k)
3379 .cn_stride(11)
3380 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3381 }
3382 }
3383 }
3384
3385 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
3386 TEST_REQUIRES_ARM_NEON_FMA;
3387 for (uint32_t n = 16; n <= 24; n += 8) {
3388 for (size_t k = 1; k <= 10; k += 3) {
3389 GemmMicrokernelTester()
3390 .mr(1)
3391 .nr(8)
3392 .kr(1)
3393 .sr(1)
3394 .m(1)
3395 .n(n)
3396 .k(k)
3397 .a_stride(13)
3398 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3399 }
3400 }
3401 }
3402
3403 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
3404 TEST_REQUIRES_ARM_NEON_FMA;
3405 for (uint32_t n = 16; n <= 24; n += 8) {
3406 for (size_t k = 1; k <= 10; k += 3) {
3407 for (uint32_t m = 1; m <= 1; m++) {
3408 GemmMicrokernelTester()
3409 .mr(1)
3410 .nr(8)
3411 .kr(1)
3412 .sr(1)
3413 .m(m)
3414 .n(n)
3415 .k(k)
3416 .iterations(1)
3417 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3418 }
3419 }
3420 }
3421 }
3422
3423 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
3424 TEST_REQUIRES_ARM_NEON_FMA;
3425 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003426 for (uint32_t n = 1; n <= 8; n++) {
3427 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003428 GemmMicrokernelTester()
3429 .mr(1)
3430 .nr(8)
3431 .kr(1)
3432 .sr(1)
3433 .m(m)
3434 .n(n)
3435 .k(k)
3436 .cm_stride(11)
3437 .iterations(1)
3438 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3439 }
3440 }
3441 }
3442 }
3443
3444 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmin) {
3445 TEST_REQUIRES_ARM_NEON_FMA;
3446 GemmMicrokernelTester()
3447 .mr(1)
3448 .nr(8)
3449 .kr(1)
3450 .sr(1)
3451 .m(1)
3452 .n(8)
3453 .k(2)
3454 .qmin(128)
3455 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3456 }
3457
3458 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmax) {
3459 TEST_REQUIRES_ARM_NEON_FMA;
3460 GemmMicrokernelTester()
3461 .mr(1)
3462 .nr(8)
3463 .kr(1)
3464 .sr(1)
3465 .m(1)
3466 .n(8)
3467 .k(2)
3468 .qmax(128)
3469 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3470 }
3471
3472 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm) {
3473 TEST_REQUIRES_ARM_NEON_FMA;
3474 GemmMicrokernelTester()
3475 .mr(1)
3476 .nr(8)
3477 .kr(1)
3478 .sr(1)
3479 .m(1)
3480 .n(8)
3481 .k(2)
3482 .cm_stride(11)
3483 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3484 }
3485#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3486
3487
3488#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3489 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
3490 TEST_REQUIRES_ARM_NEON_FMA;
3491 GemmMicrokernelTester()
3492 .mr(6)
3493 .nr(8)
3494 .kr(1)
3495 .sr(1)
3496 .m(6)
3497 .n(8)
3498 .k(2)
3499 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3500 }
3501
3502 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cn) {
3503 TEST_REQUIRES_ARM_NEON_FMA;
3504 GemmMicrokernelTester()
3505 .mr(6)
3506 .nr(8)
3507 .kr(1)
3508 .sr(1)
3509 .m(6)
3510 .n(8)
3511 .k(2)
3512 .cn_stride(11)
3513 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3514 }
3515
3516 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
3517 TEST_REQUIRES_ARM_NEON_FMA;
3518 GemmMicrokernelTester()
3519 .mr(6)
3520 .nr(8)
3521 .kr(1)
3522 .sr(1)
3523 .m(6)
3524 .n(8)
3525 .k(2)
3526 .a_stride(5)
3527 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3528 }
3529
3530 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
3531 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003532 for (uint32_t n = 1; n <= 8; n++) {
3533 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003534 GemmMicrokernelTester()
3535 .mr(6)
3536 .nr(8)
3537 .kr(1)
3538 .sr(1)
3539 .m(m)
3540 .n(n)
3541 .k(2)
3542 .iterations(1)
3543 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3544 }
3545 }
3546 }
3547
3548 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
3549 TEST_REQUIRES_ARM_NEON_FMA;
3550 for (uint32_t m = 1; m <= 6; m++) {
3551 GemmMicrokernelTester()
3552 .mr(6)
3553 .nr(8)
3554 .kr(1)
3555 .sr(1)
3556 .m(m)
3557 .n(8)
3558 .k(2)
3559 .iterations(1)
3560 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3561 }
3562 }
3563
3564 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
3565 TEST_REQUIRES_ARM_NEON_FMA;
3566 for (uint32_t n = 1; n <= 8; n++) {
3567 GemmMicrokernelTester()
3568 .mr(6)
3569 .nr(8)
3570 .kr(1)
3571 .sr(1)
3572 .m(6)
3573 .n(n)
3574 .k(2)
3575 .iterations(1)
3576 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3577 }
3578 }
3579
3580 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2) {
3581 TEST_REQUIRES_ARM_NEON_FMA;
3582 for (size_t k = 1; k < 2; k++) {
3583 GemmMicrokernelTester()
3584 .mr(6)
3585 .nr(8)
3586 .kr(1)
3587 .sr(1)
3588 .m(6)
3589 .n(8)
3590 .k(k)
3591 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3592 }
3593 }
3594
3595 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
3596 TEST_REQUIRES_ARM_NEON_FMA;
3597 for (size_t k = 1; k < 2; k++) {
3598 GemmMicrokernelTester()
3599 .mr(6)
3600 .nr(8)
3601 .kr(1)
3602 .sr(1)
3603 .m(6)
3604 .n(8)
3605 .k(k)
3606 .a_stride(5)
3607 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3608 }
3609 }
3610
3611 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
3612 TEST_REQUIRES_ARM_NEON_FMA;
3613 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003614 for (uint32_t n = 1; n <= 8; n++) {
3615 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003616 GemmMicrokernelTester()
3617 .mr(6)
3618 .nr(8)
3619 .kr(1)
3620 .sr(1)
3621 .m(m)
3622 .n(n)
3623 .k(k)
3624 .iterations(1)
3625 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3626 }
3627 }
3628 }
3629 }
3630
3631 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2) {
3632 TEST_REQUIRES_ARM_NEON_FMA;
3633 for (size_t k = 3; k < 4; k++) {
3634 GemmMicrokernelTester()
3635 .mr(6)
3636 .nr(8)
3637 .kr(1)
3638 .sr(1)
3639 .m(6)
3640 .n(8)
3641 .k(k)
3642 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3643 }
3644 }
3645
3646 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
3647 TEST_REQUIRES_ARM_NEON_FMA;
3648 for (size_t k = 3; k < 4; k++) {
3649 GemmMicrokernelTester()
3650 .mr(6)
3651 .nr(8)
3652 .kr(1)
3653 .sr(1)
3654 .m(6)
3655 .n(8)
3656 .k(k)
3657 .a_stride(7)
3658 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3659 }
3660 }
3661
3662 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
3663 TEST_REQUIRES_ARM_NEON_FMA;
3664 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003665 for (uint32_t n = 1; n <= 8; n++) {
3666 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003667 GemmMicrokernelTester()
3668 .mr(6)
3669 .nr(8)
3670 .kr(1)
3671 .sr(1)
3672 .m(m)
3673 .n(n)
3674 .k(k)
3675 .iterations(1)
3676 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3677 }
3678 }
3679 }
3680 }
3681
3682 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2) {
3683 TEST_REQUIRES_ARM_NEON_FMA;
3684 for (size_t k = 4; k <= 20; k += 2) {
3685 GemmMicrokernelTester()
3686 .mr(6)
3687 .nr(8)
3688 .kr(1)
3689 .sr(1)
3690 .m(6)
3691 .n(8)
3692 .k(k)
3693 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3694 }
3695 }
3696
3697 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
3698 TEST_REQUIRES_ARM_NEON_FMA;
3699 for (size_t k = 4; k <= 20; k += 2) {
3700 GemmMicrokernelTester()
3701 .mr(6)
3702 .nr(8)
3703 .kr(1)
3704 .sr(1)
3705 .m(6)
3706 .n(8)
3707 .k(k)
3708 .a_stride(23)
3709 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3710 }
3711 }
3712
3713 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
3714 TEST_REQUIRES_ARM_NEON_FMA;
3715 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003716 for (uint32_t n = 1; n <= 8; n++) {
3717 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003718 GemmMicrokernelTester()
3719 .mr(6)
3720 .nr(8)
3721 .kr(1)
3722 .sr(1)
3723 .m(m)
3724 .n(n)
3725 .k(k)
3726 .iterations(1)
3727 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3728 }
3729 }
3730 }
3731 }
3732
3733 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8) {
3734 TEST_REQUIRES_ARM_NEON_FMA;
3735 for (uint32_t n = 9; n < 16; n++) {
3736 for (size_t k = 1; k <= 10; k += 3) {
3737 GemmMicrokernelTester()
3738 .mr(6)
3739 .nr(8)
3740 .kr(1)
3741 .sr(1)
3742 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003743 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003744 .k(k)
3745 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3746 }
3747 }
3748 }
3749
3750 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
3751 TEST_REQUIRES_ARM_NEON_FMA;
3752 for (uint32_t n = 9; n < 16; n++) {
3753 for (size_t k = 1; k <= 10; k += 3) {
3754 GemmMicrokernelTester()
3755 .mr(6)
3756 .nr(8)
3757 .kr(1)
3758 .sr(1)
3759 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003760 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003761 .k(k)
3762 .cn_stride(11)
3763 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3764 }
3765 }
3766 }
3767
3768 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
3769 TEST_REQUIRES_ARM_NEON_FMA;
3770 for (uint32_t n = 9; n < 16; n++) {
3771 for (size_t k = 1; k <= 10; k += 3) {
3772 GemmMicrokernelTester()
3773 .mr(6)
3774 .nr(8)
3775 .kr(1)
3776 .sr(1)
3777 .m(6)
3778 .n(n)
3779 .k(k)
3780 .a_stride(13)
3781 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3782 }
3783 }
3784 }
3785
3786 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
3787 TEST_REQUIRES_ARM_NEON_FMA;
3788 for (uint32_t n = 9; n < 16; n++) {
3789 for (size_t k = 1; k <= 10; k += 3) {
3790 for (uint32_t m = 1; m <= 6; m++) {
3791 GemmMicrokernelTester()
3792 .mr(6)
3793 .nr(8)
3794 .kr(1)
3795 .sr(1)
3796 .m(m)
3797 .n(n)
3798 .k(k)
3799 .iterations(1)
3800 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3801 }
3802 }
3803 }
3804 }
3805
3806 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8) {
3807 TEST_REQUIRES_ARM_NEON_FMA;
3808 for (uint32_t n = 16; n <= 24; n += 8) {
3809 for (size_t k = 1; k <= 10; k += 3) {
3810 GemmMicrokernelTester()
3811 .mr(6)
3812 .nr(8)
3813 .kr(1)
3814 .sr(1)
3815 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003816 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003817 .k(k)
3818 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3819 }
3820 }
3821 }
3822
3823 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
3824 TEST_REQUIRES_ARM_NEON_FMA;
3825 for (uint32_t n = 16; n <= 24; n += 8) {
3826 for (size_t k = 1; k <= 10; k += 3) {
3827 GemmMicrokernelTester()
3828 .mr(6)
3829 .nr(8)
3830 .kr(1)
3831 .sr(1)
3832 .m(6)
3833 .n(n)
3834 .k(k)
3835 .cn_stride(11)
3836 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3837 }
3838 }
3839 }
3840
3841 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
3842 TEST_REQUIRES_ARM_NEON_FMA;
3843 for (uint32_t n = 16; n <= 24; n += 8) {
3844 for (size_t k = 1; k <= 10; k += 3) {
3845 GemmMicrokernelTester()
3846 .mr(6)
3847 .nr(8)
3848 .kr(1)
3849 .sr(1)
3850 .m(6)
3851 .n(n)
3852 .k(k)
3853 .a_stride(13)
3854 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3855 }
3856 }
3857 }
3858
3859 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
3860 TEST_REQUIRES_ARM_NEON_FMA;
3861 for (uint32_t n = 16; n <= 24; n += 8) {
3862 for (size_t k = 1; k <= 10; k += 3) {
3863 for (uint32_t m = 1; m <= 6; m++) {
3864 GemmMicrokernelTester()
3865 .mr(6)
3866 .nr(8)
3867 .kr(1)
3868 .sr(1)
3869 .m(m)
3870 .n(n)
3871 .k(k)
3872 .iterations(1)
3873 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3874 }
3875 }
3876 }
3877 }
3878
3879 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
3880 TEST_REQUIRES_ARM_NEON_FMA;
3881 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003882 for (uint32_t n = 1; n <= 8; n++) {
3883 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003884 GemmMicrokernelTester()
3885 .mr(6)
3886 .nr(8)
3887 .kr(1)
3888 .sr(1)
3889 .m(m)
3890 .n(n)
3891 .k(k)
3892 .cm_stride(11)
3893 .iterations(1)
3894 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3895 }
3896 }
3897 }
3898 }
3899
3900 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmin) {
3901 TEST_REQUIRES_ARM_NEON_FMA;
3902 GemmMicrokernelTester()
3903 .mr(6)
3904 .nr(8)
3905 .kr(1)
3906 .sr(1)
3907 .m(6)
3908 .n(8)
3909 .k(2)
3910 .qmin(128)
3911 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3912 }
3913
3914 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmax) {
3915 TEST_REQUIRES_ARM_NEON_FMA;
3916 GemmMicrokernelTester()
3917 .mr(6)
3918 .nr(8)
3919 .kr(1)
3920 .sr(1)
3921 .m(6)
3922 .n(8)
3923 .k(2)
3924 .qmax(128)
3925 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3926 }
3927
3928 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm) {
3929 TEST_REQUIRES_ARM_NEON_FMA;
3930 GemmMicrokernelTester()
3931 .mr(6)
3932 .nr(8)
3933 .kr(1)
3934 .sr(1)
3935 .m(6)
3936 .n(8)
3937 .k(2)
3938 .cm_stride(11)
3939 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
3940 }
3941#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3942
3943
3944#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3945 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
3946 TEST_REQUIRES_ARM_NEON_FMA;
3947 GemmMicrokernelTester()
3948 .mr(6)
3949 .nr(8)
3950 .kr(1)
3951 .sr(1)
3952 .m(6)
3953 .n(8)
3954 .k(4)
3955 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
3956 }
3957
3958 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
3959 TEST_REQUIRES_ARM_NEON_FMA;
3960 GemmMicrokernelTester()
3961 .mr(6)
3962 .nr(8)
3963 .kr(1)
3964 .sr(1)
3965 .m(6)
3966 .n(8)
3967 .k(4)
3968 .cn_stride(11)
3969 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
3970 }
3971
3972 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_strided_a) {
3973 TEST_REQUIRES_ARM_NEON_FMA;
3974 GemmMicrokernelTester()
3975 .mr(6)
3976 .nr(8)
3977 .kr(1)
3978 .sr(1)
3979 .m(6)
3980 .n(8)
3981 .k(4)
3982 .a_stride(7)
3983 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
3984 }
3985
3986 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
3987 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003988 for (uint32_t n = 1; n <= 8; n++) {
3989 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003990 GemmMicrokernelTester()
3991 .mr(6)
3992 .nr(8)
3993 .kr(1)
3994 .sr(1)
3995 .m(m)
3996 .n(n)
3997 .k(4)
3998 .iterations(1)
3999 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4000 }
4001 }
4002 }
4003
4004 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
4005 TEST_REQUIRES_ARM_NEON_FMA;
4006 for (uint32_t m = 1; m <= 6; m++) {
4007 GemmMicrokernelTester()
4008 .mr(6)
4009 .nr(8)
4010 .kr(1)
4011 .sr(1)
4012 .m(m)
4013 .n(8)
4014 .k(4)
4015 .iterations(1)
4016 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4017 }
4018 }
4019
4020 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
4021 TEST_REQUIRES_ARM_NEON_FMA;
4022 for (uint32_t n = 1; n <= 8; n++) {
4023 GemmMicrokernelTester()
4024 .mr(6)
4025 .nr(8)
4026 .kr(1)
4027 .sr(1)
4028 .m(6)
4029 .n(n)
4030 .k(4)
4031 .iterations(1)
4032 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4033 }
4034 }
4035
4036 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
4037 TEST_REQUIRES_ARM_NEON_FMA;
4038 for (size_t k = 1; k < 4; k++) {
4039 GemmMicrokernelTester()
4040 .mr(6)
4041 .nr(8)
4042 .kr(1)
4043 .sr(1)
4044 .m(6)
4045 .n(8)
4046 .k(k)
4047 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4048 }
4049 }
4050
4051 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_strided_a) {
4052 TEST_REQUIRES_ARM_NEON_FMA;
4053 for (size_t k = 1; k < 4; k++) {
4054 GemmMicrokernelTester()
4055 .mr(6)
4056 .nr(8)
4057 .kr(1)
4058 .sr(1)
4059 .m(6)
4060 .n(8)
4061 .k(k)
4062 .a_stride(7)
4063 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4064 }
4065 }
4066
4067 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
4068 TEST_REQUIRES_ARM_NEON_FMA;
4069 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004070 for (uint32_t n = 1; n <= 8; n++) {
4071 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004072 GemmMicrokernelTester()
4073 .mr(6)
4074 .nr(8)
4075 .kr(1)
4076 .sr(1)
4077 .m(m)
4078 .n(n)
4079 .k(k)
4080 .iterations(1)
4081 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4082 }
4083 }
4084 }
4085 }
4086
4087 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
4088 TEST_REQUIRES_ARM_NEON_FMA;
4089 for (size_t k = 5; k < 8; k++) {
4090 GemmMicrokernelTester()
4091 .mr(6)
4092 .nr(8)
4093 .kr(1)
4094 .sr(1)
4095 .m(6)
4096 .n(8)
4097 .k(k)
4098 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4099 }
4100 }
4101
4102 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_strided_a) {
4103 TEST_REQUIRES_ARM_NEON_FMA;
4104 for (size_t k = 5; k < 8; k++) {
4105 GemmMicrokernelTester()
4106 .mr(6)
4107 .nr(8)
4108 .kr(1)
4109 .sr(1)
4110 .m(6)
4111 .n(8)
4112 .k(k)
4113 .a_stride(11)
4114 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4115 }
4116 }
4117
4118 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
4119 TEST_REQUIRES_ARM_NEON_FMA;
4120 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004121 for (uint32_t n = 1; n <= 8; n++) {
4122 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004123 GemmMicrokernelTester()
4124 .mr(6)
4125 .nr(8)
4126 .kr(1)
4127 .sr(1)
4128 .m(m)
4129 .n(n)
4130 .k(k)
4131 .iterations(1)
4132 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4133 }
4134 }
4135 }
4136 }
4137
4138 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
4139 TEST_REQUIRES_ARM_NEON_FMA;
4140 for (size_t k = 8; k <= 40; k += 4) {
4141 GemmMicrokernelTester()
4142 .mr(6)
4143 .nr(8)
4144 .kr(1)
4145 .sr(1)
4146 .m(6)
4147 .n(8)
4148 .k(k)
4149 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4150 }
4151 }
4152
4153 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_strided_a) {
4154 TEST_REQUIRES_ARM_NEON_FMA;
4155 for (size_t k = 8; k <= 40; k += 4) {
4156 GemmMicrokernelTester()
4157 .mr(6)
4158 .nr(8)
4159 .kr(1)
4160 .sr(1)
4161 .m(6)
4162 .n(8)
4163 .k(k)
4164 .a_stride(43)
4165 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4166 }
4167 }
4168
4169 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
4170 TEST_REQUIRES_ARM_NEON_FMA;
4171 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004172 for (uint32_t n = 1; n <= 8; n++) {
4173 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004174 GemmMicrokernelTester()
4175 .mr(6)
4176 .nr(8)
4177 .kr(1)
4178 .sr(1)
4179 .m(m)
4180 .n(n)
4181 .k(k)
4182 .iterations(1)
4183 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4184 }
4185 }
4186 }
4187 }
4188
4189 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
4190 TEST_REQUIRES_ARM_NEON_FMA;
4191 for (uint32_t n = 9; n < 16; n++) {
4192 for (size_t k = 1; k <= 20; k += 5) {
4193 GemmMicrokernelTester()
4194 .mr(6)
4195 .nr(8)
4196 .kr(1)
4197 .sr(1)
4198 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004199 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004200 .k(k)
4201 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4202 }
4203 }
4204 }
4205
4206 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
4207 TEST_REQUIRES_ARM_NEON_FMA;
4208 for (uint32_t n = 9; n < 16; n++) {
4209 for (size_t k = 1; k <= 20; k += 5) {
4210 GemmMicrokernelTester()
4211 .mr(6)
4212 .nr(8)
4213 .kr(1)
4214 .sr(1)
4215 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004216 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004217 .k(k)
4218 .cn_stride(11)
4219 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4220 }
4221 }
4222 }
4223
4224 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_a) {
4225 TEST_REQUIRES_ARM_NEON_FMA;
4226 for (uint32_t n = 9; n < 16; n++) {
4227 for (size_t k = 1; k <= 20; k += 5) {
4228 GemmMicrokernelTester()
4229 .mr(6)
4230 .nr(8)
4231 .kr(1)
4232 .sr(1)
4233 .m(6)
4234 .n(n)
4235 .k(k)
4236 .a_stride(23)
4237 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4238 }
4239 }
4240 }
4241
4242 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
4243 TEST_REQUIRES_ARM_NEON_FMA;
4244 for (uint32_t n = 9; n < 16; n++) {
4245 for (size_t k = 1; k <= 20; k += 5) {
4246 for (uint32_t m = 1; m <= 6; m++) {
4247 GemmMicrokernelTester()
4248 .mr(6)
4249 .nr(8)
4250 .kr(1)
4251 .sr(1)
4252 .m(m)
4253 .n(n)
4254 .k(k)
4255 .iterations(1)
4256 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4257 }
4258 }
4259 }
4260 }
4261
4262 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
4263 TEST_REQUIRES_ARM_NEON_FMA;
4264 for (uint32_t n = 16; n <= 24; n += 8) {
4265 for (size_t k = 1; k <= 20; k += 5) {
4266 GemmMicrokernelTester()
4267 .mr(6)
4268 .nr(8)
4269 .kr(1)
4270 .sr(1)
4271 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004272 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004273 .k(k)
4274 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4275 }
4276 }
4277 }
4278
4279 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
4280 TEST_REQUIRES_ARM_NEON_FMA;
4281 for (uint32_t n = 16; n <= 24; n += 8) {
4282 for (size_t k = 1; k <= 20; k += 5) {
4283 GemmMicrokernelTester()
4284 .mr(6)
4285 .nr(8)
4286 .kr(1)
4287 .sr(1)
4288 .m(6)
4289 .n(n)
4290 .k(k)
4291 .cn_stride(11)
4292 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4293 }
4294 }
4295 }
4296
4297 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_a) {
4298 TEST_REQUIRES_ARM_NEON_FMA;
4299 for (uint32_t n = 16; n <= 24; n += 8) {
4300 for (size_t k = 1; k <= 20; k += 5) {
4301 GemmMicrokernelTester()
4302 .mr(6)
4303 .nr(8)
4304 .kr(1)
4305 .sr(1)
4306 .m(6)
4307 .n(n)
4308 .k(k)
4309 .a_stride(23)
4310 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4311 }
4312 }
4313 }
4314
4315 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
4316 TEST_REQUIRES_ARM_NEON_FMA;
4317 for (uint32_t n = 16; n <= 24; n += 8) {
4318 for (size_t k = 1; k <= 20; k += 5) {
4319 for (uint32_t m = 1; m <= 6; m++) {
4320 GemmMicrokernelTester()
4321 .mr(6)
4322 .nr(8)
4323 .kr(1)
4324 .sr(1)
4325 .m(m)
4326 .n(n)
4327 .k(k)
4328 .iterations(1)
4329 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4330 }
4331 }
4332 }
4333 }
4334
4335 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
4336 TEST_REQUIRES_ARM_NEON_FMA;
4337 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004338 for (uint32_t n = 1; n <= 8; n++) {
4339 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004340 GemmMicrokernelTester()
4341 .mr(6)
4342 .nr(8)
4343 .kr(1)
4344 .sr(1)
4345 .m(m)
4346 .n(n)
4347 .k(k)
4348 .cm_stride(11)
4349 .iterations(1)
4350 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4351 }
4352 }
4353 }
4354 }
4355
4356 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmin) {
4357 TEST_REQUIRES_ARM_NEON_FMA;
4358 GemmMicrokernelTester()
4359 .mr(6)
4360 .nr(8)
4361 .kr(1)
4362 .sr(1)
4363 .m(6)
4364 .n(8)
4365 .k(4)
4366 .qmin(128)
4367 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4368 }
4369
4370 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmax) {
4371 TEST_REQUIRES_ARM_NEON_FMA;
4372 GemmMicrokernelTester()
4373 .mr(6)
4374 .nr(8)
4375 .kr(1)
4376 .sr(1)
4377 .m(6)
4378 .n(8)
4379 .k(4)
4380 .qmax(128)
4381 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4382 }
4383
4384 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
4385 TEST_REQUIRES_ARM_NEON_FMA;
4386 GemmMicrokernelTester()
4387 .mr(6)
4388 .nr(8)
4389 .kr(1)
4390 .sr(1)
4391 .m(6)
4392 .n(8)
4393 .k(4)
4394 .cm_stride(11)
4395 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
4396 }
4397#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4398
4399
4400#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4401 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2) {
4402 TEST_REQUIRES_ARM_NEON;
4403 GemmMicrokernelTester()
4404 .mr(1)
4405 .nr(8)
4406 .kr(1)
4407 .sr(1)
4408 .m(1)
4409 .n(8)
4410 .k(2)
4411 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4412 }
4413
4414 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cn) {
4415 TEST_REQUIRES_ARM_NEON;
4416 GemmMicrokernelTester()
4417 .mr(1)
4418 .nr(8)
4419 .kr(1)
4420 .sr(1)
4421 .m(1)
4422 .n(8)
4423 .k(2)
4424 .cn_stride(11)
4425 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4426 }
4427
4428 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_strided_a) {
4429 TEST_REQUIRES_ARM_NEON;
4430 GemmMicrokernelTester()
4431 .mr(1)
4432 .nr(8)
4433 .kr(1)
4434 .sr(1)
4435 .m(1)
4436 .n(8)
4437 .k(2)
4438 .a_stride(5)
4439 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4440 }
4441
4442 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile) {
4443 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004444 for (uint32_t n = 1; n <= 8; n++) {
4445 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004446 GemmMicrokernelTester()
4447 .mr(1)
4448 .nr(8)
4449 .kr(1)
4450 .sr(1)
4451 .m(m)
4452 .n(n)
4453 .k(2)
4454 .iterations(1)
4455 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4456 }
4457 }
4458 }
4459
4460 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
4461 TEST_REQUIRES_ARM_NEON;
4462 for (uint32_t m = 1; m <= 1; m++) {
4463 GemmMicrokernelTester()
4464 .mr(1)
4465 .nr(8)
4466 .kr(1)
4467 .sr(1)
4468 .m(m)
4469 .n(8)
4470 .k(2)
4471 .iterations(1)
4472 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4473 }
4474 }
4475
4476 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
4477 TEST_REQUIRES_ARM_NEON;
4478 for (uint32_t n = 1; n <= 8; n++) {
4479 GemmMicrokernelTester()
4480 .mr(1)
4481 .nr(8)
4482 .kr(1)
4483 .sr(1)
4484 .m(1)
4485 .n(n)
4486 .k(2)
4487 .iterations(1)
4488 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4489 }
4490 }
4491
4492 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2) {
4493 TEST_REQUIRES_ARM_NEON;
4494 for (size_t k = 1; k < 2; k++) {
4495 GemmMicrokernelTester()
4496 .mr(1)
4497 .nr(8)
4498 .kr(1)
4499 .sr(1)
4500 .m(1)
4501 .n(8)
4502 .k(k)
4503 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4504 }
4505 }
4506
4507 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_strided_a) {
4508 TEST_REQUIRES_ARM_NEON;
4509 for (size_t k = 1; k < 2; k++) {
4510 GemmMicrokernelTester()
4511 .mr(1)
4512 .nr(8)
4513 .kr(1)
4514 .sr(1)
4515 .m(1)
4516 .n(8)
4517 .k(k)
4518 .a_stride(5)
4519 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4520 }
4521 }
4522
4523 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_subtile) {
4524 TEST_REQUIRES_ARM_NEON;
4525 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004526 for (uint32_t n = 1; n <= 8; n++) {
4527 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004528 GemmMicrokernelTester()
4529 .mr(1)
4530 .nr(8)
4531 .kr(1)
4532 .sr(1)
4533 .m(m)
4534 .n(n)
4535 .k(k)
4536 .iterations(1)
4537 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4538 }
4539 }
4540 }
4541 }
4542
4543 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2) {
4544 TEST_REQUIRES_ARM_NEON;
4545 for (size_t k = 3; k < 4; k++) {
4546 GemmMicrokernelTester()
4547 .mr(1)
4548 .nr(8)
4549 .kr(1)
4550 .sr(1)
4551 .m(1)
4552 .n(8)
4553 .k(k)
4554 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4555 }
4556 }
4557
4558 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_strided_a) {
4559 TEST_REQUIRES_ARM_NEON;
4560 for (size_t k = 3; k < 4; k++) {
4561 GemmMicrokernelTester()
4562 .mr(1)
4563 .nr(8)
4564 .kr(1)
4565 .sr(1)
4566 .m(1)
4567 .n(8)
4568 .k(k)
4569 .a_stride(7)
4570 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4571 }
4572 }
4573
4574 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_subtile) {
4575 TEST_REQUIRES_ARM_NEON;
4576 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004577 for (uint32_t n = 1; n <= 8; n++) {
4578 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004579 GemmMicrokernelTester()
4580 .mr(1)
4581 .nr(8)
4582 .kr(1)
4583 .sr(1)
4584 .m(m)
4585 .n(n)
4586 .k(k)
4587 .iterations(1)
4588 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4589 }
4590 }
4591 }
4592 }
4593
4594 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2) {
4595 TEST_REQUIRES_ARM_NEON;
4596 for (size_t k = 4; k <= 20; k += 2) {
4597 GemmMicrokernelTester()
4598 .mr(1)
4599 .nr(8)
4600 .kr(1)
4601 .sr(1)
4602 .m(1)
4603 .n(8)
4604 .k(k)
4605 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4606 }
4607 }
4608
4609 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2_strided_a) {
4610 TEST_REQUIRES_ARM_NEON;
4611 for (size_t k = 4; k <= 20; k += 2) {
4612 GemmMicrokernelTester()
4613 .mr(1)
4614 .nr(8)
4615 .kr(1)
4616 .sr(1)
4617 .m(1)
4618 .n(8)
4619 .k(k)
4620 .a_stride(23)
4621 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4622 }
4623 }
4624
4625 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, k_div_2_subtile) {
4626 TEST_REQUIRES_ARM_NEON;
4627 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004628 for (uint32_t n = 1; n <= 8; n++) {
4629 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004630 GemmMicrokernelTester()
4631 .mr(1)
4632 .nr(8)
4633 .kr(1)
4634 .sr(1)
4635 .m(m)
4636 .n(n)
4637 .k(k)
4638 .iterations(1)
4639 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4640 }
4641 }
4642 }
4643 }
4644
4645 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8) {
4646 TEST_REQUIRES_ARM_NEON;
4647 for (uint32_t n = 9; n < 16; n++) {
4648 for (size_t k = 1; k <= 10; k += 3) {
4649 GemmMicrokernelTester()
4650 .mr(1)
4651 .nr(8)
4652 .kr(1)
4653 .sr(1)
4654 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004655 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004656 .k(k)
4657 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4658 }
4659 }
4660 }
4661
4662 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
4663 TEST_REQUIRES_ARM_NEON;
4664 for (uint32_t n = 9; n < 16; n++) {
4665 for (size_t k = 1; k <= 10; k += 3) {
4666 GemmMicrokernelTester()
4667 .mr(1)
4668 .nr(8)
4669 .kr(1)
4670 .sr(1)
4671 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004672 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004673 .k(k)
4674 .cn_stride(11)
4675 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4676 }
4677 }
4678 }
4679
4680 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_a) {
4681 TEST_REQUIRES_ARM_NEON;
4682 for (uint32_t n = 9; n < 16; n++) {
4683 for (size_t k = 1; k <= 10; k += 3) {
4684 GemmMicrokernelTester()
4685 .mr(1)
4686 .nr(8)
4687 .kr(1)
4688 .sr(1)
4689 .m(1)
4690 .n(n)
4691 .k(k)
4692 .a_stride(13)
4693 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4694 }
4695 }
4696 }
4697
4698 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_subtile) {
4699 TEST_REQUIRES_ARM_NEON;
4700 for (uint32_t n = 9; n < 16; n++) {
4701 for (size_t k = 1; k <= 10; k += 3) {
4702 for (uint32_t m = 1; m <= 1; m++) {
4703 GemmMicrokernelTester()
4704 .mr(1)
4705 .nr(8)
4706 .kr(1)
4707 .sr(1)
4708 .m(m)
4709 .n(n)
4710 .k(k)
4711 .iterations(1)
4712 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4713 }
4714 }
4715 }
4716 }
4717
4718 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8) {
4719 TEST_REQUIRES_ARM_NEON;
4720 for (uint32_t n = 16; n <= 24; n += 8) {
4721 for (size_t k = 1; k <= 10; k += 3) {
4722 GemmMicrokernelTester()
4723 .mr(1)
4724 .nr(8)
4725 .kr(1)
4726 .sr(1)
4727 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004728 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004729 .k(k)
4730 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4731 }
4732 }
4733 }
4734
4735 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_cn) {
4736 TEST_REQUIRES_ARM_NEON;
4737 for (uint32_t n = 16; n <= 24; n += 8) {
4738 for (size_t k = 1; k <= 10; k += 3) {
4739 GemmMicrokernelTester()
4740 .mr(1)
4741 .nr(8)
4742 .kr(1)
4743 .sr(1)
4744 .m(1)
4745 .n(n)
4746 .k(k)
4747 .cn_stride(11)
4748 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4749 }
4750 }
4751 }
4752
4753 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_a) {
4754 TEST_REQUIRES_ARM_NEON;
4755 for (uint32_t n = 16; n <= 24; n += 8) {
4756 for (size_t k = 1; k <= 10; k += 3) {
4757 GemmMicrokernelTester()
4758 .mr(1)
4759 .nr(8)
4760 .kr(1)
4761 .sr(1)
4762 .m(1)
4763 .n(n)
4764 .k(k)
4765 .a_stride(13)
4766 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4767 }
4768 }
4769 }
4770
4771 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, n_div_8_subtile) {
4772 TEST_REQUIRES_ARM_NEON;
4773 for (uint32_t n = 16; n <= 24; n += 8) {
4774 for (size_t k = 1; k <= 10; k += 3) {
4775 for (uint32_t m = 1; m <= 1; m++) {
4776 GemmMicrokernelTester()
4777 .mr(1)
4778 .nr(8)
4779 .kr(1)
4780 .sr(1)
4781 .m(m)
4782 .n(n)
4783 .k(k)
4784 .iterations(1)
4785 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4786 }
4787 }
4788 }
4789 }
4790
4791 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cm_subtile) {
4792 TEST_REQUIRES_ARM_NEON;
4793 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004794 for (uint32_t n = 1; n <= 8; n++) {
4795 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004796 GemmMicrokernelTester()
4797 .mr(1)
4798 .nr(8)
4799 .kr(1)
4800 .sr(1)
4801 .m(m)
4802 .n(n)
4803 .k(k)
4804 .cm_stride(11)
4805 .iterations(1)
4806 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4807 }
4808 }
4809 }
4810 }
4811
4812 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, qmin) {
4813 TEST_REQUIRES_ARM_NEON;
4814 GemmMicrokernelTester()
4815 .mr(1)
4816 .nr(8)
4817 .kr(1)
4818 .sr(1)
4819 .m(1)
4820 .n(8)
4821 .k(2)
4822 .qmin(128)
4823 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4824 }
4825
4826 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, qmax) {
4827 TEST_REQUIRES_ARM_NEON;
4828 GemmMicrokernelTester()
4829 .mr(1)
4830 .nr(8)
4831 .kr(1)
4832 .sr(1)
4833 .m(1)
4834 .n(8)
4835 .k(2)
4836 .qmax(128)
4837 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4838 }
4839
4840 TEST(F32_GEMMINC_MINMAX_1X8__NEON_LANE_LD64, strided_cm) {
4841 TEST_REQUIRES_ARM_NEON;
4842 GemmMicrokernelTester()
4843 .mr(1)
4844 .nr(8)
4845 .kr(1)
4846 .sr(1)
4847 .m(1)
4848 .n(8)
4849 .k(2)
4850 .cm_stride(11)
4851 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4852 }
4853#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4854
4855
4856#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4857 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2) {
4858 TEST_REQUIRES_ARM_NEON;
4859 GemmMicrokernelTester()
4860 .mr(4)
4861 .nr(8)
4862 .kr(1)
4863 .sr(1)
4864 .m(4)
4865 .n(8)
4866 .k(2)
4867 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4868 }
4869
4870 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cn) {
4871 TEST_REQUIRES_ARM_NEON;
4872 GemmMicrokernelTester()
4873 .mr(4)
4874 .nr(8)
4875 .kr(1)
4876 .sr(1)
4877 .m(4)
4878 .n(8)
4879 .k(2)
4880 .cn_stride(11)
4881 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4882 }
4883
4884 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_strided_a) {
4885 TEST_REQUIRES_ARM_NEON;
4886 GemmMicrokernelTester()
4887 .mr(4)
4888 .nr(8)
4889 .kr(1)
4890 .sr(1)
4891 .m(4)
4892 .n(8)
4893 .k(2)
4894 .a_stride(5)
4895 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4896 }
4897
4898 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile) {
4899 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004900 for (uint32_t n = 1; n <= 8; n++) {
4901 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004902 GemmMicrokernelTester()
4903 .mr(4)
4904 .nr(8)
4905 .kr(1)
4906 .sr(1)
4907 .m(m)
4908 .n(n)
4909 .k(2)
4910 .iterations(1)
4911 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4912 }
4913 }
4914 }
4915
4916 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
4917 TEST_REQUIRES_ARM_NEON;
4918 for (uint32_t m = 1; m <= 4; m++) {
4919 GemmMicrokernelTester()
4920 .mr(4)
4921 .nr(8)
4922 .kr(1)
4923 .sr(1)
4924 .m(m)
4925 .n(8)
4926 .k(2)
4927 .iterations(1)
4928 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4929 }
4930 }
4931
4932 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
4933 TEST_REQUIRES_ARM_NEON;
4934 for (uint32_t n = 1; n <= 8; n++) {
4935 GemmMicrokernelTester()
4936 .mr(4)
4937 .nr(8)
4938 .kr(1)
4939 .sr(1)
4940 .m(4)
4941 .n(n)
4942 .k(2)
4943 .iterations(1)
4944 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4945 }
4946 }
4947
4948 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2) {
4949 TEST_REQUIRES_ARM_NEON;
4950 for (size_t k = 1; k < 2; k++) {
4951 GemmMicrokernelTester()
4952 .mr(4)
4953 .nr(8)
4954 .kr(1)
4955 .sr(1)
4956 .m(4)
4957 .n(8)
4958 .k(k)
4959 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4960 }
4961 }
4962
4963 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_strided_a) {
4964 TEST_REQUIRES_ARM_NEON;
4965 for (size_t k = 1; k < 2; k++) {
4966 GemmMicrokernelTester()
4967 .mr(4)
4968 .nr(8)
4969 .kr(1)
4970 .sr(1)
4971 .m(4)
4972 .n(8)
4973 .k(k)
4974 .a_stride(5)
4975 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4976 }
4977 }
4978
4979 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_subtile) {
4980 TEST_REQUIRES_ARM_NEON;
4981 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004982 for (uint32_t n = 1; n <= 8; n++) {
4983 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004984 GemmMicrokernelTester()
4985 .mr(4)
4986 .nr(8)
4987 .kr(1)
4988 .sr(1)
4989 .m(m)
4990 .n(n)
4991 .k(k)
4992 .iterations(1)
4993 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
4994 }
4995 }
4996 }
4997 }
4998
4999 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2) {
5000 TEST_REQUIRES_ARM_NEON;
5001 for (size_t k = 3; k < 4; k++) {
5002 GemmMicrokernelTester()
5003 .mr(4)
5004 .nr(8)
5005 .kr(1)
5006 .sr(1)
5007 .m(4)
5008 .n(8)
5009 .k(k)
5010 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5011 }
5012 }
5013
5014 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_strided_a) {
5015 TEST_REQUIRES_ARM_NEON;
5016 for (size_t k = 3; k < 4; k++) {
5017 GemmMicrokernelTester()
5018 .mr(4)
5019 .nr(8)
5020 .kr(1)
5021 .sr(1)
5022 .m(4)
5023 .n(8)
5024 .k(k)
5025 .a_stride(7)
5026 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5027 }
5028 }
5029
5030 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_subtile) {
5031 TEST_REQUIRES_ARM_NEON;
5032 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005033 for (uint32_t n = 1; n <= 8; n++) {
5034 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005035 GemmMicrokernelTester()
5036 .mr(4)
5037 .nr(8)
5038 .kr(1)
5039 .sr(1)
5040 .m(m)
5041 .n(n)
5042 .k(k)
5043 .iterations(1)
5044 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5045 }
5046 }
5047 }
5048 }
5049
5050 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2) {
5051 TEST_REQUIRES_ARM_NEON;
5052 for (size_t k = 4; k <= 20; k += 2) {
5053 GemmMicrokernelTester()
5054 .mr(4)
5055 .nr(8)
5056 .kr(1)
5057 .sr(1)
5058 .m(4)
5059 .n(8)
5060 .k(k)
5061 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5062 }
5063 }
5064
5065 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2_strided_a) {
5066 TEST_REQUIRES_ARM_NEON;
5067 for (size_t k = 4; k <= 20; k += 2) {
5068 GemmMicrokernelTester()
5069 .mr(4)
5070 .nr(8)
5071 .kr(1)
5072 .sr(1)
5073 .m(4)
5074 .n(8)
5075 .k(k)
5076 .a_stride(23)
5077 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5078 }
5079 }
5080
5081 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, k_div_2_subtile) {
5082 TEST_REQUIRES_ARM_NEON;
5083 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005084 for (uint32_t n = 1; n <= 8; n++) {
5085 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005086 GemmMicrokernelTester()
5087 .mr(4)
5088 .nr(8)
5089 .kr(1)
5090 .sr(1)
5091 .m(m)
5092 .n(n)
5093 .k(k)
5094 .iterations(1)
5095 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5096 }
5097 }
5098 }
5099 }
5100
5101 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8) {
5102 TEST_REQUIRES_ARM_NEON;
5103 for (uint32_t n = 9; n < 16; n++) {
5104 for (size_t k = 1; k <= 10; k += 3) {
5105 GemmMicrokernelTester()
5106 .mr(4)
5107 .nr(8)
5108 .kr(1)
5109 .sr(1)
5110 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005111 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005112 .k(k)
5113 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5114 }
5115 }
5116 }
5117
5118 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
5119 TEST_REQUIRES_ARM_NEON;
5120 for (uint32_t n = 9; n < 16; n++) {
5121 for (size_t k = 1; k <= 10; k += 3) {
5122 GemmMicrokernelTester()
5123 .mr(4)
5124 .nr(8)
5125 .kr(1)
5126 .sr(1)
5127 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005128 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005129 .k(k)
5130 .cn_stride(11)
5131 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5132 }
5133 }
5134 }
5135
5136 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_a) {
5137 TEST_REQUIRES_ARM_NEON;
5138 for (uint32_t n = 9; n < 16; n++) {
5139 for (size_t k = 1; k <= 10; k += 3) {
5140 GemmMicrokernelTester()
5141 .mr(4)
5142 .nr(8)
5143 .kr(1)
5144 .sr(1)
5145 .m(4)
5146 .n(n)
5147 .k(k)
5148 .a_stride(13)
5149 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5150 }
5151 }
5152 }
5153
5154 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_subtile) {
5155 TEST_REQUIRES_ARM_NEON;
5156 for (uint32_t n = 9; n < 16; n++) {
5157 for (size_t k = 1; k <= 10; k += 3) {
5158 for (uint32_t m = 1; m <= 4; m++) {
5159 GemmMicrokernelTester()
5160 .mr(4)
5161 .nr(8)
5162 .kr(1)
5163 .sr(1)
5164 .m(m)
5165 .n(n)
5166 .k(k)
5167 .iterations(1)
5168 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5169 }
5170 }
5171 }
5172 }
5173
5174 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8) {
5175 TEST_REQUIRES_ARM_NEON;
5176 for (uint32_t n = 16; n <= 24; n += 8) {
5177 for (size_t k = 1; k <= 10; k += 3) {
5178 GemmMicrokernelTester()
5179 .mr(4)
5180 .nr(8)
5181 .kr(1)
5182 .sr(1)
5183 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005184 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005185 .k(k)
5186 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5187 }
5188 }
5189 }
5190
5191 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_cn) {
5192 TEST_REQUIRES_ARM_NEON;
5193 for (uint32_t n = 16; n <= 24; n += 8) {
5194 for (size_t k = 1; k <= 10; k += 3) {
5195 GemmMicrokernelTester()
5196 .mr(4)
5197 .nr(8)
5198 .kr(1)
5199 .sr(1)
5200 .m(4)
5201 .n(n)
5202 .k(k)
5203 .cn_stride(11)
5204 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5205 }
5206 }
5207 }
5208
5209 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_a) {
5210 TEST_REQUIRES_ARM_NEON;
5211 for (uint32_t n = 16; n <= 24; n += 8) {
5212 for (size_t k = 1; k <= 10; k += 3) {
5213 GemmMicrokernelTester()
5214 .mr(4)
5215 .nr(8)
5216 .kr(1)
5217 .sr(1)
5218 .m(4)
5219 .n(n)
5220 .k(k)
5221 .a_stride(13)
5222 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5223 }
5224 }
5225 }
5226
5227 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, n_div_8_subtile) {
5228 TEST_REQUIRES_ARM_NEON;
5229 for (uint32_t n = 16; n <= 24; n += 8) {
5230 for (size_t k = 1; k <= 10; k += 3) {
5231 for (uint32_t m = 1; m <= 4; m++) {
5232 GemmMicrokernelTester()
5233 .mr(4)
5234 .nr(8)
5235 .kr(1)
5236 .sr(1)
5237 .m(m)
5238 .n(n)
5239 .k(k)
5240 .iterations(1)
5241 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5242 }
5243 }
5244 }
5245 }
5246
5247 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cm_subtile) {
5248 TEST_REQUIRES_ARM_NEON;
5249 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005250 for (uint32_t n = 1; n <= 8; n++) {
5251 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005252 GemmMicrokernelTester()
5253 .mr(4)
5254 .nr(8)
5255 .kr(1)
5256 .sr(1)
5257 .m(m)
5258 .n(n)
5259 .k(k)
5260 .cm_stride(11)
5261 .iterations(1)
5262 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5263 }
5264 }
5265 }
5266 }
5267
5268 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, qmin) {
5269 TEST_REQUIRES_ARM_NEON;
5270 GemmMicrokernelTester()
5271 .mr(4)
5272 .nr(8)
5273 .kr(1)
5274 .sr(1)
5275 .m(4)
5276 .n(8)
5277 .k(2)
5278 .qmin(128)
5279 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5280 }
5281
5282 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, qmax) {
5283 TEST_REQUIRES_ARM_NEON;
5284 GemmMicrokernelTester()
5285 .mr(4)
5286 .nr(8)
5287 .kr(1)
5288 .sr(1)
5289 .m(4)
5290 .n(8)
5291 .k(2)
5292 .qmax(128)
5293 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5294 }
5295
5296 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD64, strided_cm) {
5297 TEST_REQUIRES_ARM_NEON;
5298 GemmMicrokernelTester()
5299 .mr(4)
5300 .nr(8)
5301 .kr(1)
5302 .sr(1)
5303 .m(4)
5304 .n(8)
5305 .k(2)
5306 .cm_stride(11)
5307 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5308 }
5309#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5310
5311
5312#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5313 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4) {
5314 TEST_REQUIRES_ARM_NEON;
5315 GemmMicrokernelTester()
5316 .mr(4)
5317 .nr(8)
5318 .kr(1)
5319 .sr(1)
5320 .m(4)
5321 .n(8)
5322 .k(4)
5323 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5324 }
5325
5326 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cn) {
5327 TEST_REQUIRES_ARM_NEON;
5328 GemmMicrokernelTester()
5329 .mr(4)
5330 .nr(8)
5331 .kr(1)
5332 .sr(1)
5333 .m(4)
5334 .n(8)
5335 .k(4)
5336 .cn_stride(11)
5337 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5338 }
5339
5340 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_strided_a) {
5341 TEST_REQUIRES_ARM_NEON;
5342 GemmMicrokernelTester()
5343 .mr(4)
5344 .nr(8)
5345 .kr(1)
5346 .sr(1)
5347 .m(4)
5348 .n(8)
5349 .k(4)
5350 .a_stride(7)
5351 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5352 }
5353
5354 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile) {
5355 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005356 for (uint32_t n = 1; n <= 8; n++) {
5357 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005358 GemmMicrokernelTester()
5359 .mr(4)
5360 .nr(8)
5361 .kr(1)
5362 .sr(1)
5363 .m(m)
5364 .n(n)
5365 .k(4)
5366 .iterations(1)
5367 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5368 }
5369 }
5370 }
5371
5372 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
5373 TEST_REQUIRES_ARM_NEON;
5374 for (uint32_t m = 1; m <= 4; m++) {
5375 GemmMicrokernelTester()
5376 .mr(4)
5377 .nr(8)
5378 .kr(1)
5379 .sr(1)
5380 .m(m)
5381 .n(8)
5382 .k(4)
5383 .iterations(1)
5384 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5385 }
5386 }
5387
5388 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
5389 TEST_REQUIRES_ARM_NEON;
5390 for (uint32_t n = 1; n <= 8; n++) {
5391 GemmMicrokernelTester()
5392 .mr(4)
5393 .nr(8)
5394 .kr(1)
5395 .sr(1)
5396 .m(4)
5397 .n(n)
5398 .k(4)
5399 .iterations(1)
5400 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5401 }
5402 }
5403
5404 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4) {
5405 TEST_REQUIRES_ARM_NEON;
5406 for (size_t k = 1; k < 4; k++) {
5407 GemmMicrokernelTester()
5408 .mr(4)
5409 .nr(8)
5410 .kr(1)
5411 .sr(1)
5412 .m(4)
5413 .n(8)
5414 .k(k)
5415 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5416 }
5417 }
5418
5419 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_strided_a) {
5420 TEST_REQUIRES_ARM_NEON;
5421 for (size_t k = 1; k < 4; k++) {
5422 GemmMicrokernelTester()
5423 .mr(4)
5424 .nr(8)
5425 .kr(1)
5426 .sr(1)
5427 .m(4)
5428 .n(8)
5429 .k(k)
5430 .a_stride(7)
5431 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5432 }
5433 }
5434
5435 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_subtile) {
5436 TEST_REQUIRES_ARM_NEON;
5437 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005438 for (uint32_t n = 1; n <= 8; n++) {
5439 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005440 GemmMicrokernelTester()
5441 .mr(4)
5442 .nr(8)
5443 .kr(1)
5444 .sr(1)
5445 .m(m)
5446 .n(n)
5447 .k(k)
5448 .iterations(1)
5449 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5450 }
5451 }
5452 }
5453 }
5454
5455 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4) {
5456 TEST_REQUIRES_ARM_NEON;
5457 for (size_t k = 5; k < 8; k++) {
5458 GemmMicrokernelTester()
5459 .mr(4)
5460 .nr(8)
5461 .kr(1)
5462 .sr(1)
5463 .m(4)
5464 .n(8)
5465 .k(k)
5466 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5467 }
5468 }
5469
5470 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_strided_a) {
5471 TEST_REQUIRES_ARM_NEON;
5472 for (size_t k = 5; k < 8; k++) {
5473 GemmMicrokernelTester()
5474 .mr(4)
5475 .nr(8)
5476 .kr(1)
5477 .sr(1)
5478 .m(4)
5479 .n(8)
5480 .k(k)
5481 .a_stride(11)
5482 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5483 }
5484 }
5485
5486 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_subtile) {
5487 TEST_REQUIRES_ARM_NEON;
5488 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005489 for (uint32_t n = 1; n <= 8; n++) {
5490 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005491 GemmMicrokernelTester()
5492 .mr(4)
5493 .nr(8)
5494 .kr(1)
5495 .sr(1)
5496 .m(m)
5497 .n(n)
5498 .k(k)
5499 .iterations(1)
5500 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5501 }
5502 }
5503 }
5504 }
5505
5506 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4) {
5507 TEST_REQUIRES_ARM_NEON;
5508 for (size_t k = 8; k <= 40; k += 4) {
5509 GemmMicrokernelTester()
5510 .mr(4)
5511 .nr(8)
5512 .kr(1)
5513 .sr(1)
5514 .m(4)
5515 .n(8)
5516 .k(k)
5517 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5518 }
5519 }
5520
5521 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4_strided_a) {
5522 TEST_REQUIRES_ARM_NEON;
5523 for (size_t k = 8; k <= 40; k += 4) {
5524 GemmMicrokernelTester()
5525 .mr(4)
5526 .nr(8)
5527 .kr(1)
5528 .sr(1)
5529 .m(4)
5530 .n(8)
5531 .k(k)
5532 .a_stride(43)
5533 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5534 }
5535 }
5536
5537 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, k_div_4_subtile) {
5538 TEST_REQUIRES_ARM_NEON;
5539 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005540 for (uint32_t n = 1; n <= 8; n++) {
5541 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005542 GemmMicrokernelTester()
5543 .mr(4)
5544 .nr(8)
5545 .kr(1)
5546 .sr(1)
5547 .m(m)
5548 .n(n)
5549 .k(k)
5550 .iterations(1)
5551 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5552 }
5553 }
5554 }
5555 }
5556
5557 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8) {
5558 TEST_REQUIRES_ARM_NEON;
5559 for (uint32_t n = 9; n < 16; n++) {
5560 for (size_t k = 1; k <= 20; k += 5) {
5561 GemmMicrokernelTester()
5562 .mr(4)
5563 .nr(8)
5564 .kr(1)
5565 .sr(1)
5566 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005567 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005568 .k(k)
5569 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5570 }
5571 }
5572 }
5573
5574 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
5575 TEST_REQUIRES_ARM_NEON;
5576 for (uint32_t n = 9; n < 16; n++) {
5577 for (size_t k = 1; k <= 20; k += 5) {
5578 GemmMicrokernelTester()
5579 .mr(4)
5580 .nr(8)
5581 .kr(1)
5582 .sr(1)
5583 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005584 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005585 .k(k)
5586 .cn_stride(11)
5587 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5588 }
5589 }
5590 }
5591
5592 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_a) {
5593 TEST_REQUIRES_ARM_NEON;
5594 for (uint32_t n = 9; n < 16; n++) {
5595 for (size_t k = 1; k <= 20; k += 5) {
5596 GemmMicrokernelTester()
5597 .mr(4)
5598 .nr(8)
5599 .kr(1)
5600 .sr(1)
5601 .m(4)
5602 .n(n)
5603 .k(k)
5604 .a_stride(23)
5605 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5606 }
5607 }
5608 }
5609
5610 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_subtile) {
5611 TEST_REQUIRES_ARM_NEON;
5612 for (uint32_t n = 9; n < 16; n++) {
5613 for (size_t k = 1; k <= 20; k += 5) {
5614 for (uint32_t m = 1; m <= 4; m++) {
5615 GemmMicrokernelTester()
5616 .mr(4)
5617 .nr(8)
5618 .kr(1)
5619 .sr(1)
5620 .m(m)
5621 .n(n)
5622 .k(k)
5623 .iterations(1)
5624 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5625 }
5626 }
5627 }
5628 }
5629
5630 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8) {
5631 TEST_REQUIRES_ARM_NEON;
5632 for (uint32_t n = 16; n <= 24; n += 8) {
5633 for (size_t k = 1; k <= 20; k += 5) {
5634 GemmMicrokernelTester()
5635 .mr(4)
5636 .nr(8)
5637 .kr(1)
5638 .sr(1)
5639 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005640 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005641 .k(k)
5642 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5643 }
5644 }
5645 }
5646
5647 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_cn) {
5648 TEST_REQUIRES_ARM_NEON;
5649 for (uint32_t n = 16; n <= 24; n += 8) {
5650 for (size_t k = 1; k <= 20; k += 5) {
5651 GemmMicrokernelTester()
5652 .mr(4)
5653 .nr(8)
5654 .kr(1)
5655 .sr(1)
5656 .m(4)
5657 .n(n)
5658 .k(k)
5659 .cn_stride(11)
5660 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5661 }
5662 }
5663 }
5664
5665 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_a) {
5666 TEST_REQUIRES_ARM_NEON;
5667 for (uint32_t n = 16; n <= 24; n += 8) {
5668 for (size_t k = 1; k <= 20; k += 5) {
5669 GemmMicrokernelTester()
5670 .mr(4)
5671 .nr(8)
5672 .kr(1)
5673 .sr(1)
5674 .m(4)
5675 .n(n)
5676 .k(k)
5677 .a_stride(23)
5678 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5679 }
5680 }
5681 }
5682
5683 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, n_div_8_subtile) {
5684 TEST_REQUIRES_ARM_NEON;
5685 for (uint32_t n = 16; n <= 24; n += 8) {
5686 for (size_t k = 1; k <= 20; k += 5) {
5687 for (uint32_t m = 1; m <= 4; m++) {
5688 GemmMicrokernelTester()
5689 .mr(4)
5690 .nr(8)
5691 .kr(1)
5692 .sr(1)
5693 .m(m)
5694 .n(n)
5695 .k(k)
5696 .iterations(1)
5697 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5698 }
5699 }
5700 }
5701 }
5702
5703 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cm_subtile) {
5704 TEST_REQUIRES_ARM_NEON;
5705 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005706 for (uint32_t n = 1; n <= 8; n++) {
5707 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005708 GemmMicrokernelTester()
5709 .mr(4)
5710 .nr(8)
5711 .kr(1)
5712 .sr(1)
5713 .m(m)
5714 .n(n)
5715 .k(k)
5716 .cm_stride(11)
5717 .iterations(1)
5718 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5719 }
5720 }
5721 }
5722 }
5723
5724 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, qmin) {
5725 TEST_REQUIRES_ARM_NEON;
5726 GemmMicrokernelTester()
5727 .mr(4)
5728 .nr(8)
5729 .kr(1)
5730 .sr(1)
5731 .m(4)
5732 .n(8)
5733 .k(4)
5734 .qmin(128)
5735 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5736 }
5737
5738 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, qmax) {
5739 TEST_REQUIRES_ARM_NEON;
5740 GemmMicrokernelTester()
5741 .mr(4)
5742 .nr(8)
5743 .kr(1)
5744 .sr(1)
5745 .m(4)
5746 .n(8)
5747 .k(4)
5748 .qmax(128)
5749 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5750 }
5751
5752 TEST(F32_GEMMINC_MINMAX_4X8__NEON_LANE_LD128, strided_cm) {
5753 TEST_REQUIRES_ARM_NEON;
5754 GemmMicrokernelTester()
5755 .mr(4)
5756 .nr(8)
5757 .kr(1)
5758 .sr(1)
5759 .m(4)
5760 .n(8)
5761 .k(4)
5762 .cm_stride(11)
5763 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
5764 }
5765#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5766
5767
5768#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5769 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2) {
5770 TEST_REQUIRES_ARM_NEON;
5771 GemmMicrokernelTester()
5772 .mr(5)
5773 .nr(8)
5774 .kr(1)
5775 .sr(1)
5776 .m(5)
5777 .n(8)
5778 .k(2)
5779 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5780 }
5781
5782 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cn) {
5783 TEST_REQUIRES_ARM_NEON;
5784 GemmMicrokernelTester()
5785 .mr(5)
5786 .nr(8)
5787 .kr(1)
5788 .sr(1)
5789 .m(5)
5790 .n(8)
5791 .k(2)
5792 .cn_stride(11)
5793 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5794 }
5795
5796 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_strided_a) {
5797 TEST_REQUIRES_ARM_NEON;
5798 GemmMicrokernelTester()
5799 .mr(5)
5800 .nr(8)
5801 .kr(1)
5802 .sr(1)
5803 .m(5)
5804 .n(8)
5805 .k(2)
5806 .a_stride(5)
5807 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5808 }
5809
5810 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile) {
5811 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005812 for (uint32_t n = 1; n <= 8; n++) {
5813 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005814 GemmMicrokernelTester()
5815 .mr(5)
5816 .nr(8)
5817 .kr(1)
5818 .sr(1)
5819 .m(m)
5820 .n(n)
5821 .k(2)
5822 .iterations(1)
5823 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5824 }
5825 }
5826 }
5827
5828 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
5829 TEST_REQUIRES_ARM_NEON;
5830 for (uint32_t m = 1; m <= 5; m++) {
5831 GemmMicrokernelTester()
5832 .mr(5)
5833 .nr(8)
5834 .kr(1)
5835 .sr(1)
5836 .m(m)
5837 .n(8)
5838 .k(2)
5839 .iterations(1)
5840 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5841 }
5842 }
5843
5844 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
5845 TEST_REQUIRES_ARM_NEON;
5846 for (uint32_t n = 1; n <= 8; n++) {
5847 GemmMicrokernelTester()
5848 .mr(5)
5849 .nr(8)
5850 .kr(1)
5851 .sr(1)
5852 .m(5)
5853 .n(n)
5854 .k(2)
5855 .iterations(1)
5856 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5857 }
5858 }
5859
5860 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2) {
5861 TEST_REQUIRES_ARM_NEON;
5862 for (size_t k = 1; k < 2; k++) {
5863 GemmMicrokernelTester()
5864 .mr(5)
5865 .nr(8)
5866 .kr(1)
5867 .sr(1)
5868 .m(5)
5869 .n(8)
5870 .k(k)
5871 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5872 }
5873 }
5874
5875 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_strided_a) {
5876 TEST_REQUIRES_ARM_NEON;
5877 for (size_t k = 1; k < 2; k++) {
5878 GemmMicrokernelTester()
5879 .mr(5)
5880 .nr(8)
5881 .kr(1)
5882 .sr(1)
5883 .m(5)
5884 .n(8)
5885 .k(k)
5886 .a_stride(5)
5887 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5888 }
5889 }
5890
5891 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_subtile) {
5892 TEST_REQUIRES_ARM_NEON;
5893 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005894 for (uint32_t n = 1; n <= 8; n++) {
5895 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005896 GemmMicrokernelTester()
5897 .mr(5)
5898 .nr(8)
5899 .kr(1)
5900 .sr(1)
5901 .m(m)
5902 .n(n)
5903 .k(k)
5904 .iterations(1)
5905 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5906 }
5907 }
5908 }
5909 }
5910
5911 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2) {
5912 TEST_REQUIRES_ARM_NEON;
5913 for (size_t k = 3; k < 4; k++) {
5914 GemmMicrokernelTester()
5915 .mr(5)
5916 .nr(8)
5917 .kr(1)
5918 .sr(1)
5919 .m(5)
5920 .n(8)
5921 .k(k)
5922 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5923 }
5924 }
5925
5926 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_strided_a) {
5927 TEST_REQUIRES_ARM_NEON;
5928 for (size_t k = 3; k < 4; k++) {
5929 GemmMicrokernelTester()
5930 .mr(5)
5931 .nr(8)
5932 .kr(1)
5933 .sr(1)
5934 .m(5)
5935 .n(8)
5936 .k(k)
5937 .a_stride(7)
5938 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5939 }
5940 }
5941
5942 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_subtile) {
5943 TEST_REQUIRES_ARM_NEON;
5944 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005945 for (uint32_t n = 1; n <= 8; n++) {
5946 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005947 GemmMicrokernelTester()
5948 .mr(5)
5949 .nr(8)
5950 .kr(1)
5951 .sr(1)
5952 .m(m)
5953 .n(n)
5954 .k(k)
5955 .iterations(1)
5956 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5957 }
5958 }
5959 }
5960 }
5961
5962 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2) {
5963 TEST_REQUIRES_ARM_NEON;
5964 for (size_t k = 4; k <= 20; k += 2) {
5965 GemmMicrokernelTester()
5966 .mr(5)
5967 .nr(8)
5968 .kr(1)
5969 .sr(1)
5970 .m(5)
5971 .n(8)
5972 .k(k)
5973 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5974 }
5975 }
5976
5977 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2_strided_a) {
5978 TEST_REQUIRES_ARM_NEON;
5979 for (size_t k = 4; k <= 20; k += 2) {
5980 GemmMicrokernelTester()
5981 .mr(5)
5982 .nr(8)
5983 .kr(1)
5984 .sr(1)
5985 .m(5)
5986 .n(8)
5987 .k(k)
5988 .a_stride(23)
5989 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
5990 }
5991 }
5992
5993 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, k_div_2_subtile) {
5994 TEST_REQUIRES_ARM_NEON;
5995 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005996 for (uint32_t n = 1; n <= 8; n++) {
5997 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005998 GemmMicrokernelTester()
5999 .mr(5)
6000 .nr(8)
6001 .kr(1)
6002 .sr(1)
6003 .m(m)
6004 .n(n)
6005 .k(k)
6006 .iterations(1)
6007 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6008 }
6009 }
6010 }
6011 }
6012
6013 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8) {
6014 TEST_REQUIRES_ARM_NEON;
6015 for (uint32_t n = 9; n < 16; n++) {
6016 for (size_t k = 1; k <= 10; k += 3) {
6017 GemmMicrokernelTester()
6018 .mr(5)
6019 .nr(8)
6020 .kr(1)
6021 .sr(1)
6022 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006023 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006024 .k(k)
6025 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6026 }
6027 }
6028 }
6029
6030 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
6031 TEST_REQUIRES_ARM_NEON;
6032 for (uint32_t n = 9; n < 16; n++) {
6033 for (size_t k = 1; k <= 10; k += 3) {
6034 GemmMicrokernelTester()
6035 .mr(5)
6036 .nr(8)
6037 .kr(1)
6038 .sr(1)
6039 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006040 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006041 .k(k)
6042 .cn_stride(11)
6043 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6044 }
6045 }
6046 }
6047
6048 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_a) {
6049 TEST_REQUIRES_ARM_NEON;
6050 for (uint32_t n = 9; n < 16; n++) {
6051 for (size_t k = 1; k <= 10; k += 3) {
6052 GemmMicrokernelTester()
6053 .mr(5)
6054 .nr(8)
6055 .kr(1)
6056 .sr(1)
6057 .m(5)
6058 .n(n)
6059 .k(k)
6060 .a_stride(13)
6061 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6062 }
6063 }
6064 }
6065
6066 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_subtile) {
6067 TEST_REQUIRES_ARM_NEON;
6068 for (uint32_t n = 9; n < 16; n++) {
6069 for (size_t k = 1; k <= 10; k += 3) {
6070 for (uint32_t m = 1; m <= 5; m++) {
6071 GemmMicrokernelTester()
6072 .mr(5)
6073 .nr(8)
6074 .kr(1)
6075 .sr(1)
6076 .m(m)
6077 .n(n)
6078 .k(k)
6079 .iterations(1)
6080 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6081 }
6082 }
6083 }
6084 }
6085
6086 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8) {
6087 TEST_REQUIRES_ARM_NEON;
6088 for (uint32_t n = 16; n <= 24; n += 8) {
6089 for (size_t k = 1; k <= 10; k += 3) {
6090 GemmMicrokernelTester()
6091 .mr(5)
6092 .nr(8)
6093 .kr(1)
6094 .sr(1)
6095 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006096 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006097 .k(k)
6098 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6099 }
6100 }
6101 }
6102
6103 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_cn) {
6104 TEST_REQUIRES_ARM_NEON;
6105 for (uint32_t n = 16; n <= 24; n += 8) {
6106 for (size_t k = 1; k <= 10; k += 3) {
6107 GemmMicrokernelTester()
6108 .mr(5)
6109 .nr(8)
6110 .kr(1)
6111 .sr(1)
6112 .m(5)
6113 .n(n)
6114 .k(k)
6115 .cn_stride(11)
6116 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6117 }
6118 }
6119 }
6120
6121 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_a) {
6122 TEST_REQUIRES_ARM_NEON;
6123 for (uint32_t n = 16; n <= 24; n += 8) {
6124 for (size_t k = 1; k <= 10; k += 3) {
6125 GemmMicrokernelTester()
6126 .mr(5)
6127 .nr(8)
6128 .kr(1)
6129 .sr(1)
6130 .m(5)
6131 .n(n)
6132 .k(k)
6133 .a_stride(13)
6134 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6135 }
6136 }
6137 }
6138
6139 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, n_div_8_subtile) {
6140 TEST_REQUIRES_ARM_NEON;
6141 for (uint32_t n = 16; n <= 24; n += 8) {
6142 for (size_t k = 1; k <= 10; k += 3) {
6143 for (uint32_t m = 1; m <= 5; m++) {
6144 GemmMicrokernelTester()
6145 .mr(5)
6146 .nr(8)
6147 .kr(1)
6148 .sr(1)
6149 .m(m)
6150 .n(n)
6151 .k(k)
6152 .iterations(1)
6153 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6154 }
6155 }
6156 }
6157 }
6158
6159 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cm_subtile) {
6160 TEST_REQUIRES_ARM_NEON;
6161 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006162 for (uint32_t n = 1; n <= 8; n++) {
6163 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006164 GemmMicrokernelTester()
6165 .mr(5)
6166 .nr(8)
6167 .kr(1)
6168 .sr(1)
6169 .m(m)
6170 .n(n)
6171 .k(k)
6172 .cm_stride(11)
6173 .iterations(1)
6174 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6175 }
6176 }
6177 }
6178 }
6179
6180 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, qmin) {
6181 TEST_REQUIRES_ARM_NEON;
6182 GemmMicrokernelTester()
6183 .mr(5)
6184 .nr(8)
6185 .kr(1)
6186 .sr(1)
6187 .m(5)
6188 .n(8)
6189 .k(2)
6190 .qmin(128)
6191 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6192 }
6193
6194 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, qmax) {
6195 TEST_REQUIRES_ARM_NEON;
6196 GemmMicrokernelTester()
6197 .mr(5)
6198 .nr(8)
6199 .kr(1)
6200 .sr(1)
6201 .m(5)
6202 .n(8)
6203 .k(2)
6204 .qmax(128)
6205 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6206 }
6207
6208 TEST(F32_GEMMINC_MINMAX_5X8__NEON_LANE_LD64, strided_cm) {
6209 TEST_REQUIRES_ARM_NEON;
6210 GemmMicrokernelTester()
6211 .mr(5)
6212 .nr(8)
6213 .kr(1)
6214 .sr(1)
6215 .m(5)
6216 .n(8)
6217 .k(2)
6218 .cm_stride(11)
6219 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6220 }
6221#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6222
6223
6224#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6225 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4) {
6226 TEST_REQUIRES_ARM_NEON;
6227 GemmMicrokernelTester()
6228 .mr(6)
6229 .nr(8)
6230 .kr(1)
6231 .sr(1)
6232 .m(6)
6233 .n(8)
6234 .k(4)
6235 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6236 }
6237
6238 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cn) {
6239 TEST_REQUIRES_ARM_NEON;
6240 GemmMicrokernelTester()
6241 .mr(6)
6242 .nr(8)
6243 .kr(1)
6244 .sr(1)
6245 .m(6)
6246 .n(8)
6247 .k(4)
6248 .cn_stride(11)
6249 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6250 }
6251
6252 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_strided_a) {
6253 TEST_REQUIRES_ARM_NEON;
6254 GemmMicrokernelTester()
6255 .mr(6)
6256 .nr(8)
6257 .kr(1)
6258 .sr(1)
6259 .m(6)
6260 .n(8)
6261 .k(4)
6262 .a_stride(7)
6263 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6264 }
6265
6266 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile) {
6267 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006268 for (uint32_t n = 1; n <= 8; n++) {
6269 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006270 GemmMicrokernelTester()
6271 .mr(6)
6272 .nr(8)
6273 .kr(1)
6274 .sr(1)
6275 .m(m)
6276 .n(n)
6277 .k(4)
6278 .iterations(1)
6279 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6280 }
6281 }
6282 }
6283
6284 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
6285 TEST_REQUIRES_ARM_NEON;
6286 for (uint32_t m = 1; m <= 6; m++) {
6287 GemmMicrokernelTester()
6288 .mr(6)
6289 .nr(8)
6290 .kr(1)
6291 .sr(1)
6292 .m(m)
6293 .n(8)
6294 .k(4)
6295 .iterations(1)
6296 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6297 }
6298 }
6299
6300 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
6301 TEST_REQUIRES_ARM_NEON;
6302 for (uint32_t n = 1; n <= 8; n++) {
6303 GemmMicrokernelTester()
6304 .mr(6)
6305 .nr(8)
6306 .kr(1)
6307 .sr(1)
6308 .m(6)
6309 .n(n)
6310 .k(4)
6311 .iterations(1)
6312 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6313 }
6314 }
6315
6316 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4) {
6317 TEST_REQUIRES_ARM_NEON;
6318 for (size_t k = 1; k < 4; k++) {
6319 GemmMicrokernelTester()
6320 .mr(6)
6321 .nr(8)
6322 .kr(1)
6323 .sr(1)
6324 .m(6)
6325 .n(8)
6326 .k(k)
6327 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6328 }
6329 }
6330
6331 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_strided_a) {
6332 TEST_REQUIRES_ARM_NEON;
6333 for (size_t k = 1; k < 4; k++) {
6334 GemmMicrokernelTester()
6335 .mr(6)
6336 .nr(8)
6337 .kr(1)
6338 .sr(1)
6339 .m(6)
6340 .n(8)
6341 .k(k)
6342 .a_stride(7)
6343 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6344 }
6345 }
6346
6347 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_subtile) {
6348 TEST_REQUIRES_ARM_NEON;
6349 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006350 for (uint32_t n = 1; n <= 8; n++) {
6351 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006352 GemmMicrokernelTester()
6353 .mr(6)
6354 .nr(8)
6355 .kr(1)
6356 .sr(1)
6357 .m(m)
6358 .n(n)
6359 .k(k)
6360 .iterations(1)
6361 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6362 }
6363 }
6364 }
6365 }
6366
6367 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4) {
6368 TEST_REQUIRES_ARM_NEON;
6369 for (size_t k = 5; k < 8; k++) {
6370 GemmMicrokernelTester()
6371 .mr(6)
6372 .nr(8)
6373 .kr(1)
6374 .sr(1)
6375 .m(6)
6376 .n(8)
6377 .k(k)
6378 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6379 }
6380 }
6381
6382 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_strided_a) {
6383 TEST_REQUIRES_ARM_NEON;
6384 for (size_t k = 5; k < 8; k++) {
6385 GemmMicrokernelTester()
6386 .mr(6)
6387 .nr(8)
6388 .kr(1)
6389 .sr(1)
6390 .m(6)
6391 .n(8)
6392 .k(k)
6393 .a_stride(11)
6394 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6395 }
6396 }
6397
6398 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_subtile) {
6399 TEST_REQUIRES_ARM_NEON;
6400 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006401 for (uint32_t n = 1; n <= 8; n++) {
6402 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006403 GemmMicrokernelTester()
6404 .mr(6)
6405 .nr(8)
6406 .kr(1)
6407 .sr(1)
6408 .m(m)
6409 .n(n)
6410 .k(k)
6411 .iterations(1)
6412 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6413 }
6414 }
6415 }
6416 }
6417
6418 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4) {
6419 TEST_REQUIRES_ARM_NEON;
6420 for (size_t k = 8; k <= 40; k += 4) {
6421 GemmMicrokernelTester()
6422 .mr(6)
6423 .nr(8)
6424 .kr(1)
6425 .sr(1)
6426 .m(6)
6427 .n(8)
6428 .k(k)
6429 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6430 }
6431 }
6432
6433 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4_strided_a) {
6434 TEST_REQUIRES_ARM_NEON;
6435 for (size_t k = 8; k <= 40; k += 4) {
6436 GemmMicrokernelTester()
6437 .mr(6)
6438 .nr(8)
6439 .kr(1)
6440 .sr(1)
6441 .m(6)
6442 .n(8)
6443 .k(k)
6444 .a_stride(43)
6445 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6446 }
6447 }
6448
6449 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, k_div_4_subtile) {
6450 TEST_REQUIRES_ARM_NEON;
6451 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006452 for (uint32_t n = 1; n <= 8; n++) {
6453 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006454 GemmMicrokernelTester()
6455 .mr(6)
6456 .nr(8)
6457 .kr(1)
6458 .sr(1)
6459 .m(m)
6460 .n(n)
6461 .k(k)
6462 .iterations(1)
6463 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6464 }
6465 }
6466 }
6467 }
6468
6469 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8) {
6470 TEST_REQUIRES_ARM_NEON;
6471 for (uint32_t n = 9; n < 16; n++) {
6472 for (size_t k = 1; k <= 20; k += 5) {
6473 GemmMicrokernelTester()
6474 .mr(6)
6475 .nr(8)
6476 .kr(1)
6477 .sr(1)
6478 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006479 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006480 .k(k)
6481 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6482 }
6483 }
6484 }
6485
6486 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
6487 TEST_REQUIRES_ARM_NEON;
6488 for (uint32_t n = 9; n < 16; n++) {
6489 for (size_t k = 1; k <= 20; k += 5) {
6490 GemmMicrokernelTester()
6491 .mr(6)
6492 .nr(8)
6493 .kr(1)
6494 .sr(1)
6495 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006496 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006497 .k(k)
6498 .cn_stride(11)
6499 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6500 }
6501 }
6502 }
6503
6504 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_a) {
6505 TEST_REQUIRES_ARM_NEON;
6506 for (uint32_t n = 9; n < 16; n++) {
6507 for (size_t k = 1; k <= 20; k += 5) {
6508 GemmMicrokernelTester()
6509 .mr(6)
6510 .nr(8)
6511 .kr(1)
6512 .sr(1)
6513 .m(6)
6514 .n(n)
6515 .k(k)
6516 .a_stride(23)
6517 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6518 }
6519 }
6520 }
6521
6522 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_subtile) {
6523 TEST_REQUIRES_ARM_NEON;
6524 for (uint32_t n = 9; n < 16; n++) {
6525 for (size_t k = 1; k <= 20; k += 5) {
6526 for (uint32_t m = 1; m <= 6; m++) {
6527 GemmMicrokernelTester()
6528 .mr(6)
6529 .nr(8)
6530 .kr(1)
6531 .sr(1)
6532 .m(m)
6533 .n(n)
6534 .k(k)
6535 .iterations(1)
6536 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6537 }
6538 }
6539 }
6540 }
6541
6542 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8) {
6543 TEST_REQUIRES_ARM_NEON;
6544 for (uint32_t n = 16; n <= 24; n += 8) {
6545 for (size_t k = 1; k <= 20; k += 5) {
6546 GemmMicrokernelTester()
6547 .mr(6)
6548 .nr(8)
6549 .kr(1)
6550 .sr(1)
6551 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006552 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006553 .k(k)
6554 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6555 }
6556 }
6557 }
6558
6559 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_cn) {
6560 TEST_REQUIRES_ARM_NEON;
6561 for (uint32_t n = 16; n <= 24; n += 8) {
6562 for (size_t k = 1; k <= 20; k += 5) {
6563 GemmMicrokernelTester()
6564 .mr(6)
6565 .nr(8)
6566 .kr(1)
6567 .sr(1)
6568 .m(6)
6569 .n(n)
6570 .k(k)
6571 .cn_stride(11)
6572 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6573 }
6574 }
6575 }
6576
6577 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_a) {
6578 TEST_REQUIRES_ARM_NEON;
6579 for (uint32_t n = 16; n <= 24; n += 8) {
6580 for (size_t k = 1; k <= 20; k += 5) {
6581 GemmMicrokernelTester()
6582 .mr(6)
6583 .nr(8)
6584 .kr(1)
6585 .sr(1)
6586 .m(6)
6587 .n(n)
6588 .k(k)
6589 .a_stride(23)
6590 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6591 }
6592 }
6593 }
6594
6595 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, n_div_8_subtile) {
6596 TEST_REQUIRES_ARM_NEON;
6597 for (uint32_t n = 16; n <= 24; n += 8) {
6598 for (size_t k = 1; k <= 20; k += 5) {
6599 for (uint32_t m = 1; m <= 6; m++) {
6600 GemmMicrokernelTester()
6601 .mr(6)
6602 .nr(8)
6603 .kr(1)
6604 .sr(1)
6605 .m(m)
6606 .n(n)
6607 .k(k)
6608 .iterations(1)
6609 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6610 }
6611 }
6612 }
6613 }
6614
6615 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cm_subtile) {
6616 TEST_REQUIRES_ARM_NEON;
6617 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006618 for (uint32_t n = 1; n <= 8; n++) {
6619 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006620 GemmMicrokernelTester()
6621 .mr(6)
6622 .nr(8)
6623 .kr(1)
6624 .sr(1)
6625 .m(m)
6626 .n(n)
6627 .k(k)
6628 .cm_stride(11)
6629 .iterations(1)
6630 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6631 }
6632 }
6633 }
6634 }
6635
6636 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, qmin) {
6637 TEST_REQUIRES_ARM_NEON;
6638 GemmMicrokernelTester()
6639 .mr(6)
6640 .nr(8)
6641 .kr(1)
6642 .sr(1)
6643 .m(6)
6644 .n(8)
6645 .k(4)
6646 .qmin(128)
6647 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6648 }
6649
6650 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, qmax) {
6651 TEST_REQUIRES_ARM_NEON;
6652 GemmMicrokernelTester()
6653 .mr(6)
6654 .nr(8)
6655 .kr(1)
6656 .sr(1)
6657 .m(6)
6658 .n(8)
6659 .k(4)
6660 .qmax(128)
6661 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6662 }
6663
6664 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD128, strided_cm) {
6665 TEST_REQUIRES_ARM_NEON;
6666 GemmMicrokernelTester()
6667 .mr(6)
6668 .nr(8)
6669 .kr(1)
6670 .sr(1)
6671 .m(6)
6672 .n(8)
6673 .k(4)
6674 .cm_stride(11)
6675 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6676 }
6677#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6678
6679
6680#if XNN_ARCH_ARM64
6681 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4) {
6682 TEST_REQUIRES_ARM_NEON_FMA;
6683 GemmMicrokernelTester()
6684 .mr(4)
6685 .nr(8)
6686 .kr(1)
6687 .sr(1)
6688 .m(4)
6689 .n(8)
6690 .k(4)
6691 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6692 }
6693
6694 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cn) {
6695 TEST_REQUIRES_ARM_NEON_FMA;
6696 GemmMicrokernelTester()
6697 .mr(4)
6698 .nr(8)
6699 .kr(1)
6700 .sr(1)
6701 .m(4)
6702 .n(8)
6703 .k(4)
6704 .cn_stride(11)
6705 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6706 }
6707
6708 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_strided_a) {
6709 TEST_REQUIRES_ARM_NEON_FMA;
6710 GemmMicrokernelTester()
6711 .mr(4)
6712 .nr(8)
6713 .kr(1)
6714 .sr(1)
6715 .m(4)
6716 .n(8)
6717 .k(4)
6718 .a_stride(7)
6719 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6720 }
6721
6722 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
6723 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006724 for (uint32_t n = 1; n <= 8; n++) {
6725 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006726 GemmMicrokernelTester()
6727 .mr(4)
6728 .nr(8)
6729 .kr(1)
6730 .sr(1)
6731 .m(m)
6732 .n(n)
6733 .k(4)
6734 .iterations(1)
6735 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6736 }
6737 }
6738 }
6739
6740 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
6741 TEST_REQUIRES_ARM_NEON_FMA;
6742 for (uint32_t m = 1; m <= 4; m++) {
6743 GemmMicrokernelTester()
6744 .mr(4)
6745 .nr(8)
6746 .kr(1)
6747 .sr(1)
6748 .m(m)
6749 .n(8)
6750 .k(4)
6751 .iterations(1)
6752 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6753 }
6754 }
6755
6756 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
6757 TEST_REQUIRES_ARM_NEON_FMA;
6758 for (uint32_t n = 1; n <= 8; n++) {
6759 GemmMicrokernelTester()
6760 .mr(4)
6761 .nr(8)
6762 .kr(1)
6763 .sr(1)
6764 .m(4)
6765 .n(n)
6766 .k(4)
6767 .iterations(1)
6768 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6769 }
6770 }
6771
6772 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4) {
6773 TEST_REQUIRES_ARM_NEON_FMA;
6774 for (size_t k = 1; k < 4; k++) {
6775 GemmMicrokernelTester()
6776 .mr(4)
6777 .nr(8)
6778 .kr(1)
6779 .sr(1)
6780 .m(4)
6781 .n(8)
6782 .k(k)
6783 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6784 }
6785 }
6786
6787 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_strided_a) {
6788 TEST_REQUIRES_ARM_NEON_FMA;
6789 for (size_t k = 1; k < 4; k++) {
6790 GemmMicrokernelTester()
6791 .mr(4)
6792 .nr(8)
6793 .kr(1)
6794 .sr(1)
6795 .m(4)
6796 .n(8)
6797 .k(k)
6798 .a_stride(7)
6799 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6800 }
6801 }
6802
6803 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
6804 TEST_REQUIRES_ARM_NEON_FMA;
6805 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006806 for (uint32_t n = 1; n <= 8; n++) {
6807 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006808 GemmMicrokernelTester()
6809 .mr(4)
6810 .nr(8)
6811 .kr(1)
6812 .sr(1)
6813 .m(m)
6814 .n(n)
6815 .k(k)
6816 .iterations(1)
6817 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6818 }
6819 }
6820 }
6821 }
6822
6823 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4) {
6824 TEST_REQUIRES_ARM_NEON_FMA;
6825 for (size_t k = 5; k < 8; k++) {
6826 GemmMicrokernelTester()
6827 .mr(4)
6828 .nr(8)
6829 .kr(1)
6830 .sr(1)
6831 .m(4)
6832 .n(8)
6833 .k(k)
6834 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6835 }
6836 }
6837
6838 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_strided_a) {
6839 TEST_REQUIRES_ARM_NEON_FMA;
6840 for (size_t k = 5; k < 8; k++) {
6841 GemmMicrokernelTester()
6842 .mr(4)
6843 .nr(8)
6844 .kr(1)
6845 .sr(1)
6846 .m(4)
6847 .n(8)
6848 .k(k)
6849 .a_stride(11)
6850 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6851 }
6852 }
6853
6854 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
6855 TEST_REQUIRES_ARM_NEON_FMA;
6856 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006857 for (uint32_t n = 1; n <= 8; n++) {
6858 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006859 GemmMicrokernelTester()
6860 .mr(4)
6861 .nr(8)
6862 .kr(1)
6863 .sr(1)
6864 .m(m)
6865 .n(n)
6866 .k(k)
6867 .iterations(1)
6868 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6869 }
6870 }
6871 }
6872 }
6873
6874 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4) {
6875 TEST_REQUIRES_ARM_NEON_FMA;
6876 for (size_t k = 8; k <= 40; k += 4) {
6877 GemmMicrokernelTester()
6878 .mr(4)
6879 .nr(8)
6880 .kr(1)
6881 .sr(1)
6882 .m(4)
6883 .n(8)
6884 .k(k)
6885 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6886 }
6887 }
6888
6889 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_strided_a) {
6890 TEST_REQUIRES_ARM_NEON_FMA;
6891 for (size_t k = 8; k <= 40; k += 4) {
6892 GemmMicrokernelTester()
6893 .mr(4)
6894 .nr(8)
6895 .kr(1)
6896 .sr(1)
6897 .m(4)
6898 .n(8)
6899 .k(k)
6900 .a_stride(43)
6901 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6902 }
6903 }
6904
6905 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
6906 TEST_REQUIRES_ARM_NEON_FMA;
6907 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006908 for (uint32_t n = 1; n <= 8; n++) {
6909 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006910 GemmMicrokernelTester()
6911 .mr(4)
6912 .nr(8)
6913 .kr(1)
6914 .sr(1)
6915 .m(m)
6916 .n(n)
6917 .k(k)
6918 .iterations(1)
6919 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6920 }
6921 }
6922 }
6923 }
6924
6925 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8) {
6926 TEST_REQUIRES_ARM_NEON_FMA;
6927 for (uint32_t n = 9; n < 16; n++) {
6928 for (size_t k = 1; k <= 20; k += 5) {
6929 GemmMicrokernelTester()
6930 .mr(4)
6931 .nr(8)
6932 .kr(1)
6933 .sr(1)
6934 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006935 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006936 .k(k)
6937 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6938 }
6939 }
6940 }
6941
6942 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
6943 TEST_REQUIRES_ARM_NEON_FMA;
6944 for (uint32_t n = 9; n < 16; n++) {
6945 for (size_t k = 1; k <= 20; k += 5) {
6946 GemmMicrokernelTester()
6947 .mr(4)
6948 .nr(8)
6949 .kr(1)
6950 .sr(1)
6951 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006952 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006953 .k(k)
6954 .cn_stride(11)
6955 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6956 }
6957 }
6958 }
6959
6960 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_a) {
6961 TEST_REQUIRES_ARM_NEON_FMA;
6962 for (uint32_t n = 9; n < 16; n++) {
6963 for (size_t k = 1; k <= 20; k += 5) {
6964 GemmMicrokernelTester()
6965 .mr(4)
6966 .nr(8)
6967 .kr(1)
6968 .sr(1)
6969 .m(4)
6970 .n(n)
6971 .k(k)
6972 .a_stride(23)
6973 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6974 }
6975 }
6976 }
6977
6978 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
6979 TEST_REQUIRES_ARM_NEON_FMA;
6980 for (uint32_t n = 9; n < 16; n++) {
6981 for (size_t k = 1; k <= 20; k += 5) {
6982 for (uint32_t m = 1; m <= 4; m++) {
6983 GemmMicrokernelTester()
6984 .mr(4)
6985 .nr(8)
6986 .kr(1)
6987 .sr(1)
6988 .m(m)
6989 .n(n)
6990 .k(k)
6991 .iterations(1)
6992 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
6993 }
6994 }
6995 }
6996 }
6997
6998 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8) {
6999 TEST_REQUIRES_ARM_NEON_FMA;
7000 for (uint32_t n = 16; n <= 24; n += 8) {
7001 for (size_t k = 1; k <= 20; k += 5) {
7002 GemmMicrokernelTester()
7003 .mr(4)
7004 .nr(8)
7005 .kr(1)
7006 .sr(1)
7007 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007008 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007009 .k(k)
7010 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7011 }
7012 }
7013 }
7014
7015 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
7016 TEST_REQUIRES_ARM_NEON_FMA;
7017 for (uint32_t n = 16; n <= 24; n += 8) {
7018 for (size_t k = 1; k <= 20; k += 5) {
7019 GemmMicrokernelTester()
7020 .mr(4)
7021 .nr(8)
7022 .kr(1)
7023 .sr(1)
7024 .m(4)
7025 .n(n)
7026 .k(k)
7027 .cn_stride(11)
7028 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7029 }
7030 }
7031 }
7032
7033 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_a) {
7034 TEST_REQUIRES_ARM_NEON_FMA;
7035 for (uint32_t n = 16; n <= 24; n += 8) {
7036 for (size_t k = 1; k <= 20; k += 5) {
7037 GemmMicrokernelTester()
7038 .mr(4)
7039 .nr(8)
7040 .kr(1)
7041 .sr(1)
7042 .m(4)
7043 .n(n)
7044 .k(k)
7045 .a_stride(23)
7046 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7047 }
7048 }
7049 }
7050
7051 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
7052 TEST_REQUIRES_ARM_NEON_FMA;
7053 for (uint32_t n = 16; n <= 24; n += 8) {
7054 for (size_t k = 1; k <= 20; k += 5) {
7055 for (uint32_t m = 1; m <= 4; m++) {
7056 GemmMicrokernelTester()
7057 .mr(4)
7058 .nr(8)
7059 .kr(1)
7060 .sr(1)
7061 .m(m)
7062 .n(n)
7063 .k(k)
7064 .iterations(1)
7065 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7066 }
7067 }
7068 }
7069 }
7070
7071 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
7072 TEST_REQUIRES_ARM_NEON_FMA;
7073 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007074 for (uint32_t n = 1; n <= 8; n++) {
7075 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007076 GemmMicrokernelTester()
7077 .mr(4)
7078 .nr(8)
7079 .kr(1)
7080 .sr(1)
7081 .m(m)
7082 .n(n)
7083 .k(k)
7084 .cm_stride(11)
7085 .iterations(1)
7086 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7087 }
7088 }
7089 }
7090 }
7091
7092 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, qmin) {
7093 TEST_REQUIRES_ARM_NEON_FMA;
7094 GemmMicrokernelTester()
7095 .mr(4)
7096 .nr(8)
7097 .kr(1)
7098 .sr(1)
7099 .m(4)
7100 .n(8)
7101 .k(4)
7102 .qmin(128)
7103 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7104 }
7105
7106 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, qmax) {
7107 TEST_REQUIRES_ARM_NEON_FMA;
7108 GemmMicrokernelTester()
7109 .mr(4)
7110 .nr(8)
7111 .kr(1)
7112 .sr(1)
7113 .m(4)
7114 .n(8)
7115 .k(4)
7116 .qmax(128)
7117 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7118 }
7119
7120 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm) {
7121 TEST_REQUIRES_ARM_NEON_FMA;
7122 GemmMicrokernelTester()
7123 .mr(4)
7124 .nr(8)
7125 .kr(1)
7126 .sr(1)
7127 .m(4)
7128 .n(8)
7129 .k(4)
7130 .cm_stride(11)
7131 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
7132 }
7133#endif // XNN_ARCH_ARM64
7134
7135
7136#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7137 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2) {
7138 TEST_REQUIRES_ARM_NEON;
7139 GemmMicrokernelTester()
7140 .mr(4)
7141 .nr(8)
7142 .kr(1)
7143 .sr(1)
7144 .m(4)
7145 .n(8)
7146 .k(2)
7147 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7148 }
7149
7150 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cn) {
7151 TEST_REQUIRES_ARM_NEON;
7152 GemmMicrokernelTester()
7153 .mr(4)
7154 .nr(8)
7155 .kr(1)
7156 .sr(1)
7157 .m(4)
7158 .n(8)
7159 .k(2)
7160 .cn_stride(11)
7161 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7162 }
7163
7164 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_strided_a) {
7165 TEST_REQUIRES_ARM_NEON;
7166 GemmMicrokernelTester()
7167 .mr(4)
7168 .nr(8)
7169 .kr(1)
7170 .sr(1)
7171 .m(4)
7172 .n(8)
7173 .k(2)
7174 .a_stride(5)
7175 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7176 }
7177
7178 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile) {
7179 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007180 for (uint32_t n = 1; n <= 8; n++) {
7181 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007182 GemmMicrokernelTester()
7183 .mr(4)
7184 .nr(8)
7185 .kr(1)
7186 .sr(1)
7187 .m(m)
7188 .n(n)
7189 .k(2)
7190 .iterations(1)
7191 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7192 }
7193 }
7194 }
7195
7196 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
7197 TEST_REQUIRES_ARM_NEON;
7198 for (uint32_t m = 1; m <= 4; m++) {
7199 GemmMicrokernelTester()
7200 .mr(4)
7201 .nr(8)
7202 .kr(1)
7203 .sr(1)
7204 .m(m)
7205 .n(8)
7206 .k(2)
7207 .iterations(1)
7208 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7209 }
7210 }
7211
7212 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
7213 TEST_REQUIRES_ARM_NEON;
7214 for (uint32_t n = 1; n <= 8; n++) {
7215 GemmMicrokernelTester()
7216 .mr(4)
7217 .nr(8)
7218 .kr(1)
7219 .sr(1)
7220 .m(4)
7221 .n(n)
7222 .k(2)
7223 .iterations(1)
7224 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7225 }
7226 }
7227
7228 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2) {
7229 TEST_REQUIRES_ARM_NEON;
7230 for (size_t k = 1; k < 2; k++) {
7231 GemmMicrokernelTester()
7232 .mr(4)
7233 .nr(8)
7234 .kr(1)
7235 .sr(1)
7236 .m(4)
7237 .n(8)
7238 .k(k)
7239 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7240 }
7241 }
7242
7243 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_strided_a) {
7244 TEST_REQUIRES_ARM_NEON;
7245 for (size_t k = 1; k < 2; k++) {
7246 GemmMicrokernelTester()
7247 .mr(4)
7248 .nr(8)
7249 .kr(1)
7250 .sr(1)
7251 .m(4)
7252 .n(8)
7253 .k(k)
7254 .a_stride(5)
7255 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7256 }
7257 }
7258
7259 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_subtile) {
7260 TEST_REQUIRES_ARM_NEON;
7261 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007262 for (uint32_t n = 1; n <= 8; n++) {
7263 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007264 GemmMicrokernelTester()
7265 .mr(4)
7266 .nr(8)
7267 .kr(1)
7268 .sr(1)
7269 .m(m)
7270 .n(n)
7271 .k(k)
7272 .iterations(1)
7273 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7274 }
7275 }
7276 }
7277 }
7278
7279 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2) {
7280 TEST_REQUIRES_ARM_NEON;
7281 for (size_t k = 3; k < 4; k++) {
7282 GemmMicrokernelTester()
7283 .mr(4)
7284 .nr(8)
7285 .kr(1)
7286 .sr(1)
7287 .m(4)
7288 .n(8)
7289 .k(k)
7290 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7291 }
7292 }
7293
7294 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_strided_a) {
7295 TEST_REQUIRES_ARM_NEON;
7296 for (size_t k = 3; k < 4; k++) {
7297 GemmMicrokernelTester()
7298 .mr(4)
7299 .nr(8)
7300 .kr(1)
7301 .sr(1)
7302 .m(4)
7303 .n(8)
7304 .k(k)
7305 .a_stride(7)
7306 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7307 }
7308 }
7309
7310 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_subtile) {
7311 TEST_REQUIRES_ARM_NEON;
7312 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007313 for (uint32_t n = 1; n <= 8; n++) {
7314 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007315 GemmMicrokernelTester()
7316 .mr(4)
7317 .nr(8)
7318 .kr(1)
7319 .sr(1)
7320 .m(m)
7321 .n(n)
7322 .k(k)
7323 .iterations(1)
7324 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7325 }
7326 }
7327 }
7328 }
7329
7330 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2) {
7331 TEST_REQUIRES_ARM_NEON;
7332 for (size_t k = 4; k <= 20; k += 2) {
7333 GemmMicrokernelTester()
7334 .mr(4)
7335 .nr(8)
7336 .kr(1)
7337 .sr(1)
7338 .m(4)
7339 .n(8)
7340 .k(k)
7341 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7342 }
7343 }
7344
7345 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2_strided_a) {
7346 TEST_REQUIRES_ARM_NEON;
7347 for (size_t k = 4; k <= 20; k += 2) {
7348 GemmMicrokernelTester()
7349 .mr(4)
7350 .nr(8)
7351 .kr(1)
7352 .sr(1)
7353 .m(4)
7354 .n(8)
7355 .k(k)
7356 .a_stride(23)
7357 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7358 }
7359 }
7360
7361 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, k_div_2_subtile) {
7362 TEST_REQUIRES_ARM_NEON;
7363 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007364 for (uint32_t n = 1; n <= 8; n++) {
7365 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007366 GemmMicrokernelTester()
7367 .mr(4)
7368 .nr(8)
7369 .kr(1)
7370 .sr(1)
7371 .m(m)
7372 .n(n)
7373 .k(k)
7374 .iterations(1)
7375 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7376 }
7377 }
7378 }
7379 }
7380
7381 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8) {
7382 TEST_REQUIRES_ARM_NEON;
7383 for (uint32_t n = 9; n < 16; n++) {
7384 for (size_t k = 1; k <= 10; k += 3) {
7385 GemmMicrokernelTester()
7386 .mr(4)
7387 .nr(8)
7388 .kr(1)
7389 .sr(1)
7390 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007391 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007392 .k(k)
7393 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7394 }
7395 }
7396 }
7397
7398 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
7399 TEST_REQUIRES_ARM_NEON;
7400 for (uint32_t n = 9; n < 16; n++) {
7401 for (size_t k = 1; k <= 10; k += 3) {
7402 GemmMicrokernelTester()
7403 .mr(4)
7404 .nr(8)
7405 .kr(1)
7406 .sr(1)
7407 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007408 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007409 .k(k)
7410 .cn_stride(11)
7411 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7412 }
7413 }
7414 }
7415
7416 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_a) {
7417 TEST_REQUIRES_ARM_NEON;
7418 for (uint32_t n = 9; n < 16; n++) {
7419 for (size_t k = 1; k <= 10; k += 3) {
7420 GemmMicrokernelTester()
7421 .mr(4)
7422 .nr(8)
7423 .kr(1)
7424 .sr(1)
7425 .m(4)
7426 .n(n)
7427 .k(k)
7428 .a_stride(13)
7429 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7430 }
7431 }
7432 }
7433
7434 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_subtile) {
7435 TEST_REQUIRES_ARM_NEON;
7436 for (uint32_t n = 9; n < 16; n++) {
7437 for (size_t k = 1; k <= 10; k += 3) {
7438 for (uint32_t m = 1; m <= 4; m++) {
7439 GemmMicrokernelTester()
7440 .mr(4)
7441 .nr(8)
7442 .kr(1)
7443 .sr(1)
7444 .m(m)
7445 .n(n)
7446 .k(k)
7447 .iterations(1)
7448 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7449 }
7450 }
7451 }
7452 }
7453
7454 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8) {
7455 TEST_REQUIRES_ARM_NEON;
7456 for (uint32_t n = 16; n <= 24; n += 8) {
7457 for (size_t k = 1; k <= 10; k += 3) {
7458 GemmMicrokernelTester()
7459 .mr(4)
7460 .nr(8)
7461 .kr(1)
7462 .sr(1)
7463 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007464 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007465 .k(k)
7466 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7467 }
7468 }
7469 }
7470
7471 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_cn) {
7472 TEST_REQUIRES_ARM_NEON;
7473 for (uint32_t n = 16; n <= 24; n += 8) {
7474 for (size_t k = 1; k <= 10; k += 3) {
7475 GemmMicrokernelTester()
7476 .mr(4)
7477 .nr(8)
7478 .kr(1)
7479 .sr(1)
7480 .m(4)
7481 .n(n)
7482 .k(k)
7483 .cn_stride(11)
7484 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7485 }
7486 }
7487 }
7488
7489 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_a) {
7490 TEST_REQUIRES_ARM_NEON;
7491 for (uint32_t n = 16; n <= 24; n += 8) {
7492 for (size_t k = 1; k <= 10; k += 3) {
7493 GemmMicrokernelTester()
7494 .mr(4)
7495 .nr(8)
7496 .kr(1)
7497 .sr(1)
7498 .m(4)
7499 .n(n)
7500 .k(k)
7501 .a_stride(13)
7502 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7503 }
7504 }
7505 }
7506
7507 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, n_div_8_subtile) {
7508 TEST_REQUIRES_ARM_NEON;
7509 for (uint32_t n = 16; n <= 24; n += 8) {
7510 for (size_t k = 1; k <= 10; k += 3) {
7511 for (uint32_t m = 1; m <= 4; m++) {
7512 GemmMicrokernelTester()
7513 .mr(4)
7514 .nr(8)
7515 .kr(1)
7516 .sr(1)
7517 .m(m)
7518 .n(n)
7519 .k(k)
7520 .iterations(1)
7521 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7522 }
7523 }
7524 }
7525 }
7526
7527 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cm_subtile) {
7528 TEST_REQUIRES_ARM_NEON;
7529 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007530 for (uint32_t n = 1; n <= 8; n++) {
7531 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007532 GemmMicrokernelTester()
7533 .mr(4)
7534 .nr(8)
7535 .kr(1)
7536 .sr(1)
7537 .m(m)
7538 .n(n)
7539 .k(k)
7540 .cm_stride(11)
7541 .iterations(1)
7542 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7543 }
7544 }
7545 }
7546 }
7547
7548 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, qmin) {
7549 TEST_REQUIRES_ARM_NEON;
7550 GemmMicrokernelTester()
7551 .mr(4)
7552 .nr(8)
7553 .kr(1)
7554 .sr(1)
7555 .m(4)
7556 .n(8)
7557 .k(2)
7558 .qmin(128)
7559 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7560 }
7561
7562 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, qmax) {
7563 TEST_REQUIRES_ARM_NEON;
7564 GemmMicrokernelTester()
7565 .mr(4)
7566 .nr(8)
7567 .kr(1)
7568 .sr(1)
7569 .m(4)
7570 .n(8)
7571 .k(2)
7572 .qmax(128)
7573 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7574 }
7575
7576 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD64, strided_cm) {
7577 TEST_REQUIRES_ARM_NEON;
7578 GemmMicrokernelTester()
7579 .mr(4)
7580 .nr(8)
7581 .kr(1)
7582 .sr(1)
7583 .m(4)
7584 .n(8)
7585 .k(2)
7586 .cm_stride(11)
7587 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
7588 }
7589#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7590
7591
7592#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7593 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4) {
7594 TEST_REQUIRES_ARM_NEON;
7595 GemmMicrokernelTester()
7596 .mr(4)
7597 .nr(8)
7598 .kr(1)
7599 .sr(1)
7600 .m(4)
7601 .n(8)
7602 .k(4)
7603 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7604 }
7605
7606 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cn) {
7607 TEST_REQUIRES_ARM_NEON;
7608 GemmMicrokernelTester()
7609 .mr(4)
7610 .nr(8)
7611 .kr(1)
7612 .sr(1)
7613 .m(4)
7614 .n(8)
7615 .k(4)
7616 .cn_stride(11)
7617 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7618 }
7619
7620 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_strided_a) {
7621 TEST_REQUIRES_ARM_NEON;
7622 GemmMicrokernelTester()
7623 .mr(4)
7624 .nr(8)
7625 .kr(1)
7626 .sr(1)
7627 .m(4)
7628 .n(8)
7629 .k(4)
7630 .a_stride(7)
7631 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7632 }
7633
7634 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile) {
7635 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007636 for (uint32_t n = 1; n <= 8; n++) {
7637 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007638 GemmMicrokernelTester()
7639 .mr(4)
7640 .nr(8)
7641 .kr(1)
7642 .sr(1)
7643 .m(m)
7644 .n(n)
7645 .k(4)
7646 .iterations(1)
7647 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7648 }
7649 }
7650 }
7651
7652 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
7653 TEST_REQUIRES_ARM_NEON;
7654 for (uint32_t m = 1; m <= 4; m++) {
7655 GemmMicrokernelTester()
7656 .mr(4)
7657 .nr(8)
7658 .kr(1)
7659 .sr(1)
7660 .m(m)
7661 .n(8)
7662 .k(4)
7663 .iterations(1)
7664 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7665 }
7666 }
7667
7668 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
7669 TEST_REQUIRES_ARM_NEON;
7670 for (uint32_t n = 1; n <= 8; n++) {
7671 GemmMicrokernelTester()
7672 .mr(4)
7673 .nr(8)
7674 .kr(1)
7675 .sr(1)
7676 .m(4)
7677 .n(n)
7678 .k(4)
7679 .iterations(1)
7680 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7681 }
7682 }
7683
7684 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4) {
7685 TEST_REQUIRES_ARM_NEON;
7686 for (size_t k = 1; k < 4; k++) {
7687 GemmMicrokernelTester()
7688 .mr(4)
7689 .nr(8)
7690 .kr(1)
7691 .sr(1)
7692 .m(4)
7693 .n(8)
7694 .k(k)
7695 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7696 }
7697 }
7698
7699 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_strided_a) {
7700 TEST_REQUIRES_ARM_NEON;
7701 for (size_t k = 1; k < 4; k++) {
7702 GemmMicrokernelTester()
7703 .mr(4)
7704 .nr(8)
7705 .kr(1)
7706 .sr(1)
7707 .m(4)
7708 .n(8)
7709 .k(k)
7710 .a_stride(7)
7711 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7712 }
7713 }
7714
7715 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_subtile) {
7716 TEST_REQUIRES_ARM_NEON;
7717 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007718 for (uint32_t n = 1; n <= 8; n++) {
7719 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007720 GemmMicrokernelTester()
7721 .mr(4)
7722 .nr(8)
7723 .kr(1)
7724 .sr(1)
7725 .m(m)
7726 .n(n)
7727 .k(k)
7728 .iterations(1)
7729 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7730 }
7731 }
7732 }
7733 }
7734
7735 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4) {
7736 TEST_REQUIRES_ARM_NEON;
7737 for (size_t k = 5; k < 8; k++) {
7738 GemmMicrokernelTester()
7739 .mr(4)
7740 .nr(8)
7741 .kr(1)
7742 .sr(1)
7743 .m(4)
7744 .n(8)
7745 .k(k)
7746 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7747 }
7748 }
7749
7750 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_strided_a) {
7751 TEST_REQUIRES_ARM_NEON;
7752 for (size_t k = 5; k < 8; k++) {
7753 GemmMicrokernelTester()
7754 .mr(4)
7755 .nr(8)
7756 .kr(1)
7757 .sr(1)
7758 .m(4)
7759 .n(8)
7760 .k(k)
7761 .a_stride(11)
7762 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7763 }
7764 }
7765
7766 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_subtile) {
7767 TEST_REQUIRES_ARM_NEON;
7768 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007769 for (uint32_t n = 1; n <= 8; n++) {
7770 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007771 GemmMicrokernelTester()
7772 .mr(4)
7773 .nr(8)
7774 .kr(1)
7775 .sr(1)
7776 .m(m)
7777 .n(n)
7778 .k(k)
7779 .iterations(1)
7780 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7781 }
7782 }
7783 }
7784 }
7785
7786 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4) {
7787 TEST_REQUIRES_ARM_NEON;
7788 for (size_t k = 8; k <= 40; k += 4) {
7789 GemmMicrokernelTester()
7790 .mr(4)
7791 .nr(8)
7792 .kr(1)
7793 .sr(1)
7794 .m(4)
7795 .n(8)
7796 .k(k)
7797 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7798 }
7799 }
7800
7801 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4_strided_a) {
7802 TEST_REQUIRES_ARM_NEON;
7803 for (size_t k = 8; k <= 40; k += 4) {
7804 GemmMicrokernelTester()
7805 .mr(4)
7806 .nr(8)
7807 .kr(1)
7808 .sr(1)
7809 .m(4)
7810 .n(8)
7811 .k(k)
7812 .a_stride(43)
7813 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7814 }
7815 }
7816
7817 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, k_div_4_subtile) {
7818 TEST_REQUIRES_ARM_NEON;
7819 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007820 for (uint32_t n = 1; n <= 8; n++) {
7821 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007822 GemmMicrokernelTester()
7823 .mr(4)
7824 .nr(8)
7825 .kr(1)
7826 .sr(1)
7827 .m(m)
7828 .n(n)
7829 .k(k)
7830 .iterations(1)
7831 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7832 }
7833 }
7834 }
7835 }
7836
7837 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8) {
7838 TEST_REQUIRES_ARM_NEON;
7839 for (uint32_t n = 9; n < 16; n++) {
7840 for (size_t k = 1; k <= 20; k += 5) {
7841 GemmMicrokernelTester()
7842 .mr(4)
7843 .nr(8)
7844 .kr(1)
7845 .sr(1)
7846 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007847 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007848 .k(k)
7849 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7850 }
7851 }
7852 }
7853
7854 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
7855 TEST_REQUIRES_ARM_NEON;
7856 for (uint32_t n = 9; n < 16; n++) {
7857 for (size_t k = 1; k <= 20; k += 5) {
7858 GemmMicrokernelTester()
7859 .mr(4)
7860 .nr(8)
7861 .kr(1)
7862 .sr(1)
7863 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007864 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007865 .k(k)
7866 .cn_stride(11)
7867 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7868 }
7869 }
7870 }
7871
7872 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_a) {
7873 TEST_REQUIRES_ARM_NEON;
7874 for (uint32_t n = 9; n < 16; n++) {
7875 for (size_t k = 1; k <= 20; k += 5) {
7876 GemmMicrokernelTester()
7877 .mr(4)
7878 .nr(8)
7879 .kr(1)
7880 .sr(1)
7881 .m(4)
7882 .n(n)
7883 .k(k)
7884 .a_stride(23)
7885 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7886 }
7887 }
7888 }
7889
7890 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_subtile) {
7891 TEST_REQUIRES_ARM_NEON;
7892 for (uint32_t n = 9; n < 16; n++) {
7893 for (size_t k = 1; k <= 20; k += 5) {
7894 for (uint32_t m = 1; m <= 4; m++) {
7895 GemmMicrokernelTester()
7896 .mr(4)
7897 .nr(8)
7898 .kr(1)
7899 .sr(1)
7900 .m(m)
7901 .n(n)
7902 .k(k)
7903 .iterations(1)
7904 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7905 }
7906 }
7907 }
7908 }
7909
7910 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8) {
7911 TEST_REQUIRES_ARM_NEON;
7912 for (uint32_t n = 16; n <= 24; n += 8) {
7913 for (size_t k = 1; k <= 20; k += 5) {
7914 GemmMicrokernelTester()
7915 .mr(4)
7916 .nr(8)
7917 .kr(1)
7918 .sr(1)
7919 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007920 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007921 .k(k)
7922 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7923 }
7924 }
7925 }
7926
7927 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_cn) {
7928 TEST_REQUIRES_ARM_NEON;
7929 for (uint32_t n = 16; n <= 24; n += 8) {
7930 for (size_t k = 1; k <= 20; k += 5) {
7931 GemmMicrokernelTester()
7932 .mr(4)
7933 .nr(8)
7934 .kr(1)
7935 .sr(1)
7936 .m(4)
7937 .n(n)
7938 .k(k)
7939 .cn_stride(11)
7940 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7941 }
7942 }
7943 }
7944
7945 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_a) {
7946 TEST_REQUIRES_ARM_NEON;
7947 for (uint32_t n = 16; n <= 24; n += 8) {
7948 for (size_t k = 1; k <= 20; k += 5) {
7949 GemmMicrokernelTester()
7950 .mr(4)
7951 .nr(8)
7952 .kr(1)
7953 .sr(1)
7954 .m(4)
7955 .n(n)
7956 .k(k)
7957 .a_stride(23)
7958 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7959 }
7960 }
7961 }
7962
7963 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, n_div_8_subtile) {
7964 TEST_REQUIRES_ARM_NEON;
7965 for (uint32_t n = 16; n <= 24; n += 8) {
7966 for (size_t k = 1; k <= 20; k += 5) {
7967 for (uint32_t m = 1; m <= 4; m++) {
7968 GemmMicrokernelTester()
7969 .mr(4)
7970 .nr(8)
7971 .kr(1)
7972 .sr(1)
7973 .m(m)
7974 .n(n)
7975 .k(k)
7976 .iterations(1)
7977 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7978 }
7979 }
7980 }
7981 }
7982
7983 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cm_subtile) {
7984 TEST_REQUIRES_ARM_NEON;
7985 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007986 for (uint32_t n = 1; n <= 8; n++) {
7987 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007988 GemmMicrokernelTester()
7989 .mr(4)
7990 .nr(8)
7991 .kr(1)
7992 .sr(1)
7993 .m(m)
7994 .n(n)
7995 .k(k)
7996 .cm_stride(11)
7997 .iterations(1)
7998 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
7999 }
8000 }
8001 }
8002 }
8003
8004 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, qmin) {
8005 TEST_REQUIRES_ARM_NEON;
8006 GemmMicrokernelTester()
8007 .mr(4)
8008 .nr(8)
8009 .kr(1)
8010 .sr(1)
8011 .m(4)
8012 .n(8)
8013 .k(4)
8014 .qmin(128)
8015 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
8016 }
8017
8018 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, qmax) {
8019 TEST_REQUIRES_ARM_NEON;
8020 GemmMicrokernelTester()
8021 .mr(4)
8022 .nr(8)
8023 .kr(1)
8024 .sr(1)
8025 .m(4)
8026 .n(8)
8027 .k(4)
8028 .qmax(128)
8029 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
8030 }
8031
8032 TEST(F32_GEMMINC_MINMAX_4X8__NEON_DUP_LD128, strided_cm) {
8033 TEST_REQUIRES_ARM_NEON;
8034 GemmMicrokernelTester()
8035 .mr(4)
8036 .nr(8)
8037 .kr(1)
8038 .sr(1)
8039 .m(4)
8040 .n(8)
8041 .k(4)
8042 .cm_stride(11)
8043 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
8044 }
8045#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8046
8047
8048#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8049 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2) {
8050 TEST_REQUIRES_ARM_NEON_FMA;
8051 GemmMicrokernelTester()
8052 .mr(1)
8053 .nr(8)
8054 .kr(1)
8055 .sr(1)
8056 .m(1)
8057 .n(8)
8058 .k(2)
8059 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8060 }
8061
8062 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cn) {
8063 TEST_REQUIRES_ARM_NEON_FMA;
8064 GemmMicrokernelTester()
8065 .mr(1)
8066 .nr(8)
8067 .kr(1)
8068 .sr(1)
8069 .m(1)
8070 .n(8)
8071 .k(2)
8072 .cn_stride(11)
8073 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8074 }
8075
8076 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
8077 TEST_REQUIRES_ARM_NEON_FMA;
8078 GemmMicrokernelTester()
8079 .mr(1)
8080 .nr(8)
8081 .kr(1)
8082 .sr(1)
8083 .m(1)
8084 .n(8)
8085 .k(2)
8086 .a_stride(5)
8087 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8088 }
8089
8090 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
8091 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008092 for (uint32_t n = 1; n <= 8; n++) {
8093 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008094 GemmMicrokernelTester()
8095 .mr(1)
8096 .nr(8)
8097 .kr(1)
8098 .sr(1)
8099 .m(m)
8100 .n(n)
8101 .k(2)
8102 .iterations(1)
8103 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8104 }
8105 }
8106 }
8107
8108 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
8109 TEST_REQUIRES_ARM_NEON_FMA;
8110 for (uint32_t m = 1; m <= 1; m++) {
8111 GemmMicrokernelTester()
8112 .mr(1)
8113 .nr(8)
8114 .kr(1)
8115 .sr(1)
8116 .m(m)
8117 .n(8)
8118 .k(2)
8119 .iterations(1)
8120 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8121 }
8122 }
8123
8124 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
8125 TEST_REQUIRES_ARM_NEON_FMA;
8126 for (uint32_t n = 1; n <= 8; n++) {
8127 GemmMicrokernelTester()
8128 .mr(1)
8129 .nr(8)
8130 .kr(1)
8131 .sr(1)
8132 .m(1)
8133 .n(n)
8134 .k(2)
8135 .iterations(1)
8136 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8137 }
8138 }
8139
8140 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2) {
8141 TEST_REQUIRES_ARM_NEON_FMA;
8142 for (size_t k = 1; k < 2; k++) {
8143 GemmMicrokernelTester()
8144 .mr(1)
8145 .nr(8)
8146 .kr(1)
8147 .sr(1)
8148 .m(1)
8149 .n(8)
8150 .k(k)
8151 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8152 }
8153 }
8154
8155 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
8156 TEST_REQUIRES_ARM_NEON_FMA;
8157 for (size_t k = 1; k < 2; k++) {
8158 GemmMicrokernelTester()
8159 .mr(1)
8160 .nr(8)
8161 .kr(1)
8162 .sr(1)
8163 .m(1)
8164 .n(8)
8165 .k(k)
8166 .a_stride(5)
8167 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8168 }
8169 }
8170
8171 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
8172 TEST_REQUIRES_ARM_NEON_FMA;
8173 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008174 for (uint32_t n = 1; n <= 8; n++) {
8175 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008176 GemmMicrokernelTester()
8177 .mr(1)
8178 .nr(8)
8179 .kr(1)
8180 .sr(1)
8181 .m(m)
8182 .n(n)
8183 .k(k)
8184 .iterations(1)
8185 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8186 }
8187 }
8188 }
8189 }
8190
8191 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2) {
8192 TEST_REQUIRES_ARM_NEON_FMA;
8193 for (size_t k = 3; k < 4; k++) {
8194 GemmMicrokernelTester()
8195 .mr(1)
8196 .nr(8)
8197 .kr(1)
8198 .sr(1)
8199 .m(1)
8200 .n(8)
8201 .k(k)
8202 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8203 }
8204 }
8205
8206 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
8207 TEST_REQUIRES_ARM_NEON_FMA;
8208 for (size_t k = 3; k < 4; k++) {
8209 GemmMicrokernelTester()
8210 .mr(1)
8211 .nr(8)
8212 .kr(1)
8213 .sr(1)
8214 .m(1)
8215 .n(8)
8216 .k(k)
8217 .a_stride(7)
8218 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8219 }
8220 }
8221
8222 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
8223 TEST_REQUIRES_ARM_NEON_FMA;
8224 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008225 for (uint32_t n = 1; n <= 8; n++) {
8226 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008227 GemmMicrokernelTester()
8228 .mr(1)
8229 .nr(8)
8230 .kr(1)
8231 .sr(1)
8232 .m(m)
8233 .n(n)
8234 .k(k)
8235 .iterations(1)
8236 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8237 }
8238 }
8239 }
8240 }
8241
8242 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2) {
8243 TEST_REQUIRES_ARM_NEON_FMA;
8244 for (size_t k = 4; k <= 20; k += 2) {
8245 GemmMicrokernelTester()
8246 .mr(1)
8247 .nr(8)
8248 .kr(1)
8249 .sr(1)
8250 .m(1)
8251 .n(8)
8252 .k(k)
8253 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8254 }
8255 }
8256
8257 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
8258 TEST_REQUIRES_ARM_NEON_FMA;
8259 for (size_t k = 4; k <= 20; k += 2) {
8260 GemmMicrokernelTester()
8261 .mr(1)
8262 .nr(8)
8263 .kr(1)
8264 .sr(1)
8265 .m(1)
8266 .n(8)
8267 .k(k)
8268 .a_stride(23)
8269 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8270 }
8271 }
8272
8273 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
8274 TEST_REQUIRES_ARM_NEON_FMA;
8275 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008276 for (uint32_t n = 1; n <= 8; n++) {
8277 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008278 GemmMicrokernelTester()
8279 .mr(1)
8280 .nr(8)
8281 .kr(1)
8282 .sr(1)
8283 .m(m)
8284 .n(n)
8285 .k(k)
8286 .iterations(1)
8287 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8288 }
8289 }
8290 }
8291 }
8292
8293 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8) {
8294 TEST_REQUIRES_ARM_NEON_FMA;
8295 for (uint32_t n = 9; n < 16; n++) {
8296 for (size_t k = 1; k <= 10; k += 3) {
8297 GemmMicrokernelTester()
8298 .mr(1)
8299 .nr(8)
8300 .kr(1)
8301 .sr(1)
8302 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008303 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008304 .k(k)
8305 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8306 }
8307 }
8308 }
8309
8310 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
8311 TEST_REQUIRES_ARM_NEON_FMA;
8312 for (uint32_t n = 9; n < 16; n++) {
8313 for (size_t k = 1; k <= 10; k += 3) {
8314 GemmMicrokernelTester()
8315 .mr(1)
8316 .nr(8)
8317 .kr(1)
8318 .sr(1)
8319 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008320 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008321 .k(k)
8322 .cn_stride(11)
8323 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8324 }
8325 }
8326 }
8327
8328 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
8329 TEST_REQUIRES_ARM_NEON_FMA;
8330 for (uint32_t n = 9; n < 16; n++) {
8331 for (size_t k = 1; k <= 10; k += 3) {
8332 GemmMicrokernelTester()
8333 .mr(1)
8334 .nr(8)
8335 .kr(1)
8336 .sr(1)
8337 .m(1)
8338 .n(n)
8339 .k(k)
8340 .a_stride(13)
8341 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8342 }
8343 }
8344 }
8345
8346 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
8347 TEST_REQUIRES_ARM_NEON_FMA;
8348 for (uint32_t n = 9; n < 16; n++) {
8349 for (size_t k = 1; k <= 10; k += 3) {
8350 for (uint32_t m = 1; m <= 1; m++) {
8351 GemmMicrokernelTester()
8352 .mr(1)
8353 .nr(8)
8354 .kr(1)
8355 .sr(1)
8356 .m(m)
8357 .n(n)
8358 .k(k)
8359 .iterations(1)
8360 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8361 }
8362 }
8363 }
8364 }
8365
8366 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8) {
8367 TEST_REQUIRES_ARM_NEON_FMA;
8368 for (uint32_t n = 16; n <= 24; n += 8) {
8369 for (size_t k = 1; k <= 10; k += 3) {
8370 GemmMicrokernelTester()
8371 .mr(1)
8372 .nr(8)
8373 .kr(1)
8374 .sr(1)
8375 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008376 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008377 .k(k)
8378 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8379 }
8380 }
8381 }
8382
8383 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
8384 TEST_REQUIRES_ARM_NEON_FMA;
8385 for (uint32_t n = 16; n <= 24; n += 8) {
8386 for (size_t k = 1; k <= 10; k += 3) {
8387 GemmMicrokernelTester()
8388 .mr(1)
8389 .nr(8)
8390 .kr(1)
8391 .sr(1)
8392 .m(1)
8393 .n(n)
8394 .k(k)
8395 .cn_stride(11)
8396 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8397 }
8398 }
8399 }
8400
8401 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
8402 TEST_REQUIRES_ARM_NEON_FMA;
8403 for (uint32_t n = 16; n <= 24; n += 8) {
8404 for (size_t k = 1; k <= 10; k += 3) {
8405 GemmMicrokernelTester()
8406 .mr(1)
8407 .nr(8)
8408 .kr(1)
8409 .sr(1)
8410 .m(1)
8411 .n(n)
8412 .k(k)
8413 .a_stride(13)
8414 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8415 }
8416 }
8417 }
8418
8419 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
8420 TEST_REQUIRES_ARM_NEON_FMA;
8421 for (uint32_t n = 16; n <= 24; n += 8) {
8422 for (size_t k = 1; k <= 10; k += 3) {
8423 for (uint32_t m = 1; m <= 1; m++) {
8424 GemmMicrokernelTester()
8425 .mr(1)
8426 .nr(8)
8427 .kr(1)
8428 .sr(1)
8429 .m(m)
8430 .n(n)
8431 .k(k)
8432 .iterations(1)
8433 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8434 }
8435 }
8436 }
8437 }
8438
8439 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
8440 TEST_REQUIRES_ARM_NEON_FMA;
8441 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008442 for (uint32_t n = 1; n <= 8; n++) {
8443 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008444 GemmMicrokernelTester()
8445 .mr(1)
8446 .nr(8)
8447 .kr(1)
8448 .sr(1)
8449 .m(m)
8450 .n(n)
8451 .k(k)
8452 .cm_stride(11)
8453 .iterations(1)
8454 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8455 }
8456 }
8457 }
8458 }
8459
8460 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, qmin) {
8461 TEST_REQUIRES_ARM_NEON_FMA;
8462 GemmMicrokernelTester()
8463 .mr(1)
8464 .nr(8)
8465 .kr(1)
8466 .sr(1)
8467 .m(1)
8468 .n(8)
8469 .k(2)
8470 .qmin(128)
8471 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8472 }
8473
8474 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, qmax) {
8475 TEST_REQUIRES_ARM_NEON_FMA;
8476 GemmMicrokernelTester()
8477 .mr(1)
8478 .nr(8)
8479 .kr(1)
8480 .sr(1)
8481 .m(1)
8482 .n(8)
8483 .k(2)
8484 .qmax(128)
8485 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8486 }
8487
8488 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm) {
8489 TEST_REQUIRES_ARM_NEON_FMA;
8490 GemmMicrokernelTester()
8491 .mr(1)
8492 .nr(8)
8493 .kr(1)
8494 .sr(1)
8495 .m(1)
8496 .n(8)
8497 .k(2)
8498 .cm_stride(11)
8499 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8500 }
8501#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8502
8503
8504#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8505 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2) {
8506 TEST_REQUIRES_ARM_NEON_FMA;
8507 GemmMicrokernelTester()
8508 .mr(6)
8509 .nr(8)
8510 .kr(1)
8511 .sr(1)
8512 .m(6)
8513 .n(8)
8514 .k(2)
8515 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8516 }
8517
8518 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cn) {
8519 TEST_REQUIRES_ARM_NEON_FMA;
8520 GemmMicrokernelTester()
8521 .mr(6)
8522 .nr(8)
8523 .kr(1)
8524 .sr(1)
8525 .m(6)
8526 .n(8)
8527 .k(2)
8528 .cn_stride(11)
8529 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8530 }
8531
8532 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
8533 TEST_REQUIRES_ARM_NEON_FMA;
8534 GemmMicrokernelTester()
8535 .mr(6)
8536 .nr(8)
8537 .kr(1)
8538 .sr(1)
8539 .m(6)
8540 .n(8)
8541 .k(2)
8542 .a_stride(5)
8543 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8544 }
8545
8546 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
8547 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008548 for (uint32_t n = 1; n <= 8; n++) {
8549 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008550 GemmMicrokernelTester()
8551 .mr(6)
8552 .nr(8)
8553 .kr(1)
8554 .sr(1)
8555 .m(m)
8556 .n(n)
8557 .k(2)
8558 .iterations(1)
8559 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8560 }
8561 }
8562 }
8563
8564 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
8565 TEST_REQUIRES_ARM_NEON_FMA;
8566 for (uint32_t m = 1; m <= 6; m++) {
8567 GemmMicrokernelTester()
8568 .mr(6)
8569 .nr(8)
8570 .kr(1)
8571 .sr(1)
8572 .m(m)
8573 .n(8)
8574 .k(2)
8575 .iterations(1)
8576 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8577 }
8578 }
8579
8580 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
8581 TEST_REQUIRES_ARM_NEON_FMA;
8582 for (uint32_t n = 1; n <= 8; n++) {
8583 GemmMicrokernelTester()
8584 .mr(6)
8585 .nr(8)
8586 .kr(1)
8587 .sr(1)
8588 .m(6)
8589 .n(n)
8590 .k(2)
8591 .iterations(1)
8592 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8593 }
8594 }
8595
8596 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2) {
8597 TEST_REQUIRES_ARM_NEON_FMA;
8598 for (size_t k = 1; k < 2; k++) {
8599 GemmMicrokernelTester()
8600 .mr(6)
8601 .nr(8)
8602 .kr(1)
8603 .sr(1)
8604 .m(6)
8605 .n(8)
8606 .k(k)
8607 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8608 }
8609 }
8610
8611 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
8612 TEST_REQUIRES_ARM_NEON_FMA;
8613 for (size_t k = 1; k < 2; k++) {
8614 GemmMicrokernelTester()
8615 .mr(6)
8616 .nr(8)
8617 .kr(1)
8618 .sr(1)
8619 .m(6)
8620 .n(8)
8621 .k(k)
8622 .a_stride(5)
8623 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8624 }
8625 }
8626
8627 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
8628 TEST_REQUIRES_ARM_NEON_FMA;
8629 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008630 for (uint32_t n = 1; n <= 8; n++) {
8631 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008632 GemmMicrokernelTester()
8633 .mr(6)
8634 .nr(8)
8635 .kr(1)
8636 .sr(1)
8637 .m(m)
8638 .n(n)
8639 .k(k)
8640 .iterations(1)
8641 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8642 }
8643 }
8644 }
8645 }
8646
8647 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2) {
8648 TEST_REQUIRES_ARM_NEON_FMA;
8649 for (size_t k = 3; k < 4; k++) {
8650 GemmMicrokernelTester()
8651 .mr(6)
8652 .nr(8)
8653 .kr(1)
8654 .sr(1)
8655 .m(6)
8656 .n(8)
8657 .k(k)
8658 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8659 }
8660 }
8661
8662 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
8663 TEST_REQUIRES_ARM_NEON_FMA;
8664 for (size_t k = 3; k < 4; k++) {
8665 GemmMicrokernelTester()
8666 .mr(6)
8667 .nr(8)
8668 .kr(1)
8669 .sr(1)
8670 .m(6)
8671 .n(8)
8672 .k(k)
8673 .a_stride(7)
8674 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8675 }
8676 }
8677
8678 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
8679 TEST_REQUIRES_ARM_NEON_FMA;
8680 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008681 for (uint32_t n = 1; n <= 8; n++) {
8682 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008683 GemmMicrokernelTester()
8684 .mr(6)
8685 .nr(8)
8686 .kr(1)
8687 .sr(1)
8688 .m(m)
8689 .n(n)
8690 .k(k)
8691 .iterations(1)
8692 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8693 }
8694 }
8695 }
8696 }
8697
8698 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2) {
8699 TEST_REQUIRES_ARM_NEON_FMA;
8700 for (size_t k = 4; k <= 20; k += 2) {
8701 GemmMicrokernelTester()
8702 .mr(6)
8703 .nr(8)
8704 .kr(1)
8705 .sr(1)
8706 .m(6)
8707 .n(8)
8708 .k(k)
8709 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8710 }
8711 }
8712
8713 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
8714 TEST_REQUIRES_ARM_NEON_FMA;
8715 for (size_t k = 4; k <= 20; k += 2) {
8716 GemmMicrokernelTester()
8717 .mr(6)
8718 .nr(8)
8719 .kr(1)
8720 .sr(1)
8721 .m(6)
8722 .n(8)
8723 .k(k)
8724 .a_stride(23)
8725 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8726 }
8727 }
8728
8729 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
8730 TEST_REQUIRES_ARM_NEON_FMA;
8731 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008732 for (uint32_t n = 1; n <= 8; n++) {
8733 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008734 GemmMicrokernelTester()
8735 .mr(6)
8736 .nr(8)
8737 .kr(1)
8738 .sr(1)
8739 .m(m)
8740 .n(n)
8741 .k(k)
8742 .iterations(1)
8743 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8744 }
8745 }
8746 }
8747 }
8748
8749 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8) {
8750 TEST_REQUIRES_ARM_NEON_FMA;
8751 for (uint32_t n = 9; n < 16; n++) {
8752 for (size_t k = 1; k <= 10; k += 3) {
8753 GemmMicrokernelTester()
8754 .mr(6)
8755 .nr(8)
8756 .kr(1)
8757 .sr(1)
8758 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008759 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008760 .k(k)
8761 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8762 }
8763 }
8764 }
8765
8766 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
8767 TEST_REQUIRES_ARM_NEON_FMA;
8768 for (uint32_t n = 9; n < 16; n++) {
8769 for (size_t k = 1; k <= 10; k += 3) {
8770 GemmMicrokernelTester()
8771 .mr(6)
8772 .nr(8)
8773 .kr(1)
8774 .sr(1)
8775 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008776 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008777 .k(k)
8778 .cn_stride(11)
8779 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8780 }
8781 }
8782 }
8783
8784 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
8785 TEST_REQUIRES_ARM_NEON_FMA;
8786 for (uint32_t n = 9; n < 16; n++) {
8787 for (size_t k = 1; k <= 10; k += 3) {
8788 GemmMicrokernelTester()
8789 .mr(6)
8790 .nr(8)
8791 .kr(1)
8792 .sr(1)
8793 .m(6)
8794 .n(n)
8795 .k(k)
8796 .a_stride(13)
8797 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8798 }
8799 }
8800 }
8801
8802 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
8803 TEST_REQUIRES_ARM_NEON_FMA;
8804 for (uint32_t n = 9; n < 16; n++) {
8805 for (size_t k = 1; k <= 10; k += 3) {
8806 for (uint32_t m = 1; m <= 6; m++) {
8807 GemmMicrokernelTester()
8808 .mr(6)
8809 .nr(8)
8810 .kr(1)
8811 .sr(1)
8812 .m(m)
8813 .n(n)
8814 .k(k)
8815 .iterations(1)
8816 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8817 }
8818 }
8819 }
8820 }
8821
8822 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8) {
8823 TEST_REQUIRES_ARM_NEON_FMA;
8824 for (uint32_t n = 16; n <= 24; n += 8) {
8825 for (size_t k = 1; k <= 10; k += 3) {
8826 GemmMicrokernelTester()
8827 .mr(6)
8828 .nr(8)
8829 .kr(1)
8830 .sr(1)
8831 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008832 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008833 .k(k)
8834 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8835 }
8836 }
8837 }
8838
8839 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
8840 TEST_REQUIRES_ARM_NEON_FMA;
8841 for (uint32_t n = 16; n <= 24; n += 8) {
8842 for (size_t k = 1; k <= 10; k += 3) {
8843 GemmMicrokernelTester()
8844 .mr(6)
8845 .nr(8)
8846 .kr(1)
8847 .sr(1)
8848 .m(6)
8849 .n(n)
8850 .k(k)
8851 .cn_stride(11)
8852 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8853 }
8854 }
8855 }
8856
8857 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
8858 TEST_REQUIRES_ARM_NEON_FMA;
8859 for (uint32_t n = 16; n <= 24; n += 8) {
8860 for (size_t k = 1; k <= 10; k += 3) {
8861 GemmMicrokernelTester()
8862 .mr(6)
8863 .nr(8)
8864 .kr(1)
8865 .sr(1)
8866 .m(6)
8867 .n(n)
8868 .k(k)
8869 .a_stride(13)
8870 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8871 }
8872 }
8873 }
8874
8875 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
8876 TEST_REQUIRES_ARM_NEON_FMA;
8877 for (uint32_t n = 16; n <= 24; n += 8) {
8878 for (size_t k = 1; k <= 10; k += 3) {
8879 for (uint32_t m = 1; m <= 6; m++) {
8880 GemmMicrokernelTester()
8881 .mr(6)
8882 .nr(8)
8883 .kr(1)
8884 .sr(1)
8885 .m(m)
8886 .n(n)
8887 .k(k)
8888 .iterations(1)
8889 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8890 }
8891 }
8892 }
8893 }
8894
8895 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
8896 TEST_REQUIRES_ARM_NEON_FMA;
8897 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008898 for (uint32_t n = 1; n <= 8; n++) {
8899 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008900 GemmMicrokernelTester()
8901 .mr(6)
8902 .nr(8)
8903 .kr(1)
8904 .sr(1)
8905 .m(m)
8906 .n(n)
8907 .k(k)
8908 .cm_stride(11)
8909 .iterations(1)
8910 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8911 }
8912 }
8913 }
8914 }
8915
8916 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, qmin) {
8917 TEST_REQUIRES_ARM_NEON_FMA;
8918 GemmMicrokernelTester()
8919 .mr(6)
8920 .nr(8)
8921 .kr(1)
8922 .sr(1)
8923 .m(6)
8924 .n(8)
8925 .k(2)
8926 .qmin(128)
8927 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8928 }
8929
8930 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, qmax) {
8931 TEST_REQUIRES_ARM_NEON_FMA;
8932 GemmMicrokernelTester()
8933 .mr(6)
8934 .nr(8)
8935 .kr(1)
8936 .sr(1)
8937 .m(6)
8938 .n(8)
8939 .k(2)
8940 .qmax(128)
8941 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8942 }
8943
8944 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm) {
8945 TEST_REQUIRES_ARM_NEON_FMA;
8946 GemmMicrokernelTester()
8947 .mr(6)
8948 .nr(8)
8949 .kr(1)
8950 .sr(1)
8951 .m(6)
8952 .n(8)
8953 .k(2)
8954 .cm_stride(11)
8955 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
8956 }
8957#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8958
8959
8960#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8961 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4) {
8962 TEST_REQUIRES_ARM_NEON;
8963 GemmMicrokernelTester()
8964 .mr(6)
8965 .nr(8)
8966 .kr(1)
8967 .sr(4)
8968 .m(6)
8969 .n(8)
8970 .k(4)
8971 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
8972 }
8973
8974 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cn) {
8975 TEST_REQUIRES_ARM_NEON;
8976 GemmMicrokernelTester()
8977 .mr(6)
8978 .nr(8)
8979 .kr(1)
8980 .sr(4)
8981 .m(6)
8982 .n(8)
8983 .k(4)
8984 .cn_stride(11)
8985 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
8986 }
8987
8988 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_strided_a) {
8989 TEST_REQUIRES_ARM_NEON;
8990 GemmMicrokernelTester()
8991 .mr(6)
8992 .nr(8)
8993 .kr(1)
8994 .sr(4)
8995 .m(6)
8996 .n(8)
8997 .k(4)
8998 .a_stride(7)
8999 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9000 }
9001
9002 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile) {
9003 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009004 for (uint32_t n = 1; n <= 8; n++) {
9005 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009006 GemmMicrokernelTester()
9007 .mr(6)
9008 .nr(8)
9009 .kr(1)
9010 .sr(4)
9011 .m(m)
9012 .n(n)
9013 .k(4)
9014 .iterations(1)
9015 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9016 }
9017 }
9018 }
9019
9020 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile_m) {
9021 TEST_REQUIRES_ARM_NEON;
9022 for (uint32_t m = 1; m <= 6; m++) {
9023 GemmMicrokernelTester()
9024 .mr(6)
9025 .nr(8)
9026 .kr(1)
9027 .sr(4)
9028 .m(m)
9029 .n(8)
9030 .k(4)
9031 .iterations(1)
9032 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9033 }
9034 }
9035
9036 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_eq_4_subtile_n) {
9037 TEST_REQUIRES_ARM_NEON;
9038 for (uint32_t n = 1; n <= 8; n++) {
9039 GemmMicrokernelTester()
9040 .mr(6)
9041 .nr(8)
9042 .kr(1)
9043 .sr(4)
9044 .m(6)
9045 .n(n)
9046 .k(4)
9047 .iterations(1)
9048 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9049 }
9050 }
9051
9052 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4) {
9053 TEST_REQUIRES_ARM_NEON;
9054 for (size_t k = 1; k < 4; k++) {
9055 GemmMicrokernelTester()
9056 .mr(6)
9057 .nr(8)
9058 .kr(1)
9059 .sr(4)
9060 .m(6)
9061 .n(8)
9062 .k(k)
9063 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9064 }
9065 }
9066
9067 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4_strided_a) {
9068 TEST_REQUIRES_ARM_NEON;
9069 for (size_t k = 1; k < 4; k++) {
9070 GemmMicrokernelTester()
9071 .mr(6)
9072 .nr(8)
9073 .kr(1)
9074 .sr(4)
9075 .m(6)
9076 .n(8)
9077 .k(k)
9078 .a_stride(7)
9079 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9080 }
9081 }
9082
9083 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_lt_4_subtile) {
9084 TEST_REQUIRES_ARM_NEON;
9085 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009086 for (uint32_t n = 1; n <= 8; n++) {
9087 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009088 GemmMicrokernelTester()
9089 .mr(6)
9090 .nr(8)
9091 .kr(1)
9092 .sr(4)
9093 .m(m)
9094 .n(n)
9095 .k(k)
9096 .iterations(1)
9097 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9098 }
9099 }
9100 }
9101 }
9102
9103 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4) {
9104 TEST_REQUIRES_ARM_NEON;
9105 for (size_t k = 5; k < 8; k++) {
9106 GemmMicrokernelTester()
9107 .mr(6)
9108 .nr(8)
9109 .kr(1)
9110 .sr(4)
9111 .m(6)
9112 .n(8)
9113 .k(k)
9114 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9115 }
9116 }
9117
9118 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4_strided_a) {
9119 TEST_REQUIRES_ARM_NEON;
9120 for (size_t k = 5; k < 8; k++) {
9121 GemmMicrokernelTester()
9122 .mr(6)
9123 .nr(8)
9124 .kr(1)
9125 .sr(4)
9126 .m(6)
9127 .n(8)
9128 .k(k)
9129 .a_stride(11)
9130 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9131 }
9132 }
9133
9134 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_gt_4_subtile) {
9135 TEST_REQUIRES_ARM_NEON;
9136 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009137 for (uint32_t n = 1; n <= 8; n++) {
9138 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009139 GemmMicrokernelTester()
9140 .mr(6)
9141 .nr(8)
9142 .kr(1)
9143 .sr(4)
9144 .m(m)
9145 .n(n)
9146 .k(k)
9147 .iterations(1)
9148 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9149 }
9150 }
9151 }
9152 }
9153
9154 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4) {
9155 TEST_REQUIRES_ARM_NEON;
9156 for (size_t k = 8; k <= 40; k += 4) {
9157 GemmMicrokernelTester()
9158 .mr(6)
9159 .nr(8)
9160 .kr(1)
9161 .sr(4)
9162 .m(6)
9163 .n(8)
9164 .k(k)
9165 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9166 }
9167 }
9168
9169 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4_strided_a) {
9170 TEST_REQUIRES_ARM_NEON;
9171 for (size_t k = 8; k <= 40; k += 4) {
9172 GemmMicrokernelTester()
9173 .mr(6)
9174 .nr(8)
9175 .kr(1)
9176 .sr(4)
9177 .m(6)
9178 .n(8)
9179 .k(k)
9180 .a_stride(43)
9181 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9182 }
9183 }
9184
9185 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, k_div_4_subtile) {
9186 TEST_REQUIRES_ARM_NEON;
9187 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009188 for (uint32_t n = 1; n <= 8; n++) {
9189 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009190 GemmMicrokernelTester()
9191 .mr(6)
9192 .nr(8)
9193 .kr(1)
9194 .sr(4)
9195 .m(m)
9196 .n(n)
9197 .k(k)
9198 .iterations(1)
9199 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9200 }
9201 }
9202 }
9203 }
9204
9205 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8) {
9206 TEST_REQUIRES_ARM_NEON;
9207 for (uint32_t n = 9; n < 16; n++) {
9208 for (size_t k = 1; k <= 20; k += 5) {
9209 GemmMicrokernelTester()
9210 .mr(6)
9211 .nr(8)
9212 .kr(1)
9213 .sr(4)
9214 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009215 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009216 .k(k)
9217 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9218 }
9219 }
9220 }
9221
9222 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_strided_cn) {
9223 TEST_REQUIRES_ARM_NEON;
9224 for (uint32_t n = 9; n < 16; n++) {
9225 for (size_t k = 1; k <= 20; k += 5) {
9226 GemmMicrokernelTester()
9227 .mr(6)
9228 .nr(8)
9229 .kr(1)
9230 .sr(4)
9231 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009232 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009233 .k(k)
9234 .cn_stride(11)
9235 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9236 }
9237 }
9238 }
9239
9240 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_strided_a) {
9241 TEST_REQUIRES_ARM_NEON;
9242 for (uint32_t n = 9; n < 16; n++) {
9243 for (size_t k = 1; k <= 20; k += 5) {
9244 GemmMicrokernelTester()
9245 .mr(6)
9246 .nr(8)
9247 .kr(1)
9248 .sr(4)
9249 .m(6)
9250 .n(n)
9251 .k(k)
9252 .a_stride(23)
9253 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9254 }
9255 }
9256 }
9257
9258 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_gt_8_subtile) {
9259 TEST_REQUIRES_ARM_NEON;
9260 for (uint32_t n = 9; n < 16; n++) {
9261 for (size_t k = 1; k <= 20; k += 5) {
9262 for (uint32_t m = 1; m <= 6; m++) {
9263 GemmMicrokernelTester()
9264 .mr(6)
9265 .nr(8)
9266 .kr(1)
9267 .sr(4)
9268 .m(m)
9269 .n(n)
9270 .k(k)
9271 .iterations(1)
9272 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9273 }
9274 }
9275 }
9276 }
9277
9278 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8) {
9279 TEST_REQUIRES_ARM_NEON;
9280 for (uint32_t n = 16; n <= 24; n += 8) {
9281 for (size_t k = 1; k <= 20; k += 5) {
9282 GemmMicrokernelTester()
9283 .mr(6)
9284 .nr(8)
9285 .kr(1)
9286 .sr(4)
9287 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009288 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009289 .k(k)
9290 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9291 }
9292 }
9293 }
9294
9295 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_strided_cn) {
9296 TEST_REQUIRES_ARM_NEON;
9297 for (uint32_t n = 16; n <= 24; n += 8) {
9298 for (size_t k = 1; k <= 20; k += 5) {
9299 GemmMicrokernelTester()
9300 .mr(6)
9301 .nr(8)
9302 .kr(1)
9303 .sr(4)
9304 .m(6)
9305 .n(n)
9306 .k(k)
9307 .cn_stride(11)
9308 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9309 }
9310 }
9311 }
9312
9313 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_strided_a) {
9314 TEST_REQUIRES_ARM_NEON;
9315 for (uint32_t n = 16; n <= 24; n += 8) {
9316 for (size_t k = 1; k <= 20; k += 5) {
9317 GemmMicrokernelTester()
9318 .mr(6)
9319 .nr(8)
9320 .kr(1)
9321 .sr(4)
9322 .m(6)
9323 .n(n)
9324 .k(k)
9325 .a_stride(23)
9326 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9327 }
9328 }
9329 }
9330
9331 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, n_div_8_subtile) {
9332 TEST_REQUIRES_ARM_NEON;
9333 for (uint32_t n = 16; n <= 24; n += 8) {
9334 for (size_t k = 1; k <= 20; k += 5) {
9335 for (uint32_t m = 1; m <= 6; m++) {
9336 GemmMicrokernelTester()
9337 .mr(6)
9338 .nr(8)
9339 .kr(1)
9340 .sr(4)
9341 .m(m)
9342 .n(n)
9343 .k(k)
9344 .iterations(1)
9345 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9346 }
9347 }
9348 }
9349 }
9350
9351 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cm_subtile) {
9352 TEST_REQUIRES_ARM_NEON;
9353 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009354 for (uint32_t n = 1; n <= 8; n++) {
9355 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009356 GemmMicrokernelTester()
9357 .mr(6)
9358 .nr(8)
9359 .kr(1)
9360 .sr(4)
9361 .m(m)
9362 .n(n)
9363 .k(k)
9364 .cm_stride(11)
9365 .iterations(1)
9366 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9367 }
9368 }
9369 }
9370 }
9371
9372 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, qmin) {
9373 TEST_REQUIRES_ARM_NEON;
9374 GemmMicrokernelTester()
9375 .mr(6)
9376 .nr(8)
9377 .kr(1)
9378 .sr(4)
9379 .m(6)
9380 .n(8)
9381 .k(4)
9382 .qmin(128)
9383 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9384 }
9385
9386 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, qmax) {
9387 TEST_REQUIRES_ARM_NEON;
9388 GemmMicrokernelTester()
9389 .mr(6)
9390 .nr(8)
9391 .kr(1)
9392 .sr(4)
9393 .m(6)
9394 .n(8)
9395 .k(4)
9396 .qmax(128)
9397 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9398 }
9399
9400 TEST(F32_GEMMINC_MINMAX_6X8S4__NEON, strided_cm) {
9401 TEST_REQUIRES_ARM_NEON;
9402 GemmMicrokernelTester()
9403 .mr(6)
9404 .nr(8)
9405 .kr(1)
9406 .sr(4)
9407 .m(6)
9408 .n(8)
9409 .k(4)
9410 .cm_stride(11)
9411 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
9412 }
9413#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9414
9415
9416#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9417 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4) {
9418 TEST_REQUIRES_ARM_NEON;
9419 GemmMicrokernelTester()
9420 .mr(8)
9421 .nr(8)
9422 .kr(1)
9423 .sr(4)
9424 .m(8)
9425 .n(8)
9426 .k(4)
9427 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9428 }
9429
9430 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cn) {
9431 TEST_REQUIRES_ARM_NEON;
9432 GemmMicrokernelTester()
9433 .mr(8)
9434 .nr(8)
9435 .kr(1)
9436 .sr(4)
9437 .m(8)
9438 .n(8)
9439 .k(4)
9440 .cn_stride(11)
9441 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9442 }
9443
9444 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_strided_a) {
9445 TEST_REQUIRES_ARM_NEON;
9446 GemmMicrokernelTester()
9447 .mr(8)
9448 .nr(8)
9449 .kr(1)
9450 .sr(4)
9451 .m(8)
9452 .n(8)
9453 .k(4)
9454 .a_stride(7)
9455 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9456 }
9457
9458 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile) {
9459 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009460 for (uint32_t n = 1; n <= 8; n++) {
9461 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009462 GemmMicrokernelTester()
9463 .mr(8)
9464 .nr(8)
9465 .kr(1)
9466 .sr(4)
9467 .m(m)
9468 .n(n)
9469 .k(4)
9470 .iterations(1)
9471 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9472 }
9473 }
9474 }
9475
9476 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile_m) {
9477 TEST_REQUIRES_ARM_NEON;
9478 for (uint32_t m = 1; m <= 8; m++) {
9479 GemmMicrokernelTester()
9480 .mr(8)
9481 .nr(8)
9482 .kr(1)
9483 .sr(4)
9484 .m(m)
9485 .n(8)
9486 .k(4)
9487 .iterations(1)
9488 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9489 }
9490 }
9491
9492 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_eq_4_subtile_n) {
9493 TEST_REQUIRES_ARM_NEON;
9494 for (uint32_t n = 1; n <= 8; n++) {
9495 GemmMicrokernelTester()
9496 .mr(8)
9497 .nr(8)
9498 .kr(1)
9499 .sr(4)
9500 .m(8)
9501 .n(n)
9502 .k(4)
9503 .iterations(1)
9504 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9505 }
9506 }
9507
9508 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4) {
9509 TEST_REQUIRES_ARM_NEON;
9510 for (size_t k = 1; k < 4; k++) {
9511 GemmMicrokernelTester()
9512 .mr(8)
9513 .nr(8)
9514 .kr(1)
9515 .sr(4)
9516 .m(8)
9517 .n(8)
9518 .k(k)
9519 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9520 }
9521 }
9522
9523 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4_strided_a) {
9524 TEST_REQUIRES_ARM_NEON;
9525 for (size_t k = 1; k < 4; k++) {
9526 GemmMicrokernelTester()
9527 .mr(8)
9528 .nr(8)
9529 .kr(1)
9530 .sr(4)
9531 .m(8)
9532 .n(8)
9533 .k(k)
9534 .a_stride(7)
9535 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9536 }
9537 }
9538
9539 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_lt_4_subtile) {
9540 TEST_REQUIRES_ARM_NEON;
9541 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009542 for (uint32_t n = 1; n <= 8; n++) {
9543 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009544 GemmMicrokernelTester()
9545 .mr(8)
9546 .nr(8)
9547 .kr(1)
9548 .sr(4)
9549 .m(m)
9550 .n(n)
9551 .k(k)
9552 .iterations(1)
9553 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9554 }
9555 }
9556 }
9557 }
9558
9559 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4) {
9560 TEST_REQUIRES_ARM_NEON;
9561 for (size_t k = 5; k < 8; k++) {
9562 GemmMicrokernelTester()
9563 .mr(8)
9564 .nr(8)
9565 .kr(1)
9566 .sr(4)
9567 .m(8)
9568 .n(8)
9569 .k(k)
9570 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9571 }
9572 }
9573
9574 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4_strided_a) {
9575 TEST_REQUIRES_ARM_NEON;
9576 for (size_t k = 5; k < 8; k++) {
9577 GemmMicrokernelTester()
9578 .mr(8)
9579 .nr(8)
9580 .kr(1)
9581 .sr(4)
9582 .m(8)
9583 .n(8)
9584 .k(k)
9585 .a_stride(11)
9586 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9587 }
9588 }
9589
9590 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_gt_4_subtile) {
9591 TEST_REQUIRES_ARM_NEON;
9592 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009593 for (uint32_t n = 1; n <= 8; n++) {
9594 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009595 GemmMicrokernelTester()
9596 .mr(8)
9597 .nr(8)
9598 .kr(1)
9599 .sr(4)
9600 .m(m)
9601 .n(n)
9602 .k(k)
9603 .iterations(1)
9604 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9605 }
9606 }
9607 }
9608 }
9609
9610 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4) {
9611 TEST_REQUIRES_ARM_NEON;
9612 for (size_t k = 8; k <= 40; k += 4) {
9613 GemmMicrokernelTester()
9614 .mr(8)
9615 .nr(8)
9616 .kr(1)
9617 .sr(4)
9618 .m(8)
9619 .n(8)
9620 .k(k)
9621 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9622 }
9623 }
9624
9625 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4_strided_a) {
9626 TEST_REQUIRES_ARM_NEON;
9627 for (size_t k = 8; k <= 40; k += 4) {
9628 GemmMicrokernelTester()
9629 .mr(8)
9630 .nr(8)
9631 .kr(1)
9632 .sr(4)
9633 .m(8)
9634 .n(8)
9635 .k(k)
9636 .a_stride(43)
9637 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9638 }
9639 }
9640
9641 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, k_div_4_subtile) {
9642 TEST_REQUIRES_ARM_NEON;
9643 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009644 for (uint32_t n = 1; n <= 8; n++) {
9645 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009646 GemmMicrokernelTester()
9647 .mr(8)
9648 .nr(8)
9649 .kr(1)
9650 .sr(4)
9651 .m(m)
9652 .n(n)
9653 .k(k)
9654 .iterations(1)
9655 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9656 }
9657 }
9658 }
9659 }
9660
9661 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8) {
9662 TEST_REQUIRES_ARM_NEON;
9663 for (uint32_t n = 9; n < 16; n++) {
9664 for (size_t k = 1; k <= 20; k += 5) {
9665 GemmMicrokernelTester()
9666 .mr(8)
9667 .nr(8)
9668 .kr(1)
9669 .sr(4)
9670 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009671 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009672 .k(k)
9673 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9674 }
9675 }
9676 }
9677
9678 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_strided_cn) {
9679 TEST_REQUIRES_ARM_NEON;
9680 for (uint32_t n = 9; n < 16; n++) {
9681 for (size_t k = 1; k <= 20; k += 5) {
9682 GemmMicrokernelTester()
9683 .mr(8)
9684 .nr(8)
9685 .kr(1)
9686 .sr(4)
9687 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009688 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009689 .k(k)
9690 .cn_stride(11)
9691 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9692 }
9693 }
9694 }
9695
9696 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_strided_a) {
9697 TEST_REQUIRES_ARM_NEON;
9698 for (uint32_t n = 9; n < 16; n++) {
9699 for (size_t k = 1; k <= 20; k += 5) {
9700 GemmMicrokernelTester()
9701 .mr(8)
9702 .nr(8)
9703 .kr(1)
9704 .sr(4)
9705 .m(8)
9706 .n(n)
9707 .k(k)
9708 .a_stride(23)
9709 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9710 }
9711 }
9712 }
9713
9714 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_gt_8_subtile) {
9715 TEST_REQUIRES_ARM_NEON;
9716 for (uint32_t n = 9; n < 16; n++) {
9717 for (size_t k = 1; k <= 20; k += 5) {
9718 for (uint32_t m = 1; m <= 8; m++) {
9719 GemmMicrokernelTester()
9720 .mr(8)
9721 .nr(8)
9722 .kr(1)
9723 .sr(4)
9724 .m(m)
9725 .n(n)
9726 .k(k)
9727 .iterations(1)
9728 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9729 }
9730 }
9731 }
9732 }
9733
9734 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8) {
9735 TEST_REQUIRES_ARM_NEON;
9736 for (uint32_t n = 16; n <= 24; n += 8) {
9737 for (size_t k = 1; k <= 20; k += 5) {
9738 GemmMicrokernelTester()
9739 .mr(8)
9740 .nr(8)
9741 .kr(1)
9742 .sr(4)
9743 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009744 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009745 .k(k)
9746 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9747 }
9748 }
9749 }
9750
9751 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_strided_cn) {
9752 TEST_REQUIRES_ARM_NEON;
9753 for (uint32_t n = 16; n <= 24; n += 8) {
9754 for (size_t k = 1; k <= 20; k += 5) {
9755 GemmMicrokernelTester()
9756 .mr(8)
9757 .nr(8)
9758 .kr(1)
9759 .sr(4)
9760 .m(8)
9761 .n(n)
9762 .k(k)
9763 .cn_stride(11)
9764 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9765 }
9766 }
9767 }
9768
9769 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_strided_a) {
9770 TEST_REQUIRES_ARM_NEON;
9771 for (uint32_t n = 16; n <= 24; n += 8) {
9772 for (size_t k = 1; k <= 20; k += 5) {
9773 GemmMicrokernelTester()
9774 .mr(8)
9775 .nr(8)
9776 .kr(1)
9777 .sr(4)
9778 .m(8)
9779 .n(n)
9780 .k(k)
9781 .a_stride(23)
9782 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9783 }
9784 }
9785 }
9786
9787 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, n_div_8_subtile) {
9788 TEST_REQUIRES_ARM_NEON;
9789 for (uint32_t n = 16; n <= 24; n += 8) {
9790 for (size_t k = 1; k <= 20; k += 5) {
9791 for (uint32_t m = 1; m <= 8; m++) {
9792 GemmMicrokernelTester()
9793 .mr(8)
9794 .nr(8)
9795 .kr(1)
9796 .sr(4)
9797 .m(m)
9798 .n(n)
9799 .k(k)
9800 .iterations(1)
9801 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9802 }
9803 }
9804 }
9805 }
9806
9807 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cm_subtile) {
9808 TEST_REQUIRES_ARM_NEON;
9809 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009810 for (uint32_t n = 1; n <= 8; n++) {
9811 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009812 GemmMicrokernelTester()
9813 .mr(8)
9814 .nr(8)
9815 .kr(1)
9816 .sr(4)
9817 .m(m)
9818 .n(n)
9819 .k(k)
9820 .cm_stride(11)
9821 .iterations(1)
9822 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9823 }
9824 }
9825 }
9826 }
9827
9828 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, qmin) {
9829 TEST_REQUIRES_ARM_NEON;
9830 GemmMicrokernelTester()
9831 .mr(8)
9832 .nr(8)
9833 .kr(1)
9834 .sr(4)
9835 .m(8)
9836 .n(8)
9837 .k(4)
9838 .qmin(128)
9839 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9840 }
9841
9842 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, qmax) {
9843 TEST_REQUIRES_ARM_NEON;
9844 GemmMicrokernelTester()
9845 .mr(8)
9846 .nr(8)
9847 .kr(1)
9848 .sr(4)
9849 .m(8)
9850 .n(8)
9851 .k(4)
9852 .qmax(128)
9853 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9854 }
9855
9856 TEST(F32_GEMMINC_MINMAX_8X8S4__NEON, strided_cm) {
9857 TEST_REQUIRES_ARM_NEON;
9858 GemmMicrokernelTester()
9859 .mr(8)
9860 .nr(8)
9861 .kr(1)
9862 .sr(4)
9863 .m(8)
9864 .n(8)
9865 .k(4)
9866 .cm_stride(11)
9867 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
9868 }
9869#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9870
9871
9872#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9873 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4) {
9874 TEST_REQUIRES_ARM_NEON_FMA;
9875 GemmMicrokernelTester()
9876 .mr(1)
9877 .nr(8)
9878 .kr(1)
9879 .sr(4)
9880 .m(1)
9881 .n(8)
9882 .k(4)
9883 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9884 }
9885
9886 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cn) {
9887 TEST_REQUIRES_ARM_NEON_FMA;
9888 GemmMicrokernelTester()
9889 .mr(1)
9890 .nr(8)
9891 .kr(1)
9892 .sr(4)
9893 .m(1)
9894 .n(8)
9895 .k(4)
9896 .cn_stride(11)
9897 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9898 }
9899
9900 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_strided_a) {
9901 TEST_REQUIRES_ARM_NEON_FMA;
9902 GemmMicrokernelTester()
9903 .mr(1)
9904 .nr(8)
9905 .kr(1)
9906 .sr(4)
9907 .m(1)
9908 .n(8)
9909 .k(4)
9910 .a_stride(7)
9911 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9912 }
9913
9914 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile) {
9915 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009916 for (uint32_t n = 1; n <= 8; n++) {
9917 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009918 GemmMicrokernelTester()
9919 .mr(1)
9920 .nr(8)
9921 .kr(1)
9922 .sr(4)
9923 .m(m)
9924 .n(n)
9925 .k(4)
9926 .iterations(1)
9927 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9928 }
9929 }
9930 }
9931
9932 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_m) {
9933 TEST_REQUIRES_ARM_NEON_FMA;
9934 for (uint32_t m = 1; m <= 1; m++) {
9935 GemmMicrokernelTester()
9936 .mr(1)
9937 .nr(8)
9938 .kr(1)
9939 .sr(4)
9940 .m(m)
9941 .n(8)
9942 .k(4)
9943 .iterations(1)
9944 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9945 }
9946 }
9947
9948 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_n) {
9949 TEST_REQUIRES_ARM_NEON_FMA;
9950 for (uint32_t n = 1; n <= 8; n++) {
9951 GemmMicrokernelTester()
9952 .mr(1)
9953 .nr(8)
9954 .kr(1)
9955 .sr(4)
9956 .m(1)
9957 .n(n)
9958 .k(4)
9959 .iterations(1)
9960 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9961 }
9962 }
9963
9964 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4) {
9965 TEST_REQUIRES_ARM_NEON_FMA;
9966 for (size_t k = 1; k < 4; k++) {
9967 GemmMicrokernelTester()
9968 .mr(1)
9969 .nr(8)
9970 .kr(1)
9971 .sr(4)
9972 .m(1)
9973 .n(8)
9974 .k(k)
9975 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9976 }
9977 }
9978
9979 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4_strided_a) {
9980 TEST_REQUIRES_ARM_NEON_FMA;
9981 for (size_t k = 1; k < 4; k++) {
9982 GemmMicrokernelTester()
9983 .mr(1)
9984 .nr(8)
9985 .kr(1)
9986 .sr(4)
9987 .m(1)
9988 .n(8)
9989 .k(k)
9990 .a_stride(7)
9991 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
9992 }
9993 }
9994
9995 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_lt_4_subtile) {
9996 TEST_REQUIRES_ARM_NEON_FMA;
9997 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009998 for (uint32_t n = 1; n <= 8; n++) {
9999 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010000 GemmMicrokernelTester()
10001 .mr(1)
10002 .nr(8)
10003 .kr(1)
10004 .sr(4)
10005 .m(m)
10006 .n(n)
10007 .k(k)
10008 .iterations(1)
10009 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10010 }
10011 }
10012 }
10013 }
10014
10015 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4) {
10016 TEST_REQUIRES_ARM_NEON_FMA;
10017 for (size_t k = 5; k < 8; k++) {
10018 GemmMicrokernelTester()
10019 .mr(1)
10020 .nr(8)
10021 .kr(1)
10022 .sr(4)
10023 .m(1)
10024 .n(8)
10025 .k(k)
10026 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10027 }
10028 }
10029
10030 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4_strided_a) {
10031 TEST_REQUIRES_ARM_NEON_FMA;
10032 for (size_t k = 5; k < 8; k++) {
10033 GemmMicrokernelTester()
10034 .mr(1)
10035 .nr(8)
10036 .kr(1)
10037 .sr(4)
10038 .m(1)
10039 .n(8)
10040 .k(k)
10041 .a_stride(11)
10042 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10043 }
10044 }
10045
10046 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_gt_4_subtile) {
10047 TEST_REQUIRES_ARM_NEON_FMA;
10048 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010049 for (uint32_t n = 1; n <= 8; n++) {
10050 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010051 GemmMicrokernelTester()
10052 .mr(1)
10053 .nr(8)
10054 .kr(1)
10055 .sr(4)
10056 .m(m)
10057 .n(n)
10058 .k(k)
10059 .iterations(1)
10060 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10061 }
10062 }
10063 }
10064 }
10065
10066 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4) {
10067 TEST_REQUIRES_ARM_NEON_FMA;
10068 for (size_t k = 8; k <= 40; k += 4) {
10069 GemmMicrokernelTester()
10070 .mr(1)
10071 .nr(8)
10072 .kr(1)
10073 .sr(4)
10074 .m(1)
10075 .n(8)
10076 .k(k)
10077 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10078 }
10079 }
10080
10081 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4_strided_a) {
10082 TEST_REQUIRES_ARM_NEON_FMA;
10083 for (size_t k = 8; k <= 40; k += 4) {
10084 GemmMicrokernelTester()
10085 .mr(1)
10086 .nr(8)
10087 .kr(1)
10088 .sr(4)
10089 .m(1)
10090 .n(8)
10091 .k(k)
10092 .a_stride(43)
10093 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10094 }
10095 }
10096
10097 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, k_div_4_subtile) {
10098 TEST_REQUIRES_ARM_NEON_FMA;
10099 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010100 for (uint32_t n = 1; n <= 8; n++) {
10101 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010102 GemmMicrokernelTester()
10103 .mr(1)
10104 .nr(8)
10105 .kr(1)
10106 .sr(4)
10107 .m(m)
10108 .n(n)
10109 .k(k)
10110 .iterations(1)
10111 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10112 }
10113 }
10114 }
10115 }
10116
10117 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8) {
10118 TEST_REQUIRES_ARM_NEON_FMA;
10119 for (uint32_t n = 9; n < 16; n++) {
10120 for (size_t k = 1; k <= 20; k += 5) {
10121 GemmMicrokernelTester()
10122 .mr(1)
10123 .nr(8)
10124 .kr(1)
10125 .sr(4)
10126 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010127 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010128 .k(k)
10129 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10130 }
10131 }
10132 }
10133
10134 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_cn) {
10135 TEST_REQUIRES_ARM_NEON_FMA;
10136 for (uint32_t n = 9; n < 16; n++) {
10137 for (size_t k = 1; k <= 20; k += 5) {
10138 GemmMicrokernelTester()
10139 .mr(1)
10140 .nr(8)
10141 .kr(1)
10142 .sr(4)
10143 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010144 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010145 .k(k)
10146 .cn_stride(11)
10147 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10148 }
10149 }
10150 }
10151
10152 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_a) {
10153 TEST_REQUIRES_ARM_NEON_FMA;
10154 for (uint32_t n = 9; n < 16; n++) {
10155 for (size_t k = 1; k <= 20; k += 5) {
10156 GemmMicrokernelTester()
10157 .mr(1)
10158 .nr(8)
10159 .kr(1)
10160 .sr(4)
10161 .m(1)
10162 .n(n)
10163 .k(k)
10164 .a_stride(23)
10165 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10166 }
10167 }
10168 }
10169
10170 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_gt_8_subtile) {
10171 TEST_REQUIRES_ARM_NEON_FMA;
10172 for (uint32_t n = 9; n < 16; n++) {
10173 for (size_t k = 1; k <= 20; k += 5) {
10174 for (uint32_t m = 1; m <= 1; m++) {
10175 GemmMicrokernelTester()
10176 .mr(1)
10177 .nr(8)
10178 .kr(1)
10179 .sr(4)
10180 .m(m)
10181 .n(n)
10182 .k(k)
10183 .iterations(1)
10184 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10185 }
10186 }
10187 }
10188 }
10189
10190 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8) {
10191 TEST_REQUIRES_ARM_NEON_FMA;
10192 for (uint32_t n = 16; n <= 24; n += 8) {
10193 for (size_t k = 1; k <= 20; k += 5) {
10194 GemmMicrokernelTester()
10195 .mr(1)
10196 .nr(8)
10197 .kr(1)
10198 .sr(4)
10199 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010200 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010201 .k(k)
10202 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10203 }
10204 }
10205 }
10206
10207 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_strided_cn) {
10208 TEST_REQUIRES_ARM_NEON_FMA;
10209 for (uint32_t n = 16; n <= 24; n += 8) {
10210 for (size_t k = 1; k <= 20; k += 5) {
10211 GemmMicrokernelTester()
10212 .mr(1)
10213 .nr(8)
10214 .kr(1)
10215 .sr(4)
10216 .m(1)
10217 .n(n)
10218 .k(k)
10219 .cn_stride(11)
10220 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10221 }
10222 }
10223 }
10224
10225 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_strided_a) {
10226 TEST_REQUIRES_ARM_NEON_FMA;
10227 for (uint32_t n = 16; n <= 24; n += 8) {
10228 for (size_t k = 1; k <= 20; k += 5) {
10229 GemmMicrokernelTester()
10230 .mr(1)
10231 .nr(8)
10232 .kr(1)
10233 .sr(4)
10234 .m(1)
10235 .n(n)
10236 .k(k)
10237 .a_stride(23)
10238 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10239 }
10240 }
10241 }
10242
10243 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, n_div_8_subtile) {
10244 TEST_REQUIRES_ARM_NEON_FMA;
10245 for (uint32_t n = 16; n <= 24; n += 8) {
10246 for (size_t k = 1; k <= 20; k += 5) {
10247 for (uint32_t m = 1; m <= 1; m++) {
10248 GemmMicrokernelTester()
10249 .mr(1)
10250 .nr(8)
10251 .kr(1)
10252 .sr(4)
10253 .m(m)
10254 .n(n)
10255 .k(k)
10256 .iterations(1)
10257 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10258 }
10259 }
10260 }
10261 }
10262
10263 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cm_subtile) {
10264 TEST_REQUIRES_ARM_NEON_FMA;
10265 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010266 for (uint32_t n = 1; n <= 8; n++) {
10267 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010268 GemmMicrokernelTester()
10269 .mr(1)
10270 .nr(8)
10271 .kr(1)
10272 .sr(4)
10273 .m(m)
10274 .n(n)
10275 .k(k)
10276 .cm_stride(11)
10277 .iterations(1)
10278 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10279 }
10280 }
10281 }
10282 }
10283
10284 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, qmin) {
10285 TEST_REQUIRES_ARM_NEON_FMA;
10286 GemmMicrokernelTester()
10287 .mr(1)
10288 .nr(8)
10289 .kr(1)
10290 .sr(4)
10291 .m(1)
10292 .n(8)
10293 .k(4)
10294 .qmin(128)
10295 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10296 }
10297
10298 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, qmax) {
10299 TEST_REQUIRES_ARM_NEON_FMA;
10300 GemmMicrokernelTester()
10301 .mr(1)
10302 .nr(8)
10303 .kr(1)
10304 .sr(4)
10305 .m(1)
10306 .n(8)
10307 .k(4)
10308 .qmax(128)
10309 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10310 }
10311
10312 TEST(F32_GEMMINC_MINMAX_1X8S4__NEONFMA, strided_cm) {
10313 TEST_REQUIRES_ARM_NEON_FMA;
10314 GemmMicrokernelTester()
10315 .mr(1)
10316 .nr(8)
10317 .kr(1)
10318 .sr(4)
10319 .m(1)
10320 .n(8)
10321 .k(4)
10322 .cm_stride(11)
10323 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10324 }
10325#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10326
10327
10328#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10329 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4) {
10330 TEST_REQUIRES_ARM_NEON_FMA;
10331 GemmMicrokernelTester()
10332 .mr(6)
10333 .nr(8)
10334 .kr(1)
10335 .sr(4)
10336 .m(6)
10337 .n(8)
10338 .k(4)
10339 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10340 }
10341
10342 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cn) {
10343 TEST_REQUIRES_ARM_NEON_FMA;
10344 GemmMicrokernelTester()
10345 .mr(6)
10346 .nr(8)
10347 .kr(1)
10348 .sr(4)
10349 .m(6)
10350 .n(8)
10351 .k(4)
10352 .cn_stride(11)
10353 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10354 }
10355
10356 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_strided_a) {
10357 TEST_REQUIRES_ARM_NEON_FMA;
10358 GemmMicrokernelTester()
10359 .mr(6)
10360 .nr(8)
10361 .kr(1)
10362 .sr(4)
10363 .m(6)
10364 .n(8)
10365 .k(4)
10366 .a_stride(7)
10367 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10368 }
10369
10370 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile) {
10371 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010372 for (uint32_t n = 1; n <= 8; n++) {
10373 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010374 GemmMicrokernelTester()
10375 .mr(6)
10376 .nr(8)
10377 .kr(1)
10378 .sr(4)
10379 .m(m)
10380 .n(n)
10381 .k(4)
10382 .iterations(1)
10383 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10384 }
10385 }
10386 }
10387
10388 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_m) {
10389 TEST_REQUIRES_ARM_NEON_FMA;
10390 for (uint32_t m = 1; m <= 6; m++) {
10391 GemmMicrokernelTester()
10392 .mr(6)
10393 .nr(8)
10394 .kr(1)
10395 .sr(4)
10396 .m(m)
10397 .n(8)
10398 .k(4)
10399 .iterations(1)
10400 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10401 }
10402 }
10403
10404 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_n) {
10405 TEST_REQUIRES_ARM_NEON_FMA;
10406 for (uint32_t n = 1; n <= 8; n++) {
10407 GemmMicrokernelTester()
10408 .mr(6)
10409 .nr(8)
10410 .kr(1)
10411 .sr(4)
10412 .m(6)
10413 .n(n)
10414 .k(4)
10415 .iterations(1)
10416 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10417 }
10418 }
10419
10420 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4) {
10421 TEST_REQUIRES_ARM_NEON_FMA;
10422 for (size_t k = 1; k < 4; k++) {
10423 GemmMicrokernelTester()
10424 .mr(6)
10425 .nr(8)
10426 .kr(1)
10427 .sr(4)
10428 .m(6)
10429 .n(8)
10430 .k(k)
10431 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10432 }
10433 }
10434
10435 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4_strided_a) {
10436 TEST_REQUIRES_ARM_NEON_FMA;
10437 for (size_t k = 1; k < 4; k++) {
10438 GemmMicrokernelTester()
10439 .mr(6)
10440 .nr(8)
10441 .kr(1)
10442 .sr(4)
10443 .m(6)
10444 .n(8)
10445 .k(k)
10446 .a_stride(7)
10447 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10448 }
10449 }
10450
10451 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_lt_4_subtile) {
10452 TEST_REQUIRES_ARM_NEON_FMA;
10453 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010454 for (uint32_t n = 1; n <= 8; n++) {
10455 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010456 GemmMicrokernelTester()
10457 .mr(6)
10458 .nr(8)
10459 .kr(1)
10460 .sr(4)
10461 .m(m)
10462 .n(n)
10463 .k(k)
10464 .iterations(1)
10465 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10466 }
10467 }
10468 }
10469 }
10470
10471 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4) {
10472 TEST_REQUIRES_ARM_NEON_FMA;
10473 for (size_t k = 5; k < 8; k++) {
10474 GemmMicrokernelTester()
10475 .mr(6)
10476 .nr(8)
10477 .kr(1)
10478 .sr(4)
10479 .m(6)
10480 .n(8)
10481 .k(k)
10482 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10483 }
10484 }
10485
10486 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4_strided_a) {
10487 TEST_REQUIRES_ARM_NEON_FMA;
10488 for (size_t k = 5; k < 8; k++) {
10489 GemmMicrokernelTester()
10490 .mr(6)
10491 .nr(8)
10492 .kr(1)
10493 .sr(4)
10494 .m(6)
10495 .n(8)
10496 .k(k)
10497 .a_stride(11)
10498 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10499 }
10500 }
10501
10502 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_gt_4_subtile) {
10503 TEST_REQUIRES_ARM_NEON_FMA;
10504 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010505 for (uint32_t n = 1; n <= 8; n++) {
10506 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010507 GemmMicrokernelTester()
10508 .mr(6)
10509 .nr(8)
10510 .kr(1)
10511 .sr(4)
10512 .m(m)
10513 .n(n)
10514 .k(k)
10515 .iterations(1)
10516 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10517 }
10518 }
10519 }
10520 }
10521
10522 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4) {
10523 TEST_REQUIRES_ARM_NEON_FMA;
10524 for (size_t k = 8; k <= 40; k += 4) {
10525 GemmMicrokernelTester()
10526 .mr(6)
10527 .nr(8)
10528 .kr(1)
10529 .sr(4)
10530 .m(6)
10531 .n(8)
10532 .k(k)
10533 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10534 }
10535 }
10536
10537 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4_strided_a) {
10538 TEST_REQUIRES_ARM_NEON_FMA;
10539 for (size_t k = 8; k <= 40; k += 4) {
10540 GemmMicrokernelTester()
10541 .mr(6)
10542 .nr(8)
10543 .kr(1)
10544 .sr(4)
10545 .m(6)
10546 .n(8)
10547 .k(k)
10548 .a_stride(43)
10549 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10550 }
10551 }
10552
10553 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, k_div_4_subtile) {
10554 TEST_REQUIRES_ARM_NEON_FMA;
10555 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010556 for (uint32_t n = 1; n <= 8; n++) {
10557 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010558 GemmMicrokernelTester()
10559 .mr(6)
10560 .nr(8)
10561 .kr(1)
10562 .sr(4)
10563 .m(m)
10564 .n(n)
10565 .k(k)
10566 .iterations(1)
10567 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10568 }
10569 }
10570 }
10571 }
10572
10573 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8) {
10574 TEST_REQUIRES_ARM_NEON_FMA;
10575 for (uint32_t n = 9; n < 16; n++) {
10576 for (size_t k = 1; k <= 20; k += 5) {
10577 GemmMicrokernelTester()
10578 .mr(6)
10579 .nr(8)
10580 .kr(1)
10581 .sr(4)
10582 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010583 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010584 .k(k)
10585 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10586 }
10587 }
10588 }
10589
10590 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_cn) {
10591 TEST_REQUIRES_ARM_NEON_FMA;
10592 for (uint32_t n = 9; n < 16; n++) {
10593 for (size_t k = 1; k <= 20; k += 5) {
10594 GemmMicrokernelTester()
10595 .mr(6)
10596 .nr(8)
10597 .kr(1)
10598 .sr(4)
10599 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010600 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010601 .k(k)
10602 .cn_stride(11)
10603 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10604 }
10605 }
10606 }
10607
10608 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_a) {
10609 TEST_REQUIRES_ARM_NEON_FMA;
10610 for (uint32_t n = 9; n < 16; n++) {
10611 for (size_t k = 1; k <= 20; k += 5) {
10612 GemmMicrokernelTester()
10613 .mr(6)
10614 .nr(8)
10615 .kr(1)
10616 .sr(4)
10617 .m(6)
10618 .n(n)
10619 .k(k)
10620 .a_stride(23)
10621 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10622 }
10623 }
10624 }
10625
10626 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_gt_8_subtile) {
10627 TEST_REQUIRES_ARM_NEON_FMA;
10628 for (uint32_t n = 9; n < 16; n++) {
10629 for (size_t k = 1; k <= 20; k += 5) {
10630 for (uint32_t m = 1; m <= 6; m++) {
10631 GemmMicrokernelTester()
10632 .mr(6)
10633 .nr(8)
10634 .kr(1)
10635 .sr(4)
10636 .m(m)
10637 .n(n)
10638 .k(k)
10639 .iterations(1)
10640 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10641 }
10642 }
10643 }
10644 }
10645
10646 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8) {
10647 TEST_REQUIRES_ARM_NEON_FMA;
10648 for (uint32_t n = 16; n <= 24; n += 8) {
10649 for (size_t k = 1; k <= 20; k += 5) {
10650 GemmMicrokernelTester()
10651 .mr(6)
10652 .nr(8)
10653 .kr(1)
10654 .sr(4)
10655 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010656 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010657 .k(k)
10658 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10659 }
10660 }
10661 }
10662
10663 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_strided_cn) {
10664 TEST_REQUIRES_ARM_NEON_FMA;
10665 for (uint32_t n = 16; n <= 24; n += 8) {
10666 for (size_t k = 1; k <= 20; k += 5) {
10667 GemmMicrokernelTester()
10668 .mr(6)
10669 .nr(8)
10670 .kr(1)
10671 .sr(4)
10672 .m(6)
10673 .n(n)
10674 .k(k)
10675 .cn_stride(11)
10676 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10677 }
10678 }
10679 }
10680
10681 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_strided_a) {
10682 TEST_REQUIRES_ARM_NEON_FMA;
10683 for (uint32_t n = 16; n <= 24; n += 8) {
10684 for (size_t k = 1; k <= 20; k += 5) {
10685 GemmMicrokernelTester()
10686 .mr(6)
10687 .nr(8)
10688 .kr(1)
10689 .sr(4)
10690 .m(6)
10691 .n(n)
10692 .k(k)
10693 .a_stride(23)
10694 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10695 }
10696 }
10697 }
10698
10699 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, n_div_8_subtile) {
10700 TEST_REQUIRES_ARM_NEON_FMA;
10701 for (uint32_t n = 16; n <= 24; n += 8) {
10702 for (size_t k = 1; k <= 20; k += 5) {
10703 for (uint32_t m = 1; m <= 6; m++) {
10704 GemmMicrokernelTester()
10705 .mr(6)
10706 .nr(8)
10707 .kr(1)
10708 .sr(4)
10709 .m(m)
10710 .n(n)
10711 .k(k)
10712 .iterations(1)
10713 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10714 }
10715 }
10716 }
10717 }
10718
10719 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cm_subtile) {
10720 TEST_REQUIRES_ARM_NEON_FMA;
10721 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010722 for (uint32_t n = 1; n <= 8; n++) {
10723 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010724 GemmMicrokernelTester()
10725 .mr(6)
10726 .nr(8)
10727 .kr(1)
10728 .sr(4)
10729 .m(m)
10730 .n(n)
10731 .k(k)
10732 .cm_stride(11)
10733 .iterations(1)
10734 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10735 }
10736 }
10737 }
10738 }
10739
10740 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, qmin) {
10741 TEST_REQUIRES_ARM_NEON_FMA;
10742 GemmMicrokernelTester()
10743 .mr(6)
10744 .nr(8)
10745 .kr(1)
10746 .sr(4)
10747 .m(6)
10748 .n(8)
10749 .k(4)
10750 .qmin(128)
10751 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10752 }
10753
10754 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, qmax) {
10755 TEST_REQUIRES_ARM_NEON_FMA;
10756 GemmMicrokernelTester()
10757 .mr(6)
10758 .nr(8)
10759 .kr(1)
10760 .sr(4)
10761 .m(6)
10762 .n(8)
10763 .k(4)
10764 .qmax(128)
10765 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10766 }
10767
10768 TEST(F32_GEMMINC_MINMAX_6X8S4__NEONFMA, strided_cm) {
10769 TEST_REQUIRES_ARM_NEON_FMA;
10770 GemmMicrokernelTester()
10771 .mr(6)
10772 .nr(8)
10773 .kr(1)
10774 .sr(4)
10775 .m(6)
10776 .n(8)
10777 .k(4)
10778 .cm_stride(11)
10779 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10780 }
10781#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10782
10783
10784#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10785 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4) {
10786 TEST_REQUIRES_ARM_NEON_FMA;
10787 GemmMicrokernelTester()
10788 .mr(8)
10789 .nr(8)
10790 .kr(1)
10791 .sr(4)
10792 .m(8)
10793 .n(8)
10794 .k(4)
10795 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10796 }
10797
10798 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cn) {
10799 TEST_REQUIRES_ARM_NEON_FMA;
10800 GemmMicrokernelTester()
10801 .mr(8)
10802 .nr(8)
10803 .kr(1)
10804 .sr(4)
10805 .m(8)
10806 .n(8)
10807 .k(4)
10808 .cn_stride(11)
10809 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10810 }
10811
10812 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_strided_a) {
10813 TEST_REQUIRES_ARM_NEON_FMA;
10814 GemmMicrokernelTester()
10815 .mr(8)
10816 .nr(8)
10817 .kr(1)
10818 .sr(4)
10819 .m(8)
10820 .n(8)
10821 .k(4)
10822 .a_stride(7)
10823 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10824 }
10825
10826 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile) {
10827 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010828 for (uint32_t n = 1; n <= 8; n++) {
10829 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010830 GemmMicrokernelTester()
10831 .mr(8)
10832 .nr(8)
10833 .kr(1)
10834 .sr(4)
10835 .m(m)
10836 .n(n)
10837 .k(4)
10838 .iterations(1)
10839 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10840 }
10841 }
10842 }
10843
10844 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_m) {
10845 TEST_REQUIRES_ARM_NEON_FMA;
10846 for (uint32_t m = 1; m <= 8; m++) {
10847 GemmMicrokernelTester()
10848 .mr(8)
10849 .nr(8)
10850 .kr(1)
10851 .sr(4)
10852 .m(m)
10853 .n(8)
10854 .k(4)
10855 .iterations(1)
10856 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10857 }
10858 }
10859
10860 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_n) {
10861 TEST_REQUIRES_ARM_NEON_FMA;
10862 for (uint32_t n = 1; n <= 8; n++) {
10863 GemmMicrokernelTester()
10864 .mr(8)
10865 .nr(8)
10866 .kr(1)
10867 .sr(4)
10868 .m(8)
10869 .n(n)
10870 .k(4)
10871 .iterations(1)
10872 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10873 }
10874 }
10875
10876 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4) {
10877 TEST_REQUIRES_ARM_NEON_FMA;
10878 for (size_t k = 1; k < 4; k++) {
10879 GemmMicrokernelTester()
10880 .mr(8)
10881 .nr(8)
10882 .kr(1)
10883 .sr(4)
10884 .m(8)
10885 .n(8)
10886 .k(k)
10887 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10888 }
10889 }
10890
10891 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4_strided_a) {
10892 TEST_REQUIRES_ARM_NEON_FMA;
10893 for (size_t k = 1; k < 4; k++) {
10894 GemmMicrokernelTester()
10895 .mr(8)
10896 .nr(8)
10897 .kr(1)
10898 .sr(4)
10899 .m(8)
10900 .n(8)
10901 .k(k)
10902 .a_stride(7)
10903 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10904 }
10905 }
10906
10907 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_lt_4_subtile) {
10908 TEST_REQUIRES_ARM_NEON_FMA;
10909 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010910 for (uint32_t n = 1; n <= 8; n++) {
10911 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010912 GemmMicrokernelTester()
10913 .mr(8)
10914 .nr(8)
10915 .kr(1)
10916 .sr(4)
10917 .m(m)
10918 .n(n)
10919 .k(k)
10920 .iterations(1)
10921 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10922 }
10923 }
10924 }
10925 }
10926
10927 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4) {
10928 TEST_REQUIRES_ARM_NEON_FMA;
10929 for (size_t k = 5; k < 8; k++) {
10930 GemmMicrokernelTester()
10931 .mr(8)
10932 .nr(8)
10933 .kr(1)
10934 .sr(4)
10935 .m(8)
10936 .n(8)
10937 .k(k)
10938 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10939 }
10940 }
10941
10942 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4_strided_a) {
10943 TEST_REQUIRES_ARM_NEON_FMA;
10944 for (size_t k = 5; k < 8; k++) {
10945 GemmMicrokernelTester()
10946 .mr(8)
10947 .nr(8)
10948 .kr(1)
10949 .sr(4)
10950 .m(8)
10951 .n(8)
10952 .k(k)
10953 .a_stride(11)
10954 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10955 }
10956 }
10957
10958 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_gt_4_subtile) {
10959 TEST_REQUIRES_ARM_NEON_FMA;
10960 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010961 for (uint32_t n = 1; n <= 8; n++) {
10962 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010963 GemmMicrokernelTester()
10964 .mr(8)
10965 .nr(8)
10966 .kr(1)
10967 .sr(4)
10968 .m(m)
10969 .n(n)
10970 .k(k)
10971 .iterations(1)
10972 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10973 }
10974 }
10975 }
10976 }
10977
10978 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4) {
10979 TEST_REQUIRES_ARM_NEON_FMA;
10980 for (size_t k = 8; k <= 40; k += 4) {
10981 GemmMicrokernelTester()
10982 .mr(8)
10983 .nr(8)
10984 .kr(1)
10985 .sr(4)
10986 .m(8)
10987 .n(8)
10988 .k(k)
10989 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
10990 }
10991 }
10992
10993 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4_strided_a) {
10994 TEST_REQUIRES_ARM_NEON_FMA;
10995 for (size_t k = 8; k <= 40; k += 4) {
10996 GemmMicrokernelTester()
10997 .mr(8)
10998 .nr(8)
10999 .kr(1)
11000 .sr(4)
11001 .m(8)
11002 .n(8)
11003 .k(k)
11004 .a_stride(43)
11005 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11006 }
11007 }
11008
11009 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, k_div_4_subtile) {
11010 TEST_REQUIRES_ARM_NEON_FMA;
11011 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011012 for (uint32_t n = 1; n <= 8; n++) {
11013 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011014 GemmMicrokernelTester()
11015 .mr(8)
11016 .nr(8)
11017 .kr(1)
11018 .sr(4)
11019 .m(m)
11020 .n(n)
11021 .k(k)
11022 .iterations(1)
11023 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11024 }
11025 }
11026 }
11027 }
11028
11029 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8) {
11030 TEST_REQUIRES_ARM_NEON_FMA;
11031 for (uint32_t n = 9; n < 16; n++) {
11032 for (size_t k = 1; k <= 20; k += 5) {
11033 GemmMicrokernelTester()
11034 .mr(8)
11035 .nr(8)
11036 .kr(1)
11037 .sr(4)
11038 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011039 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011040 .k(k)
11041 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11042 }
11043 }
11044 }
11045
11046 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_cn) {
11047 TEST_REQUIRES_ARM_NEON_FMA;
11048 for (uint32_t n = 9; n < 16; n++) {
11049 for (size_t k = 1; k <= 20; k += 5) {
11050 GemmMicrokernelTester()
11051 .mr(8)
11052 .nr(8)
11053 .kr(1)
11054 .sr(4)
11055 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011056 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011057 .k(k)
11058 .cn_stride(11)
11059 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11060 }
11061 }
11062 }
11063
11064 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_a) {
11065 TEST_REQUIRES_ARM_NEON_FMA;
11066 for (uint32_t n = 9; n < 16; n++) {
11067 for (size_t k = 1; k <= 20; k += 5) {
11068 GemmMicrokernelTester()
11069 .mr(8)
11070 .nr(8)
11071 .kr(1)
11072 .sr(4)
11073 .m(8)
11074 .n(n)
11075 .k(k)
11076 .a_stride(23)
11077 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11078 }
11079 }
11080 }
11081
11082 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_gt_8_subtile) {
11083 TEST_REQUIRES_ARM_NEON_FMA;
11084 for (uint32_t n = 9; n < 16; n++) {
11085 for (size_t k = 1; k <= 20; k += 5) {
11086 for (uint32_t m = 1; m <= 8; m++) {
11087 GemmMicrokernelTester()
11088 .mr(8)
11089 .nr(8)
11090 .kr(1)
11091 .sr(4)
11092 .m(m)
11093 .n(n)
11094 .k(k)
11095 .iterations(1)
11096 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11097 }
11098 }
11099 }
11100 }
11101
11102 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8) {
11103 TEST_REQUIRES_ARM_NEON_FMA;
11104 for (uint32_t n = 16; n <= 24; n += 8) {
11105 for (size_t k = 1; k <= 20; k += 5) {
11106 GemmMicrokernelTester()
11107 .mr(8)
11108 .nr(8)
11109 .kr(1)
11110 .sr(4)
11111 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011112 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011113 .k(k)
11114 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11115 }
11116 }
11117 }
11118
11119 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_strided_cn) {
11120 TEST_REQUIRES_ARM_NEON_FMA;
11121 for (uint32_t n = 16; n <= 24; n += 8) {
11122 for (size_t k = 1; k <= 20; k += 5) {
11123 GemmMicrokernelTester()
11124 .mr(8)
11125 .nr(8)
11126 .kr(1)
11127 .sr(4)
11128 .m(8)
11129 .n(n)
11130 .k(k)
11131 .cn_stride(11)
11132 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11133 }
11134 }
11135 }
11136
11137 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_strided_a) {
11138 TEST_REQUIRES_ARM_NEON_FMA;
11139 for (uint32_t n = 16; n <= 24; n += 8) {
11140 for (size_t k = 1; k <= 20; k += 5) {
11141 GemmMicrokernelTester()
11142 .mr(8)
11143 .nr(8)
11144 .kr(1)
11145 .sr(4)
11146 .m(8)
11147 .n(n)
11148 .k(k)
11149 .a_stride(23)
11150 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11151 }
11152 }
11153 }
11154
11155 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, n_div_8_subtile) {
11156 TEST_REQUIRES_ARM_NEON_FMA;
11157 for (uint32_t n = 16; n <= 24; n += 8) {
11158 for (size_t k = 1; k <= 20; k += 5) {
11159 for (uint32_t m = 1; m <= 8; m++) {
11160 GemmMicrokernelTester()
11161 .mr(8)
11162 .nr(8)
11163 .kr(1)
11164 .sr(4)
11165 .m(m)
11166 .n(n)
11167 .k(k)
11168 .iterations(1)
11169 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11170 }
11171 }
11172 }
11173 }
11174
11175 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cm_subtile) {
11176 TEST_REQUIRES_ARM_NEON_FMA;
11177 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011178 for (uint32_t n = 1; n <= 8; n++) {
11179 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011180 GemmMicrokernelTester()
11181 .mr(8)
11182 .nr(8)
11183 .kr(1)
11184 .sr(4)
11185 .m(m)
11186 .n(n)
11187 .k(k)
11188 .cm_stride(11)
11189 .iterations(1)
11190 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11191 }
11192 }
11193 }
11194 }
11195
11196 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, qmin) {
11197 TEST_REQUIRES_ARM_NEON_FMA;
11198 GemmMicrokernelTester()
11199 .mr(8)
11200 .nr(8)
11201 .kr(1)
11202 .sr(4)
11203 .m(8)
11204 .n(8)
11205 .k(4)
11206 .qmin(128)
11207 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11208 }
11209
11210 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, qmax) {
11211 TEST_REQUIRES_ARM_NEON_FMA;
11212 GemmMicrokernelTester()
11213 .mr(8)
11214 .nr(8)
11215 .kr(1)
11216 .sr(4)
11217 .m(8)
11218 .n(8)
11219 .k(4)
11220 .qmax(128)
11221 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11222 }
11223
11224 TEST(F32_GEMMINC_MINMAX_8X8S4__NEONFMA, strided_cm) {
11225 TEST_REQUIRES_ARM_NEON_FMA;
11226 GemmMicrokernelTester()
11227 .mr(8)
11228 .nr(8)
11229 .kr(1)
11230 .sr(4)
11231 .m(8)
11232 .n(8)
11233 .k(4)
11234 .cm_stride(11)
11235 .Test(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
11236 }
11237#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11238
11239
11240#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11241 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1) {
11242 TEST_REQUIRES_X86_SSE;
11243 GemmMicrokernelTester()
11244 .mr(3)
11245 .nr(8)
11246 .kr(1)
11247 .sr(1)
11248 .m(3)
11249 .n(8)
11250 .k(1)
11251 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11252 }
11253
11254 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cn) {
11255 TEST_REQUIRES_X86_SSE;
11256 GemmMicrokernelTester()
11257 .mr(3)
11258 .nr(8)
11259 .kr(1)
11260 .sr(1)
11261 .m(3)
11262 .n(8)
11263 .k(1)
11264 .cn_stride(11)
11265 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11266 }
11267
11268 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_strided_a) {
11269 TEST_REQUIRES_X86_SSE;
11270 GemmMicrokernelTester()
11271 .mr(3)
11272 .nr(8)
11273 .kr(1)
11274 .sr(1)
11275 .m(3)
11276 .n(8)
11277 .k(1)
11278 .a_stride(3)
11279 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11280 }
11281
11282 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile) {
11283 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011284 for (uint32_t n = 1; n <= 8; n++) {
11285 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011286 GemmMicrokernelTester()
11287 .mr(3)
11288 .nr(8)
11289 .kr(1)
11290 .sr(1)
11291 .m(m)
11292 .n(n)
11293 .k(1)
11294 .iterations(1)
11295 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11296 }
11297 }
11298 }
11299
11300 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_m) {
11301 TEST_REQUIRES_X86_SSE;
11302 for (uint32_t m = 1; m <= 3; m++) {
11303 GemmMicrokernelTester()
11304 .mr(3)
11305 .nr(8)
11306 .kr(1)
11307 .sr(1)
11308 .m(m)
11309 .n(8)
11310 .k(1)
11311 .iterations(1)
11312 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11313 }
11314 }
11315
11316 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_n) {
11317 TEST_REQUIRES_X86_SSE;
11318 for (uint32_t n = 1; n <= 8; n++) {
11319 GemmMicrokernelTester()
11320 .mr(3)
11321 .nr(8)
11322 .kr(1)
11323 .sr(1)
11324 .m(3)
11325 .n(n)
11326 .k(1)
11327 .iterations(1)
11328 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11329 }
11330 }
11331
11332 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1) {
11333 TEST_REQUIRES_X86_SSE;
11334 for (size_t k = 2; k < 10; k++) {
11335 GemmMicrokernelTester()
11336 .mr(3)
11337 .nr(8)
11338 .kr(1)
11339 .sr(1)
11340 .m(3)
11341 .n(8)
11342 .k(k)
11343 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11344 }
11345 }
11346
11347 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1_strided_a) {
11348 TEST_REQUIRES_X86_SSE;
11349 for (size_t k = 2; k < 10; k++) {
11350 GemmMicrokernelTester()
11351 .mr(3)
11352 .nr(8)
11353 .kr(1)
11354 .sr(1)
11355 .m(3)
11356 .n(8)
11357 .k(k)
11358 .a_stride(11)
11359 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11360 }
11361 }
11362
11363 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, k_gt_1_subtile) {
11364 TEST_REQUIRES_X86_SSE;
11365 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011366 for (uint32_t n = 1; n <= 8; n++) {
11367 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011368 GemmMicrokernelTester()
11369 .mr(3)
11370 .nr(8)
11371 .kr(1)
11372 .sr(1)
11373 .m(m)
11374 .n(n)
11375 .k(k)
11376 .iterations(1)
11377 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11378 }
11379 }
11380 }
11381 }
11382
11383 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8) {
11384 TEST_REQUIRES_X86_SSE;
11385 for (uint32_t n = 9; n < 16; n++) {
11386 for (size_t k = 1; k <= 5; k += 2) {
11387 GemmMicrokernelTester()
11388 .mr(3)
11389 .nr(8)
11390 .kr(1)
11391 .sr(1)
11392 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011393 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011394 .k(k)
11395 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11396 }
11397 }
11398 }
11399
11400 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_cn) {
11401 TEST_REQUIRES_X86_SSE;
11402 for (uint32_t n = 9; n < 16; n++) {
11403 for (size_t k = 1; k <= 5; k += 2) {
11404 GemmMicrokernelTester()
11405 .mr(3)
11406 .nr(8)
11407 .kr(1)
11408 .sr(1)
11409 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011410 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011411 .k(k)
11412 .cn_stride(11)
11413 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11414 }
11415 }
11416 }
11417
11418 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_a) {
11419 TEST_REQUIRES_X86_SSE;
11420 for (uint32_t n = 9; n < 16; n++) {
11421 for (size_t k = 1; k <= 5; k += 2) {
11422 GemmMicrokernelTester()
11423 .mr(3)
11424 .nr(8)
11425 .kr(1)
11426 .sr(1)
11427 .m(3)
11428 .n(n)
11429 .k(k)
11430 .a_stride(7)
11431 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11432 }
11433 }
11434 }
11435
11436 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_gt_8_subtile) {
11437 TEST_REQUIRES_X86_SSE;
11438 for (uint32_t n = 9; n < 16; n++) {
11439 for (size_t k = 1; k <= 5; k += 2) {
11440 for (uint32_t m = 1; m <= 3; m++) {
11441 GemmMicrokernelTester()
11442 .mr(3)
11443 .nr(8)
11444 .kr(1)
11445 .sr(1)
11446 .m(m)
11447 .n(n)
11448 .k(k)
11449 .iterations(1)
11450 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11451 }
11452 }
11453 }
11454 }
11455
11456 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8) {
11457 TEST_REQUIRES_X86_SSE;
11458 for (uint32_t n = 16; n <= 24; n += 8) {
11459 for (size_t k = 1; k <= 5; k += 2) {
11460 GemmMicrokernelTester()
11461 .mr(3)
11462 .nr(8)
11463 .kr(1)
11464 .sr(1)
11465 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011466 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011467 .k(k)
11468 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11469 }
11470 }
11471 }
11472
11473 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_cn) {
11474 TEST_REQUIRES_X86_SSE;
11475 for (uint32_t n = 16; n <= 24; n += 8) {
11476 for (size_t k = 1; k <= 5; k += 2) {
11477 GemmMicrokernelTester()
11478 .mr(3)
11479 .nr(8)
11480 .kr(1)
11481 .sr(1)
11482 .m(3)
11483 .n(n)
11484 .k(k)
11485 .cn_stride(11)
11486 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11487 }
11488 }
11489 }
11490
11491 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_a) {
11492 TEST_REQUIRES_X86_SSE;
11493 for (uint32_t n = 16; n <= 24; n += 8) {
11494 for (size_t k = 1; k <= 5; k += 2) {
11495 GemmMicrokernelTester()
11496 .mr(3)
11497 .nr(8)
11498 .kr(1)
11499 .sr(1)
11500 .m(3)
11501 .n(n)
11502 .k(k)
11503 .a_stride(7)
11504 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11505 }
11506 }
11507 }
11508
11509 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, n_div_8_subtile) {
11510 TEST_REQUIRES_X86_SSE;
11511 for (uint32_t n = 16; n <= 24; n += 8) {
11512 for (size_t k = 1; k <= 5; k += 2) {
11513 for (uint32_t m = 1; m <= 3; m++) {
11514 GemmMicrokernelTester()
11515 .mr(3)
11516 .nr(8)
11517 .kr(1)
11518 .sr(1)
11519 .m(m)
11520 .n(n)
11521 .k(k)
11522 .iterations(1)
11523 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11524 }
11525 }
11526 }
11527 }
11528
11529 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cm_subtile) {
11530 TEST_REQUIRES_X86_SSE;
11531 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011532 for (uint32_t n = 1; n <= 8; n++) {
11533 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011534 GemmMicrokernelTester()
11535 .mr(3)
11536 .nr(8)
11537 .kr(1)
11538 .sr(1)
11539 .m(m)
11540 .n(n)
11541 .k(k)
11542 .cm_stride(11)
11543 .iterations(1)
11544 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11545 }
11546 }
11547 }
11548 }
11549
11550 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, qmin) {
11551 TEST_REQUIRES_X86_SSE;
11552 GemmMicrokernelTester()
11553 .mr(3)
11554 .nr(8)
11555 .kr(1)
11556 .sr(1)
11557 .m(3)
11558 .n(8)
11559 .k(1)
11560 .qmin(128)
11561 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11562 }
11563
11564 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, qmax) {
11565 TEST_REQUIRES_X86_SSE;
11566 GemmMicrokernelTester()
11567 .mr(3)
11568 .nr(8)
11569 .kr(1)
11570 .sr(1)
11571 .m(3)
11572 .n(8)
11573 .k(1)
11574 .qmax(128)
11575 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11576 }
11577
11578 TEST(F32_GEMMINC_MINMAX_3X8__SSE_LOAD1, strided_cm) {
11579 TEST_REQUIRES_X86_SSE;
11580 GemmMicrokernelTester()
11581 .mr(3)
11582 .nr(8)
11583 .kr(1)
11584 .sr(1)
11585 .m(3)
11586 .n(8)
11587 .k(1)
11588 .cm_stride(11)
11589 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
11590 }
11591#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11592
11593
11594#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11595 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1) {
11596 TEST_REQUIRES_X86_SSE;
11597 GemmMicrokernelTester()
11598 .mr(4)
11599 .nr(8)
11600 .kr(1)
11601 .sr(1)
11602 .m(4)
11603 .n(8)
11604 .k(1)
11605 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11606 }
11607
11608 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cn) {
11609 TEST_REQUIRES_X86_SSE;
11610 GemmMicrokernelTester()
11611 .mr(4)
11612 .nr(8)
11613 .kr(1)
11614 .sr(1)
11615 .m(4)
11616 .n(8)
11617 .k(1)
11618 .cn_stride(11)
11619 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11620 }
11621
11622 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_strided_a) {
11623 TEST_REQUIRES_X86_SSE;
11624 GemmMicrokernelTester()
11625 .mr(4)
11626 .nr(8)
11627 .kr(1)
11628 .sr(1)
11629 .m(4)
11630 .n(8)
11631 .k(1)
11632 .a_stride(3)
11633 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11634 }
11635
11636 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile) {
11637 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011638 for (uint32_t n = 1; n <= 8; n++) {
11639 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011640 GemmMicrokernelTester()
11641 .mr(4)
11642 .nr(8)
11643 .kr(1)
11644 .sr(1)
11645 .m(m)
11646 .n(n)
11647 .k(1)
11648 .iterations(1)
11649 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11650 }
11651 }
11652 }
11653
11654 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_m) {
11655 TEST_REQUIRES_X86_SSE;
11656 for (uint32_t m = 1; m <= 4; m++) {
11657 GemmMicrokernelTester()
11658 .mr(4)
11659 .nr(8)
11660 .kr(1)
11661 .sr(1)
11662 .m(m)
11663 .n(8)
11664 .k(1)
11665 .iterations(1)
11666 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11667 }
11668 }
11669
11670 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_n) {
11671 TEST_REQUIRES_X86_SSE;
11672 for (uint32_t n = 1; n <= 8; n++) {
11673 GemmMicrokernelTester()
11674 .mr(4)
11675 .nr(8)
11676 .kr(1)
11677 .sr(1)
11678 .m(4)
11679 .n(n)
11680 .k(1)
11681 .iterations(1)
11682 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11683 }
11684 }
11685
11686 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1) {
11687 TEST_REQUIRES_X86_SSE;
11688 for (size_t k = 2; k < 10; k++) {
11689 GemmMicrokernelTester()
11690 .mr(4)
11691 .nr(8)
11692 .kr(1)
11693 .sr(1)
11694 .m(4)
11695 .n(8)
11696 .k(k)
11697 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11698 }
11699 }
11700
11701 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1_strided_a) {
11702 TEST_REQUIRES_X86_SSE;
11703 for (size_t k = 2; k < 10; k++) {
11704 GemmMicrokernelTester()
11705 .mr(4)
11706 .nr(8)
11707 .kr(1)
11708 .sr(1)
11709 .m(4)
11710 .n(8)
11711 .k(k)
11712 .a_stride(11)
11713 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11714 }
11715 }
11716
11717 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, k_gt_1_subtile) {
11718 TEST_REQUIRES_X86_SSE;
11719 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011720 for (uint32_t n = 1; n <= 8; n++) {
11721 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011722 GemmMicrokernelTester()
11723 .mr(4)
11724 .nr(8)
11725 .kr(1)
11726 .sr(1)
11727 .m(m)
11728 .n(n)
11729 .k(k)
11730 .iterations(1)
11731 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11732 }
11733 }
11734 }
11735 }
11736
11737 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8) {
11738 TEST_REQUIRES_X86_SSE;
11739 for (uint32_t n = 9; n < 16; n++) {
11740 for (size_t k = 1; k <= 5; k += 2) {
11741 GemmMicrokernelTester()
11742 .mr(4)
11743 .nr(8)
11744 .kr(1)
11745 .sr(1)
11746 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011747 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011748 .k(k)
11749 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11750 }
11751 }
11752 }
11753
11754 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_cn) {
11755 TEST_REQUIRES_X86_SSE;
11756 for (uint32_t n = 9; n < 16; n++) {
11757 for (size_t k = 1; k <= 5; k += 2) {
11758 GemmMicrokernelTester()
11759 .mr(4)
11760 .nr(8)
11761 .kr(1)
11762 .sr(1)
11763 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011764 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011765 .k(k)
11766 .cn_stride(11)
11767 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11768 }
11769 }
11770 }
11771
11772 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_a) {
11773 TEST_REQUIRES_X86_SSE;
11774 for (uint32_t n = 9; n < 16; n++) {
11775 for (size_t k = 1; k <= 5; k += 2) {
11776 GemmMicrokernelTester()
11777 .mr(4)
11778 .nr(8)
11779 .kr(1)
11780 .sr(1)
11781 .m(4)
11782 .n(n)
11783 .k(k)
11784 .a_stride(7)
11785 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11786 }
11787 }
11788 }
11789
11790 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_gt_8_subtile) {
11791 TEST_REQUIRES_X86_SSE;
11792 for (uint32_t n = 9; n < 16; n++) {
11793 for (size_t k = 1; k <= 5; k += 2) {
11794 for (uint32_t m = 1; m <= 4; m++) {
11795 GemmMicrokernelTester()
11796 .mr(4)
11797 .nr(8)
11798 .kr(1)
11799 .sr(1)
11800 .m(m)
11801 .n(n)
11802 .k(k)
11803 .iterations(1)
11804 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11805 }
11806 }
11807 }
11808 }
11809
11810 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8) {
11811 TEST_REQUIRES_X86_SSE;
11812 for (uint32_t n = 16; n <= 24; n += 8) {
11813 for (size_t k = 1; k <= 5; k += 2) {
11814 GemmMicrokernelTester()
11815 .mr(4)
11816 .nr(8)
11817 .kr(1)
11818 .sr(1)
11819 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011820 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011821 .k(k)
11822 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11823 }
11824 }
11825 }
11826
11827 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_cn) {
11828 TEST_REQUIRES_X86_SSE;
11829 for (uint32_t n = 16; n <= 24; n += 8) {
11830 for (size_t k = 1; k <= 5; k += 2) {
11831 GemmMicrokernelTester()
11832 .mr(4)
11833 .nr(8)
11834 .kr(1)
11835 .sr(1)
11836 .m(4)
11837 .n(n)
11838 .k(k)
11839 .cn_stride(11)
11840 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11841 }
11842 }
11843 }
11844
11845 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_a) {
11846 TEST_REQUIRES_X86_SSE;
11847 for (uint32_t n = 16; n <= 24; n += 8) {
11848 for (size_t k = 1; k <= 5; k += 2) {
11849 GemmMicrokernelTester()
11850 .mr(4)
11851 .nr(8)
11852 .kr(1)
11853 .sr(1)
11854 .m(4)
11855 .n(n)
11856 .k(k)
11857 .a_stride(7)
11858 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11859 }
11860 }
11861 }
11862
11863 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, n_div_8_subtile) {
11864 TEST_REQUIRES_X86_SSE;
11865 for (uint32_t n = 16; n <= 24; n += 8) {
11866 for (size_t k = 1; k <= 5; k += 2) {
11867 for (uint32_t m = 1; m <= 4; m++) {
11868 GemmMicrokernelTester()
11869 .mr(4)
11870 .nr(8)
11871 .kr(1)
11872 .sr(1)
11873 .m(m)
11874 .n(n)
11875 .k(k)
11876 .iterations(1)
11877 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11878 }
11879 }
11880 }
11881 }
11882
11883 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cm_subtile) {
11884 TEST_REQUIRES_X86_SSE;
11885 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011886 for (uint32_t n = 1; n <= 8; n++) {
11887 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011888 GemmMicrokernelTester()
11889 .mr(4)
11890 .nr(8)
11891 .kr(1)
11892 .sr(1)
11893 .m(m)
11894 .n(n)
11895 .k(k)
11896 .cm_stride(11)
11897 .iterations(1)
11898 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11899 }
11900 }
11901 }
11902 }
11903
11904 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, qmin) {
11905 TEST_REQUIRES_X86_SSE;
11906 GemmMicrokernelTester()
11907 .mr(4)
11908 .nr(8)
11909 .kr(1)
11910 .sr(1)
11911 .m(4)
11912 .n(8)
11913 .k(1)
11914 .qmin(128)
11915 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11916 }
11917
11918 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, qmax) {
11919 TEST_REQUIRES_X86_SSE;
11920 GemmMicrokernelTester()
11921 .mr(4)
11922 .nr(8)
11923 .kr(1)
11924 .sr(1)
11925 .m(4)
11926 .n(8)
11927 .k(1)
11928 .qmax(128)
11929 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11930 }
11931
11932 TEST(F32_GEMMINC_MINMAX_4X8__SSE_LOAD1, strided_cm) {
11933 TEST_REQUIRES_X86_SSE;
11934 GemmMicrokernelTester()
11935 .mr(4)
11936 .nr(8)
11937 .kr(1)
11938 .sr(1)
11939 .m(4)
11940 .n(8)
11941 .k(1)
11942 .cm_stride(11)
11943 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
11944 }
11945#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11946
11947
11948#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11949 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1) {
11950 TEST_REQUIRES_X86_SSE;
11951 GemmMicrokernelTester()
11952 .mr(5)
11953 .nr(8)
11954 .kr(1)
11955 .sr(1)
11956 .m(5)
11957 .n(8)
11958 .k(1)
11959 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
11960 }
11961
11962 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cn) {
11963 TEST_REQUIRES_X86_SSE;
11964 GemmMicrokernelTester()
11965 .mr(5)
11966 .nr(8)
11967 .kr(1)
11968 .sr(1)
11969 .m(5)
11970 .n(8)
11971 .k(1)
11972 .cn_stride(11)
11973 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
11974 }
11975
11976 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_strided_a) {
11977 TEST_REQUIRES_X86_SSE;
11978 GemmMicrokernelTester()
11979 .mr(5)
11980 .nr(8)
11981 .kr(1)
11982 .sr(1)
11983 .m(5)
11984 .n(8)
11985 .k(1)
11986 .a_stride(3)
11987 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
11988 }
11989
11990 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile) {
11991 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011992 for (uint32_t n = 1; n <= 8; n++) {
11993 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011994 GemmMicrokernelTester()
11995 .mr(5)
11996 .nr(8)
11997 .kr(1)
11998 .sr(1)
11999 .m(m)
12000 .n(n)
12001 .k(1)
12002 .iterations(1)
12003 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12004 }
12005 }
12006 }
12007
12008 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_m) {
12009 TEST_REQUIRES_X86_SSE;
12010 for (uint32_t m = 1; m <= 5; m++) {
12011 GemmMicrokernelTester()
12012 .mr(5)
12013 .nr(8)
12014 .kr(1)
12015 .sr(1)
12016 .m(m)
12017 .n(8)
12018 .k(1)
12019 .iterations(1)
12020 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12021 }
12022 }
12023
12024 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_n) {
12025 TEST_REQUIRES_X86_SSE;
12026 for (uint32_t n = 1; n <= 8; n++) {
12027 GemmMicrokernelTester()
12028 .mr(5)
12029 .nr(8)
12030 .kr(1)
12031 .sr(1)
12032 .m(5)
12033 .n(n)
12034 .k(1)
12035 .iterations(1)
12036 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12037 }
12038 }
12039
12040 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1) {
12041 TEST_REQUIRES_X86_SSE;
12042 for (size_t k = 2; k < 10; k++) {
12043 GemmMicrokernelTester()
12044 .mr(5)
12045 .nr(8)
12046 .kr(1)
12047 .sr(1)
12048 .m(5)
12049 .n(8)
12050 .k(k)
12051 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12052 }
12053 }
12054
12055 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1_strided_a) {
12056 TEST_REQUIRES_X86_SSE;
12057 for (size_t k = 2; k < 10; k++) {
12058 GemmMicrokernelTester()
12059 .mr(5)
12060 .nr(8)
12061 .kr(1)
12062 .sr(1)
12063 .m(5)
12064 .n(8)
12065 .k(k)
12066 .a_stride(11)
12067 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12068 }
12069 }
12070
12071 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, k_gt_1_subtile) {
12072 TEST_REQUIRES_X86_SSE;
12073 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012074 for (uint32_t n = 1; n <= 8; n++) {
12075 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012076 GemmMicrokernelTester()
12077 .mr(5)
12078 .nr(8)
12079 .kr(1)
12080 .sr(1)
12081 .m(m)
12082 .n(n)
12083 .k(k)
12084 .iterations(1)
12085 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12086 }
12087 }
12088 }
12089 }
12090
12091 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8) {
12092 TEST_REQUIRES_X86_SSE;
12093 for (uint32_t n = 9; n < 16; n++) {
12094 for (size_t k = 1; k <= 5; k += 2) {
12095 GemmMicrokernelTester()
12096 .mr(5)
12097 .nr(8)
12098 .kr(1)
12099 .sr(1)
12100 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012101 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012102 .k(k)
12103 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12104 }
12105 }
12106 }
12107
12108 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_cn) {
12109 TEST_REQUIRES_X86_SSE;
12110 for (uint32_t n = 9; n < 16; n++) {
12111 for (size_t k = 1; k <= 5; k += 2) {
12112 GemmMicrokernelTester()
12113 .mr(5)
12114 .nr(8)
12115 .kr(1)
12116 .sr(1)
12117 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012118 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012119 .k(k)
12120 .cn_stride(11)
12121 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12122 }
12123 }
12124 }
12125
12126 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_a) {
12127 TEST_REQUIRES_X86_SSE;
12128 for (uint32_t n = 9; n < 16; n++) {
12129 for (size_t k = 1; k <= 5; k += 2) {
12130 GemmMicrokernelTester()
12131 .mr(5)
12132 .nr(8)
12133 .kr(1)
12134 .sr(1)
12135 .m(5)
12136 .n(n)
12137 .k(k)
12138 .a_stride(7)
12139 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12140 }
12141 }
12142 }
12143
12144 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_gt_8_subtile) {
12145 TEST_REQUIRES_X86_SSE;
12146 for (uint32_t n = 9; n < 16; n++) {
12147 for (size_t k = 1; k <= 5; k += 2) {
12148 for (uint32_t m = 1; m <= 5; m++) {
12149 GemmMicrokernelTester()
12150 .mr(5)
12151 .nr(8)
12152 .kr(1)
12153 .sr(1)
12154 .m(m)
12155 .n(n)
12156 .k(k)
12157 .iterations(1)
12158 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12159 }
12160 }
12161 }
12162 }
12163
12164 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8) {
12165 TEST_REQUIRES_X86_SSE;
12166 for (uint32_t n = 16; n <= 24; n += 8) {
12167 for (size_t k = 1; k <= 5; k += 2) {
12168 GemmMicrokernelTester()
12169 .mr(5)
12170 .nr(8)
12171 .kr(1)
12172 .sr(1)
12173 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012174 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012175 .k(k)
12176 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12177 }
12178 }
12179 }
12180
12181 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_cn) {
12182 TEST_REQUIRES_X86_SSE;
12183 for (uint32_t n = 16; n <= 24; n += 8) {
12184 for (size_t k = 1; k <= 5; k += 2) {
12185 GemmMicrokernelTester()
12186 .mr(5)
12187 .nr(8)
12188 .kr(1)
12189 .sr(1)
12190 .m(5)
12191 .n(n)
12192 .k(k)
12193 .cn_stride(11)
12194 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12195 }
12196 }
12197 }
12198
12199 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_a) {
12200 TEST_REQUIRES_X86_SSE;
12201 for (uint32_t n = 16; n <= 24; n += 8) {
12202 for (size_t k = 1; k <= 5; k += 2) {
12203 GemmMicrokernelTester()
12204 .mr(5)
12205 .nr(8)
12206 .kr(1)
12207 .sr(1)
12208 .m(5)
12209 .n(n)
12210 .k(k)
12211 .a_stride(7)
12212 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12213 }
12214 }
12215 }
12216
12217 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, n_div_8_subtile) {
12218 TEST_REQUIRES_X86_SSE;
12219 for (uint32_t n = 16; n <= 24; n += 8) {
12220 for (size_t k = 1; k <= 5; k += 2) {
12221 for (uint32_t m = 1; m <= 5; m++) {
12222 GemmMicrokernelTester()
12223 .mr(5)
12224 .nr(8)
12225 .kr(1)
12226 .sr(1)
12227 .m(m)
12228 .n(n)
12229 .k(k)
12230 .iterations(1)
12231 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12232 }
12233 }
12234 }
12235 }
12236
12237 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cm_subtile) {
12238 TEST_REQUIRES_X86_SSE;
12239 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012240 for (uint32_t n = 1; n <= 8; n++) {
12241 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012242 GemmMicrokernelTester()
12243 .mr(5)
12244 .nr(8)
12245 .kr(1)
12246 .sr(1)
12247 .m(m)
12248 .n(n)
12249 .k(k)
12250 .cm_stride(11)
12251 .iterations(1)
12252 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12253 }
12254 }
12255 }
12256 }
12257
12258 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, qmin) {
12259 TEST_REQUIRES_X86_SSE;
12260 GemmMicrokernelTester()
12261 .mr(5)
12262 .nr(8)
12263 .kr(1)
12264 .sr(1)
12265 .m(5)
12266 .n(8)
12267 .k(1)
12268 .qmin(128)
12269 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12270 }
12271
12272 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, qmax) {
12273 TEST_REQUIRES_X86_SSE;
12274 GemmMicrokernelTester()
12275 .mr(5)
12276 .nr(8)
12277 .kr(1)
12278 .sr(1)
12279 .m(5)
12280 .n(8)
12281 .k(1)
12282 .qmax(128)
12283 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12284 }
12285
12286 TEST(F32_GEMMINC_MINMAX_5X8__SSE_LOAD1, strided_cm) {
12287 TEST_REQUIRES_X86_SSE;
12288 GemmMicrokernelTester()
12289 .mr(5)
12290 .nr(8)
12291 .kr(1)
12292 .sr(1)
12293 .m(5)
12294 .n(8)
12295 .k(1)
12296 .cm_stride(11)
12297 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
12298 }
12299#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12300
12301
12302#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12303 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4) {
12304 TEST_REQUIRES_X86_SSE;
12305 GemmMicrokernelTester()
12306 .mr(3)
12307 .nr(8)
12308 .kr(1)
12309 .sr(1)
12310 .m(3)
12311 .n(8)
12312 .k(4)
12313 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12314 }
12315
12316 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cn) {
12317 TEST_REQUIRES_X86_SSE;
12318 GemmMicrokernelTester()
12319 .mr(3)
12320 .nr(8)
12321 .kr(1)
12322 .sr(1)
12323 .m(3)
12324 .n(8)
12325 .k(4)
12326 .cn_stride(11)
12327 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12328 }
12329
12330 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_strided_a) {
12331 TEST_REQUIRES_X86_SSE;
12332 GemmMicrokernelTester()
12333 .mr(3)
12334 .nr(8)
12335 .kr(1)
12336 .sr(1)
12337 .m(3)
12338 .n(8)
12339 .k(4)
12340 .a_stride(7)
12341 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12342 }
12343
12344 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile) {
12345 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012346 for (uint32_t n = 1; n <= 8; n++) {
12347 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012348 GemmMicrokernelTester()
12349 .mr(3)
12350 .nr(8)
12351 .kr(1)
12352 .sr(1)
12353 .m(m)
12354 .n(n)
12355 .k(4)
12356 .iterations(1)
12357 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12358 }
12359 }
12360 }
12361
12362 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_m) {
12363 TEST_REQUIRES_X86_SSE;
12364 for (uint32_t m = 1; m <= 3; m++) {
12365 GemmMicrokernelTester()
12366 .mr(3)
12367 .nr(8)
12368 .kr(1)
12369 .sr(1)
12370 .m(m)
12371 .n(8)
12372 .k(4)
12373 .iterations(1)
12374 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12375 }
12376 }
12377
12378 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_n) {
12379 TEST_REQUIRES_X86_SSE;
12380 for (uint32_t n = 1; n <= 8; n++) {
12381 GemmMicrokernelTester()
12382 .mr(3)
12383 .nr(8)
12384 .kr(1)
12385 .sr(1)
12386 .m(3)
12387 .n(n)
12388 .k(4)
12389 .iterations(1)
12390 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12391 }
12392 }
12393
12394 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4) {
12395 TEST_REQUIRES_X86_SSE;
12396 for (size_t k = 1; k < 4; k++) {
12397 GemmMicrokernelTester()
12398 .mr(3)
12399 .nr(8)
12400 .kr(1)
12401 .sr(1)
12402 .m(3)
12403 .n(8)
12404 .k(k)
12405 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12406 }
12407 }
12408
12409 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4_strided_a) {
12410 TEST_REQUIRES_X86_SSE;
12411 for (size_t k = 1; k < 4; k++) {
12412 GemmMicrokernelTester()
12413 .mr(3)
12414 .nr(8)
12415 .kr(1)
12416 .sr(1)
12417 .m(3)
12418 .n(8)
12419 .k(k)
12420 .a_stride(7)
12421 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12422 }
12423 }
12424
12425 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_lt_4_subtile) {
12426 TEST_REQUIRES_X86_SSE;
12427 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012428 for (uint32_t n = 1; n <= 8; n++) {
12429 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012430 GemmMicrokernelTester()
12431 .mr(3)
12432 .nr(8)
12433 .kr(1)
12434 .sr(1)
12435 .m(m)
12436 .n(n)
12437 .k(k)
12438 .iterations(1)
12439 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12440 }
12441 }
12442 }
12443 }
12444
12445 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4) {
12446 TEST_REQUIRES_X86_SSE;
12447 for (size_t k = 5; k < 8; k++) {
12448 GemmMicrokernelTester()
12449 .mr(3)
12450 .nr(8)
12451 .kr(1)
12452 .sr(1)
12453 .m(3)
12454 .n(8)
12455 .k(k)
12456 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12457 }
12458 }
12459
12460 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4_strided_a) {
12461 TEST_REQUIRES_X86_SSE;
12462 for (size_t k = 5; k < 8; k++) {
12463 GemmMicrokernelTester()
12464 .mr(3)
12465 .nr(8)
12466 .kr(1)
12467 .sr(1)
12468 .m(3)
12469 .n(8)
12470 .k(k)
12471 .a_stride(11)
12472 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12473 }
12474 }
12475
12476 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_gt_4_subtile) {
12477 TEST_REQUIRES_X86_SSE;
12478 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012479 for (uint32_t n = 1; n <= 8; n++) {
12480 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012481 GemmMicrokernelTester()
12482 .mr(3)
12483 .nr(8)
12484 .kr(1)
12485 .sr(1)
12486 .m(m)
12487 .n(n)
12488 .k(k)
12489 .iterations(1)
12490 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12491 }
12492 }
12493 }
12494 }
12495
12496 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4) {
12497 TEST_REQUIRES_X86_SSE;
12498 for (size_t k = 8; k <= 40; k += 4) {
12499 GemmMicrokernelTester()
12500 .mr(3)
12501 .nr(8)
12502 .kr(1)
12503 .sr(1)
12504 .m(3)
12505 .n(8)
12506 .k(k)
12507 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12508 }
12509 }
12510
12511 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4_strided_a) {
12512 TEST_REQUIRES_X86_SSE;
12513 for (size_t k = 8; k <= 40; k += 4) {
12514 GemmMicrokernelTester()
12515 .mr(3)
12516 .nr(8)
12517 .kr(1)
12518 .sr(1)
12519 .m(3)
12520 .n(8)
12521 .k(k)
12522 .a_stride(43)
12523 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12524 }
12525 }
12526
12527 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, k_div_4_subtile) {
12528 TEST_REQUIRES_X86_SSE;
12529 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012530 for (uint32_t n = 1; n <= 8; n++) {
12531 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012532 GemmMicrokernelTester()
12533 .mr(3)
12534 .nr(8)
12535 .kr(1)
12536 .sr(1)
12537 .m(m)
12538 .n(n)
12539 .k(k)
12540 .iterations(1)
12541 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12542 }
12543 }
12544 }
12545 }
12546
12547 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8) {
12548 TEST_REQUIRES_X86_SSE;
12549 for (uint32_t n = 9; n < 16; n++) {
12550 for (size_t k = 1; k <= 20; k += 5) {
12551 GemmMicrokernelTester()
12552 .mr(3)
12553 .nr(8)
12554 .kr(1)
12555 .sr(1)
12556 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012557 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012558 .k(k)
12559 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12560 }
12561 }
12562 }
12563
12564 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_strided_cn) {
12565 TEST_REQUIRES_X86_SSE;
12566 for (uint32_t n = 9; n < 16; n++) {
12567 for (size_t k = 1; k <= 20; k += 5) {
12568 GemmMicrokernelTester()
12569 .mr(3)
12570 .nr(8)
12571 .kr(1)
12572 .sr(1)
12573 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012574 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012575 .k(k)
12576 .cn_stride(11)
12577 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12578 }
12579 }
12580 }
12581
12582 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_strided_a) {
12583 TEST_REQUIRES_X86_SSE;
12584 for (uint32_t n = 9; n < 16; n++) {
12585 for (size_t k = 1; k <= 20; k += 5) {
12586 GemmMicrokernelTester()
12587 .mr(3)
12588 .nr(8)
12589 .kr(1)
12590 .sr(1)
12591 .m(3)
12592 .n(n)
12593 .k(k)
12594 .a_stride(23)
12595 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12596 }
12597 }
12598 }
12599
12600 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_gt_8_subtile) {
12601 TEST_REQUIRES_X86_SSE;
12602 for (uint32_t n = 9; n < 16; n++) {
12603 for (size_t k = 1; k <= 20; k += 5) {
12604 for (uint32_t m = 1; m <= 3; m++) {
12605 GemmMicrokernelTester()
12606 .mr(3)
12607 .nr(8)
12608 .kr(1)
12609 .sr(1)
12610 .m(m)
12611 .n(n)
12612 .k(k)
12613 .iterations(1)
12614 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12615 }
12616 }
12617 }
12618 }
12619
12620 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8) {
12621 TEST_REQUIRES_X86_SSE;
12622 for (uint32_t n = 16; n <= 24; n += 8) {
12623 for (size_t k = 1; k <= 20; k += 5) {
12624 GemmMicrokernelTester()
12625 .mr(3)
12626 .nr(8)
12627 .kr(1)
12628 .sr(1)
12629 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012630 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012631 .k(k)
12632 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12633 }
12634 }
12635 }
12636
12637 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_strided_cn) {
12638 TEST_REQUIRES_X86_SSE;
12639 for (uint32_t n = 16; n <= 24; n += 8) {
12640 for (size_t k = 1; k <= 20; k += 5) {
12641 GemmMicrokernelTester()
12642 .mr(3)
12643 .nr(8)
12644 .kr(1)
12645 .sr(1)
12646 .m(3)
12647 .n(n)
12648 .k(k)
12649 .cn_stride(11)
12650 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12651 }
12652 }
12653 }
12654
12655 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_strided_a) {
12656 TEST_REQUIRES_X86_SSE;
12657 for (uint32_t n = 16; n <= 24; n += 8) {
12658 for (size_t k = 1; k <= 20; k += 5) {
12659 GemmMicrokernelTester()
12660 .mr(3)
12661 .nr(8)
12662 .kr(1)
12663 .sr(1)
12664 .m(3)
12665 .n(n)
12666 .k(k)
12667 .a_stride(23)
12668 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12669 }
12670 }
12671 }
12672
12673 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, n_div_8_subtile) {
12674 TEST_REQUIRES_X86_SSE;
12675 for (uint32_t n = 16; n <= 24; n += 8) {
12676 for (size_t k = 1; k <= 20; k += 5) {
12677 for (uint32_t m = 1; m <= 3; m++) {
12678 GemmMicrokernelTester()
12679 .mr(3)
12680 .nr(8)
12681 .kr(1)
12682 .sr(1)
12683 .m(m)
12684 .n(n)
12685 .k(k)
12686 .iterations(1)
12687 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12688 }
12689 }
12690 }
12691 }
12692
12693 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cm_subtile) {
12694 TEST_REQUIRES_X86_SSE;
12695 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012696 for (uint32_t n = 1; n <= 8; n++) {
12697 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012698 GemmMicrokernelTester()
12699 .mr(3)
12700 .nr(8)
12701 .kr(1)
12702 .sr(1)
12703 .m(m)
12704 .n(n)
12705 .k(k)
12706 .cm_stride(11)
12707 .iterations(1)
12708 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12709 }
12710 }
12711 }
12712 }
12713
12714 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, qmin) {
12715 TEST_REQUIRES_X86_SSE;
12716 GemmMicrokernelTester()
12717 .mr(3)
12718 .nr(8)
12719 .kr(1)
12720 .sr(1)
12721 .m(3)
12722 .n(8)
12723 .k(4)
12724 .qmin(128)
12725 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12726 }
12727
12728 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, qmax) {
12729 TEST_REQUIRES_X86_SSE;
12730 GemmMicrokernelTester()
12731 .mr(3)
12732 .nr(8)
12733 .kr(1)
12734 .sr(1)
12735 .m(3)
12736 .n(8)
12737 .k(4)
12738 .qmax(128)
12739 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12740 }
12741
12742 TEST(F32_GEMMINC_MINMAX_3X8__SSE_DUP, strided_cm) {
12743 TEST_REQUIRES_X86_SSE;
12744 GemmMicrokernelTester()
12745 .mr(3)
12746 .nr(8)
12747 .kr(1)
12748 .sr(1)
12749 .m(3)
12750 .n(8)
12751 .k(4)
12752 .cm_stride(11)
12753 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
12754 }
12755#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12756
12757
12758#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12759 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4) {
12760 TEST_REQUIRES_X86_SSE;
12761 GemmMicrokernelTester()
12762 .mr(5)
12763 .nr(8)
12764 .kr(1)
12765 .sr(1)
12766 .m(5)
12767 .n(8)
12768 .k(4)
12769 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12770 }
12771
12772 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cn) {
12773 TEST_REQUIRES_X86_SSE;
12774 GemmMicrokernelTester()
12775 .mr(5)
12776 .nr(8)
12777 .kr(1)
12778 .sr(1)
12779 .m(5)
12780 .n(8)
12781 .k(4)
12782 .cn_stride(11)
12783 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12784 }
12785
12786 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_strided_a) {
12787 TEST_REQUIRES_X86_SSE;
12788 GemmMicrokernelTester()
12789 .mr(5)
12790 .nr(8)
12791 .kr(1)
12792 .sr(1)
12793 .m(5)
12794 .n(8)
12795 .k(4)
12796 .a_stride(7)
12797 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12798 }
12799
12800 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile) {
12801 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012802 for (uint32_t n = 1; n <= 8; n++) {
12803 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012804 GemmMicrokernelTester()
12805 .mr(5)
12806 .nr(8)
12807 .kr(1)
12808 .sr(1)
12809 .m(m)
12810 .n(n)
12811 .k(4)
12812 .iterations(1)
12813 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12814 }
12815 }
12816 }
12817
12818 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_m) {
12819 TEST_REQUIRES_X86_SSE;
12820 for (uint32_t m = 1; m <= 5; m++) {
12821 GemmMicrokernelTester()
12822 .mr(5)
12823 .nr(8)
12824 .kr(1)
12825 .sr(1)
12826 .m(m)
12827 .n(8)
12828 .k(4)
12829 .iterations(1)
12830 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12831 }
12832 }
12833
12834 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_n) {
12835 TEST_REQUIRES_X86_SSE;
12836 for (uint32_t n = 1; n <= 8; n++) {
12837 GemmMicrokernelTester()
12838 .mr(5)
12839 .nr(8)
12840 .kr(1)
12841 .sr(1)
12842 .m(5)
12843 .n(n)
12844 .k(4)
12845 .iterations(1)
12846 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12847 }
12848 }
12849
12850 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4) {
12851 TEST_REQUIRES_X86_SSE;
12852 for (size_t k = 1; k < 4; k++) {
12853 GemmMicrokernelTester()
12854 .mr(5)
12855 .nr(8)
12856 .kr(1)
12857 .sr(1)
12858 .m(5)
12859 .n(8)
12860 .k(k)
12861 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12862 }
12863 }
12864
12865 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4_strided_a) {
12866 TEST_REQUIRES_X86_SSE;
12867 for (size_t k = 1; k < 4; k++) {
12868 GemmMicrokernelTester()
12869 .mr(5)
12870 .nr(8)
12871 .kr(1)
12872 .sr(1)
12873 .m(5)
12874 .n(8)
12875 .k(k)
12876 .a_stride(7)
12877 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12878 }
12879 }
12880
12881 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_lt_4_subtile) {
12882 TEST_REQUIRES_X86_SSE;
12883 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012884 for (uint32_t n = 1; n <= 8; n++) {
12885 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012886 GemmMicrokernelTester()
12887 .mr(5)
12888 .nr(8)
12889 .kr(1)
12890 .sr(1)
12891 .m(m)
12892 .n(n)
12893 .k(k)
12894 .iterations(1)
12895 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12896 }
12897 }
12898 }
12899 }
12900
12901 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4) {
12902 TEST_REQUIRES_X86_SSE;
12903 for (size_t k = 5; k < 8; k++) {
12904 GemmMicrokernelTester()
12905 .mr(5)
12906 .nr(8)
12907 .kr(1)
12908 .sr(1)
12909 .m(5)
12910 .n(8)
12911 .k(k)
12912 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12913 }
12914 }
12915
12916 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4_strided_a) {
12917 TEST_REQUIRES_X86_SSE;
12918 for (size_t k = 5; k < 8; k++) {
12919 GemmMicrokernelTester()
12920 .mr(5)
12921 .nr(8)
12922 .kr(1)
12923 .sr(1)
12924 .m(5)
12925 .n(8)
12926 .k(k)
12927 .a_stride(11)
12928 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12929 }
12930 }
12931
12932 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_gt_4_subtile) {
12933 TEST_REQUIRES_X86_SSE;
12934 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012935 for (uint32_t n = 1; n <= 8; n++) {
12936 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012937 GemmMicrokernelTester()
12938 .mr(5)
12939 .nr(8)
12940 .kr(1)
12941 .sr(1)
12942 .m(m)
12943 .n(n)
12944 .k(k)
12945 .iterations(1)
12946 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12947 }
12948 }
12949 }
12950 }
12951
12952 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4) {
12953 TEST_REQUIRES_X86_SSE;
12954 for (size_t k = 8; k <= 40; k += 4) {
12955 GemmMicrokernelTester()
12956 .mr(5)
12957 .nr(8)
12958 .kr(1)
12959 .sr(1)
12960 .m(5)
12961 .n(8)
12962 .k(k)
12963 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12964 }
12965 }
12966
12967 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4_strided_a) {
12968 TEST_REQUIRES_X86_SSE;
12969 for (size_t k = 8; k <= 40; k += 4) {
12970 GemmMicrokernelTester()
12971 .mr(5)
12972 .nr(8)
12973 .kr(1)
12974 .sr(1)
12975 .m(5)
12976 .n(8)
12977 .k(k)
12978 .a_stride(43)
12979 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12980 }
12981 }
12982
12983 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, k_div_4_subtile) {
12984 TEST_REQUIRES_X86_SSE;
12985 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012986 for (uint32_t n = 1; n <= 8; n++) {
12987 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012988 GemmMicrokernelTester()
12989 .mr(5)
12990 .nr(8)
12991 .kr(1)
12992 .sr(1)
12993 .m(m)
12994 .n(n)
12995 .k(k)
12996 .iterations(1)
12997 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
12998 }
12999 }
13000 }
13001 }
13002
13003 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8) {
13004 TEST_REQUIRES_X86_SSE;
13005 for (uint32_t n = 9; n < 16; n++) {
13006 for (size_t k = 1; k <= 20; k += 5) {
13007 GemmMicrokernelTester()
13008 .mr(5)
13009 .nr(8)
13010 .kr(1)
13011 .sr(1)
13012 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013013 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013014 .k(k)
13015 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13016 }
13017 }
13018 }
13019
13020 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_strided_cn) {
13021 TEST_REQUIRES_X86_SSE;
13022 for (uint32_t n = 9; n < 16; n++) {
13023 for (size_t k = 1; k <= 20; k += 5) {
13024 GemmMicrokernelTester()
13025 .mr(5)
13026 .nr(8)
13027 .kr(1)
13028 .sr(1)
13029 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013030 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013031 .k(k)
13032 .cn_stride(11)
13033 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13034 }
13035 }
13036 }
13037
13038 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_strided_a) {
13039 TEST_REQUIRES_X86_SSE;
13040 for (uint32_t n = 9; n < 16; n++) {
13041 for (size_t k = 1; k <= 20; k += 5) {
13042 GemmMicrokernelTester()
13043 .mr(5)
13044 .nr(8)
13045 .kr(1)
13046 .sr(1)
13047 .m(5)
13048 .n(n)
13049 .k(k)
13050 .a_stride(23)
13051 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13052 }
13053 }
13054 }
13055
13056 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_gt_8_subtile) {
13057 TEST_REQUIRES_X86_SSE;
13058 for (uint32_t n = 9; n < 16; n++) {
13059 for (size_t k = 1; k <= 20; k += 5) {
13060 for (uint32_t m = 1; m <= 5; m++) {
13061 GemmMicrokernelTester()
13062 .mr(5)
13063 .nr(8)
13064 .kr(1)
13065 .sr(1)
13066 .m(m)
13067 .n(n)
13068 .k(k)
13069 .iterations(1)
13070 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13071 }
13072 }
13073 }
13074 }
13075
13076 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8) {
13077 TEST_REQUIRES_X86_SSE;
13078 for (uint32_t n = 16; n <= 24; n += 8) {
13079 for (size_t k = 1; k <= 20; k += 5) {
13080 GemmMicrokernelTester()
13081 .mr(5)
13082 .nr(8)
13083 .kr(1)
13084 .sr(1)
13085 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013086 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013087 .k(k)
13088 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13089 }
13090 }
13091 }
13092
13093 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_strided_cn) {
13094 TEST_REQUIRES_X86_SSE;
13095 for (uint32_t n = 16; n <= 24; n += 8) {
13096 for (size_t k = 1; k <= 20; k += 5) {
13097 GemmMicrokernelTester()
13098 .mr(5)
13099 .nr(8)
13100 .kr(1)
13101 .sr(1)
13102 .m(5)
13103 .n(n)
13104 .k(k)
13105 .cn_stride(11)
13106 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13107 }
13108 }
13109 }
13110
13111 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_strided_a) {
13112 TEST_REQUIRES_X86_SSE;
13113 for (uint32_t n = 16; n <= 24; n += 8) {
13114 for (size_t k = 1; k <= 20; k += 5) {
13115 GemmMicrokernelTester()
13116 .mr(5)
13117 .nr(8)
13118 .kr(1)
13119 .sr(1)
13120 .m(5)
13121 .n(n)
13122 .k(k)
13123 .a_stride(23)
13124 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13125 }
13126 }
13127 }
13128
13129 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, n_div_8_subtile) {
13130 TEST_REQUIRES_X86_SSE;
13131 for (uint32_t n = 16; n <= 24; n += 8) {
13132 for (size_t k = 1; k <= 20; k += 5) {
13133 for (uint32_t m = 1; m <= 5; m++) {
13134 GemmMicrokernelTester()
13135 .mr(5)
13136 .nr(8)
13137 .kr(1)
13138 .sr(1)
13139 .m(m)
13140 .n(n)
13141 .k(k)
13142 .iterations(1)
13143 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13144 }
13145 }
13146 }
13147 }
13148
13149 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cm_subtile) {
13150 TEST_REQUIRES_X86_SSE;
13151 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013152 for (uint32_t n = 1; n <= 8; n++) {
13153 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013154 GemmMicrokernelTester()
13155 .mr(5)
13156 .nr(8)
13157 .kr(1)
13158 .sr(1)
13159 .m(m)
13160 .n(n)
13161 .k(k)
13162 .cm_stride(11)
13163 .iterations(1)
13164 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13165 }
13166 }
13167 }
13168 }
13169
13170 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, qmin) {
13171 TEST_REQUIRES_X86_SSE;
13172 GemmMicrokernelTester()
13173 .mr(5)
13174 .nr(8)
13175 .kr(1)
13176 .sr(1)
13177 .m(5)
13178 .n(8)
13179 .k(4)
13180 .qmin(128)
13181 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13182 }
13183
13184 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, qmax) {
13185 TEST_REQUIRES_X86_SSE;
13186 GemmMicrokernelTester()
13187 .mr(5)
13188 .nr(8)
13189 .kr(1)
13190 .sr(1)
13191 .m(5)
13192 .n(8)
13193 .k(4)
13194 .qmax(128)
13195 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13196 }
13197
13198 TEST(F32_GEMMINC_MINMAX_5X8__SSE_DUP, strided_cm) {
13199 TEST_REQUIRES_X86_SSE;
13200 GemmMicrokernelTester()
13201 .mr(5)
13202 .nr(8)
13203 .kr(1)
13204 .sr(1)
13205 .m(5)
13206 .n(8)
13207 .k(4)
13208 .cm_stride(11)
13209 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
13210 }
13211#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13212
13213
13214#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13215 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4) {
13216 TEST_REQUIRES_X86_SSE;
13217 GemmMicrokernelTester()
13218 .mr(1)
13219 .nr(8)
13220 .kr(1)
13221 .sr(4)
13222 .m(1)
13223 .n(8)
13224 .k(4)
13225 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13226 }
13227
13228 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cn) {
13229 TEST_REQUIRES_X86_SSE;
13230 GemmMicrokernelTester()
13231 .mr(1)
13232 .nr(8)
13233 .kr(1)
13234 .sr(4)
13235 .m(1)
13236 .n(8)
13237 .k(4)
13238 .cn_stride(11)
13239 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13240 }
13241
13242 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_strided_a) {
13243 TEST_REQUIRES_X86_SSE;
13244 GemmMicrokernelTester()
13245 .mr(1)
13246 .nr(8)
13247 .kr(1)
13248 .sr(4)
13249 .m(1)
13250 .n(8)
13251 .k(4)
13252 .a_stride(7)
13253 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13254 }
13255
13256 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile) {
13257 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013258 for (uint32_t n = 1; n <= 8; n++) {
13259 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013260 GemmMicrokernelTester()
13261 .mr(1)
13262 .nr(8)
13263 .kr(1)
13264 .sr(4)
13265 .m(m)
13266 .n(n)
13267 .k(4)
13268 .iterations(1)
13269 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13270 }
13271 }
13272 }
13273
13274 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile_m) {
13275 TEST_REQUIRES_X86_SSE;
13276 for (uint32_t m = 1; m <= 1; m++) {
13277 GemmMicrokernelTester()
13278 .mr(1)
13279 .nr(8)
13280 .kr(1)
13281 .sr(4)
13282 .m(m)
13283 .n(8)
13284 .k(4)
13285 .iterations(1)
13286 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13287 }
13288 }
13289
13290 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_eq_4_subtile_n) {
13291 TEST_REQUIRES_X86_SSE;
13292 for (uint32_t n = 1; n <= 8; n++) {
13293 GemmMicrokernelTester()
13294 .mr(1)
13295 .nr(8)
13296 .kr(1)
13297 .sr(4)
13298 .m(1)
13299 .n(n)
13300 .k(4)
13301 .iterations(1)
13302 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13303 }
13304 }
13305
13306 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4) {
13307 TEST_REQUIRES_X86_SSE;
13308 for (size_t k = 1; k < 4; k++) {
13309 GemmMicrokernelTester()
13310 .mr(1)
13311 .nr(8)
13312 .kr(1)
13313 .sr(4)
13314 .m(1)
13315 .n(8)
13316 .k(k)
13317 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13318 }
13319 }
13320
13321 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4_strided_a) {
13322 TEST_REQUIRES_X86_SSE;
13323 for (size_t k = 1; k < 4; k++) {
13324 GemmMicrokernelTester()
13325 .mr(1)
13326 .nr(8)
13327 .kr(1)
13328 .sr(4)
13329 .m(1)
13330 .n(8)
13331 .k(k)
13332 .a_stride(7)
13333 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13334 }
13335 }
13336
13337 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_lt_4_subtile) {
13338 TEST_REQUIRES_X86_SSE;
13339 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013340 for (uint32_t n = 1; n <= 8; n++) {
13341 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013342 GemmMicrokernelTester()
13343 .mr(1)
13344 .nr(8)
13345 .kr(1)
13346 .sr(4)
13347 .m(m)
13348 .n(n)
13349 .k(k)
13350 .iterations(1)
13351 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13352 }
13353 }
13354 }
13355 }
13356
13357 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4) {
13358 TEST_REQUIRES_X86_SSE;
13359 for (size_t k = 5; k < 8; k++) {
13360 GemmMicrokernelTester()
13361 .mr(1)
13362 .nr(8)
13363 .kr(1)
13364 .sr(4)
13365 .m(1)
13366 .n(8)
13367 .k(k)
13368 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13369 }
13370 }
13371
13372 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4_strided_a) {
13373 TEST_REQUIRES_X86_SSE;
13374 for (size_t k = 5; k < 8; k++) {
13375 GemmMicrokernelTester()
13376 .mr(1)
13377 .nr(8)
13378 .kr(1)
13379 .sr(4)
13380 .m(1)
13381 .n(8)
13382 .k(k)
13383 .a_stride(11)
13384 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13385 }
13386 }
13387
13388 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_gt_4_subtile) {
13389 TEST_REQUIRES_X86_SSE;
13390 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013391 for (uint32_t n = 1; n <= 8; n++) {
13392 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013393 GemmMicrokernelTester()
13394 .mr(1)
13395 .nr(8)
13396 .kr(1)
13397 .sr(4)
13398 .m(m)
13399 .n(n)
13400 .k(k)
13401 .iterations(1)
13402 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13403 }
13404 }
13405 }
13406 }
13407
13408 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4) {
13409 TEST_REQUIRES_X86_SSE;
13410 for (size_t k = 8; k <= 40; k += 4) {
13411 GemmMicrokernelTester()
13412 .mr(1)
13413 .nr(8)
13414 .kr(1)
13415 .sr(4)
13416 .m(1)
13417 .n(8)
13418 .k(k)
13419 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13420 }
13421 }
13422
13423 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4_strided_a) {
13424 TEST_REQUIRES_X86_SSE;
13425 for (size_t k = 8; k <= 40; k += 4) {
13426 GemmMicrokernelTester()
13427 .mr(1)
13428 .nr(8)
13429 .kr(1)
13430 .sr(4)
13431 .m(1)
13432 .n(8)
13433 .k(k)
13434 .a_stride(43)
13435 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13436 }
13437 }
13438
13439 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, k_div_4_subtile) {
13440 TEST_REQUIRES_X86_SSE;
13441 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013442 for (uint32_t n = 1; n <= 8; n++) {
13443 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013444 GemmMicrokernelTester()
13445 .mr(1)
13446 .nr(8)
13447 .kr(1)
13448 .sr(4)
13449 .m(m)
13450 .n(n)
13451 .k(k)
13452 .iterations(1)
13453 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13454 }
13455 }
13456 }
13457 }
13458
13459 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8) {
13460 TEST_REQUIRES_X86_SSE;
13461 for (uint32_t n = 9; n < 16; n++) {
13462 for (size_t k = 1; k <= 20; k += 5) {
13463 GemmMicrokernelTester()
13464 .mr(1)
13465 .nr(8)
13466 .kr(1)
13467 .sr(4)
13468 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013469 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013470 .k(k)
13471 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13472 }
13473 }
13474 }
13475
13476 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_strided_cn) {
13477 TEST_REQUIRES_X86_SSE;
13478 for (uint32_t n = 9; n < 16; n++) {
13479 for (size_t k = 1; k <= 20; k += 5) {
13480 GemmMicrokernelTester()
13481 .mr(1)
13482 .nr(8)
13483 .kr(1)
13484 .sr(4)
13485 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013486 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013487 .k(k)
13488 .cn_stride(11)
13489 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13490 }
13491 }
13492 }
13493
13494 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_strided_a) {
13495 TEST_REQUIRES_X86_SSE;
13496 for (uint32_t n = 9; n < 16; n++) {
13497 for (size_t k = 1; k <= 20; k += 5) {
13498 GemmMicrokernelTester()
13499 .mr(1)
13500 .nr(8)
13501 .kr(1)
13502 .sr(4)
13503 .m(1)
13504 .n(n)
13505 .k(k)
13506 .a_stride(23)
13507 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13508 }
13509 }
13510 }
13511
13512 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_gt_8_subtile) {
13513 TEST_REQUIRES_X86_SSE;
13514 for (uint32_t n = 9; n < 16; n++) {
13515 for (size_t k = 1; k <= 20; k += 5) {
13516 for (uint32_t m = 1; m <= 1; m++) {
13517 GemmMicrokernelTester()
13518 .mr(1)
13519 .nr(8)
13520 .kr(1)
13521 .sr(4)
13522 .m(m)
13523 .n(n)
13524 .k(k)
13525 .iterations(1)
13526 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13527 }
13528 }
13529 }
13530 }
13531
13532 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8) {
13533 TEST_REQUIRES_X86_SSE;
13534 for (uint32_t n = 16; n <= 24; n += 8) {
13535 for (size_t k = 1; k <= 20; k += 5) {
13536 GemmMicrokernelTester()
13537 .mr(1)
13538 .nr(8)
13539 .kr(1)
13540 .sr(4)
13541 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013542 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013543 .k(k)
13544 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13545 }
13546 }
13547 }
13548
13549 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_strided_cn) {
13550 TEST_REQUIRES_X86_SSE;
13551 for (uint32_t n = 16; n <= 24; n += 8) {
13552 for (size_t k = 1; k <= 20; k += 5) {
13553 GemmMicrokernelTester()
13554 .mr(1)
13555 .nr(8)
13556 .kr(1)
13557 .sr(4)
13558 .m(1)
13559 .n(n)
13560 .k(k)
13561 .cn_stride(11)
13562 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13563 }
13564 }
13565 }
13566
13567 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_strided_a) {
13568 TEST_REQUIRES_X86_SSE;
13569 for (uint32_t n = 16; n <= 24; n += 8) {
13570 for (size_t k = 1; k <= 20; k += 5) {
13571 GemmMicrokernelTester()
13572 .mr(1)
13573 .nr(8)
13574 .kr(1)
13575 .sr(4)
13576 .m(1)
13577 .n(n)
13578 .k(k)
13579 .a_stride(23)
13580 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13581 }
13582 }
13583 }
13584
13585 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, n_div_8_subtile) {
13586 TEST_REQUIRES_X86_SSE;
13587 for (uint32_t n = 16; n <= 24; n += 8) {
13588 for (size_t k = 1; k <= 20; k += 5) {
13589 for (uint32_t m = 1; m <= 1; m++) {
13590 GemmMicrokernelTester()
13591 .mr(1)
13592 .nr(8)
13593 .kr(1)
13594 .sr(4)
13595 .m(m)
13596 .n(n)
13597 .k(k)
13598 .iterations(1)
13599 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13600 }
13601 }
13602 }
13603 }
13604
13605 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cm_subtile) {
13606 TEST_REQUIRES_X86_SSE;
13607 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013608 for (uint32_t n = 1; n <= 8; n++) {
13609 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013610 GemmMicrokernelTester()
13611 .mr(1)
13612 .nr(8)
13613 .kr(1)
13614 .sr(4)
13615 .m(m)
13616 .n(n)
13617 .k(k)
13618 .cm_stride(11)
13619 .iterations(1)
13620 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13621 }
13622 }
13623 }
13624 }
13625
13626 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, qmin) {
13627 TEST_REQUIRES_X86_SSE;
13628 GemmMicrokernelTester()
13629 .mr(1)
13630 .nr(8)
13631 .kr(1)
13632 .sr(4)
13633 .m(1)
13634 .n(8)
13635 .k(4)
13636 .qmin(128)
13637 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13638 }
13639
13640 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, qmax) {
13641 TEST_REQUIRES_X86_SSE;
13642 GemmMicrokernelTester()
13643 .mr(1)
13644 .nr(8)
13645 .kr(1)
13646 .sr(4)
13647 .m(1)
13648 .n(8)
13649 .k(4)
13650 .qmax(128)
13651 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13652 }
13653
13654 TEST(F32_GEMMINC_MINMAX_1X8S4__SSE, strided_cm) {
13655 TEST_REQUIRES_X86_SSE;
13656 GemmMicrokernelTester()
13657 .mr(1)
13658 .nr(8)
13659 .kr(1)
13660 .sr(4)
13661 .m(1)
13662 .n(8)
13663 .k(4)
13664 .cm_stride(11)
13665 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
13666 }
13667#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13668
13669
13670#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13671 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4) {
13672 TEST_REQUIRES_X86_SSE;
13673 GemmMicrokernelTester()
13674 .mr(4)
13675 .nr(8)
13676 .kr(1)
13677 .sr(4)
13678 .m(4)
13679 .n(8)
13680 .k(4)
13681 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13682 }
13683
13684 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cn) {
13685 TEST_REQUIRES_X86_SSE;
13686 GemmMicrokernelTester()
13687 .mr(4)
13688 .nr(8)
13689 .kr(1)
13690 .sr(4)
13691 .m(4)
13692 .n(8)
13693 .k(4)
13694 .cn_stride(11)
13695 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13696 }
13697
13698 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_strided_a) {
13699 TEST_REQUIRES_X86_SSE;
13700 GemmMicrokernelTester()
13701 .mr(4)
13702 .nr(8)
13703 .kr(1)
13704 .sr(4)
13705 .m(4)
13706 .n(8)
13707 .k(4)
13708 .a_stride(7)
13709 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13710 }
13711
13712 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile) {
13713 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013714 for (uint32_t n = 1; n <= 8; n++) {
13715 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013716 GemmMicrokernelTester()
13717 .mr(4)
13718 .nr(8)
13719 .kr(1)
13720 .sr(4)
13721 .m(m)
13722 .n(n)
13723 .k(4)
13724 .iterations(1)
13725 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13726 }
13727 }
13728 }
13729
13730 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile_m) {
13731 TEST_REQUIRES_X86_SSE;
13732 for (uint32_t m = 1; m <= 4; m++) {
13733 GemmMicrokernelTester()
13734 .mr(4)
13735 .nr(8)
13736 .kr(1)
13737 .sr(4)
13738 .m(m)
13739 .n(8)
13740 .k(4)
13741 .iterations(1)
13742 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13743 }
13744 }
13745
13746 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_eq_4_subtile_n) {
13747 TEST_REQUIRES_X86_SSE;
13748 for (uint32_t n = 1; n <= 8; n++) {
13749 GemmMicrokernelTester()
13750 .mr(4)
13751 .nr(8)
13752 .kr(1)
13753 .sr(4)
13754 .m(4)
13755 .n(n)
13756 .k(4)
13757 .iterations(1)
13758 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13759 }
13760 }
13761
13762 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4) {
13763 TEST_REQUIRES_X86_SSE;
13764 for (size_t k = 1; k < 4; k++) {
13765 GemmMicrokernelTester()
13766 .mr(4)
13767 .nr(8)
13768 .kr(1)
13769 .sr(4)
13770 .m(4)
13771 .n(8)
13772 .k(k)
13773 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13774 }
13775 }
13776
13777 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4_strided_a) {
13778 TEST_REQUIRES_X86_SSE;
13779 for (size_t k = 1; k < 4; k++) {
13780 GemmMicrokernelTester()
13781 .mr(4)
13782 .nr(8)
13783 .kr(1)
13784 .sr(4)
13785 .m(4)
13786 .n(8)
13787 .k(k)
13788 .a_stride(7)
13789 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13790 }
13791 }
13792
13793 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_lt_4_subtile) {
13794 TEST_REQUIRES_X86_SSE;
13795 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013796 for (uint32_t n = 1; n <= 8; n++) {
13797 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013798 GemmMicrokernelTester()
13799 .mr(4)
13800 .nr(8)
13801 .kr(1)
13802 .sr(4)
13803 .m(m)
13804 .n(n)
13805 .k(k)
13806 .iterations(1)
13807 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13808 }
13809 }
13810 }
13811 }
13812
13813 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4) {
13814 TEST_REQUIRES_X86_SSE;
13815 for (size_t k = 5; k < 8; k++) {
13816 GemmMicrokernelTester()
13817 .mr(4)
13818 .nr(8)
13819 .kr(1)
13820 .sr(4)
13821 .m(4)
13822 .n(8)
13823 .k(k)
13824 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13825 }
13826 }
13827
13828 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4_strided_a) {
13829 TEST_REQUIRES_X86_SSE;
13830 for (size_t k = 5; k < 8; k++) {
13831 GemmMicrokernelTester()
13832 .mr(4)
13833 .nr(8)
13834 .kr(1)
13835 .sr(4)
13836 .m(4)
13837 .n(8)
13838 .k(k)
13839 .a_stride(11)
13840 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13841 }
13842 }
13843
13844 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_gt_4_subtile) {
13845 TEST_REQUIRES_X86_SSE;
13846 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013847 for (uint32_t n = 1; n <= 8; n++) {
13848 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013849 GemmMicrokernelTester()
13850 .mr(4)
13851 .nr(8)
13852 .kr(1)
13853 .sr(4)
13854 .m(m)
13855 .n(n)
13856 .k(k)
13857 .iterations(1)
13858 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13859 }
13860 }
13861 }
13862 }
13863
13864 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4) {
13865 TEST_REQUIRES_X86_SSE;
13866 for (size_t k = 8; k <= 40; k += 4) {
13867 GemmMicrokernelTester()
13868 .mr(4)
13869 .nr(8)
13870 .kr(1)
13871 .sr(4)
13872 .m(4)
13873 .n(8)
13874 .k(k)
13875 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13876 }
13877 }
13878
13879 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4_strided_a) {
13880 TEST_REQUIRES_X86_SSE;
13881 for (size_t k = 8; k <= 40; k += 4) {
13882 GemmMicrokernelTester()
13883 .mr(4)
13884 .nr(8)
13885 .kr(1)
13886 .sr(4)
13887 .m(4)
13888 .n(8)
13889 .k(k)
13890 .a_stride(43)
13891 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13892 }
13893 }
13894
13895 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, k_div_4_subtile) {
13896 TEST_REQUIRES_X86_SSE;
13897 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013898 for (uint32_t n = 1; n <= 8; n++) {
13899 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013900 GemmMicrokernelTester()
13901 .mr(4)
13902 .nr(8)
13903 .kr(1)
13904 .sr(4)
13905 .m(m)
13906 .n(n)
13907 .k(k)
13908 .iterations(1)
13909 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13910 }
13911 }
13912 }
13913 }
13914
13915 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8) {
13916 TEST_REQUIRES_X86_SSE;
13917 for (uint32_t n = 9; n < 16; n++) {
13918 for (size_t k = 1; k <= 20; k += 5) {
13919 GemmMicrokernelTester()
13920 .mr(4)
13921 .nr(8)
13922 .kr(1)
13923 .sr(4)
13924 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013925 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013926 .k(k)
13927 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13928 }
13929 }
13930 }
13931
13932 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_strided_cn) {
13933 TEST_REQUIRES_X86_SSE;
13934 for (uint32_t n = 9; n < 16; n++) {
13935 for (size_t k = 1; k <= 20; k += 5) {
13936 GemmMicrokernelTester()
13937 .mr(4)
13938 .nr(8)
13939 .kr(1)
13940 .sr(4)
13941 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013942 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013943 .k(k)
13944 .cn_stride(11)
13945 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13946 }
13947 }
13948 }
13949
13950 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_strided_a) {
13951 TEST_REQUIRES_X86_SSE;
13952 for (uint32_t n = 9; n < 16; n++) {
13953 for (size_t k = 1; k <= 20; k += 5) {
13954 GemmMicrokernelTester()
13955 .mr(4)
13956 .nr(8)
13957 .kr(1)
13958 .sr(4)
13959 .m(4)
13960 .n(n)
13961 .k(k)
13962 .a_stride(23)
13963 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13964 }
13965 }
13966 }
13967
13968 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_gt_8_subtile) {
13969 TEST_REQUIRES_X86_SSE;
13970 for (uint32_t n = 9; n < 16; n++) {
13971 for (size_t k = 1; k <= 20; k += 5) {
13972 for (uint32_t m = 1; m <= 4; m++) {
13973 GemmMicrokernelTester()
13974 .mr(4)
13975 .nr(8)
13976 .kr(1)
13977 .sr(4)
13978 .m(m)
13979 .n(n)
13980 .k(k)
13981 .iterations(1)
13982 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
13983 }
13984 }
13985 }
13986 }
13987
13988 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8) {
13989 TEST_REQUIRES_X86_SSE;
13990 for (uint32_t n = 16; n <= 24; n += 8) {
13991 for (size_t k = 1; k <= 20; k += 5) {
13992 GemmMicrokernelTester()
13993 .mr(4)
13994 .nr(8)
13995 .kr(1)
13996 .sr(4)
13997 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013998 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013999 .k(k)
14000 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14001 }
14002 }
14003 }
14004
14005 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_strided_cn) {
14006 TEST_REQUIRES_X86_SSE;
14007 for (uint32_t n = 16; n <= 24; n += 8) {
14008 for (size_t k = 1; k <= 20; k += 5) {
14009 GemmMicrokernelTester()
14010 .mr(4)
14011 .nr(8)
14012 .kr(1)
14013 .sr(4)
14014 .m(4)
14015 .n(n)
14016 .k(k)
14017 .cn_stride(11)
14018 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14019 }
14020 }
14021 }
14022
14023 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_strided_a) {
14024 TEST_REQUIRES_X86_SSE;
14025 for (uint32_t n = 16; n <= 24; n += 8) {
14026 for (size_t k = 1; k <= 20; k += 5) {
14027 GemmMicrokernelTester()
14028 .mr(4)
14029 .nr(8)
14030 .kr(1)
14031 .sr(4)
14032 .m(4)
14033 .n(n)
14034 .k(k)
14035 .a_stride(23)
14036 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14037 }
14038 }
14039 }
14040
14041 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, n_div_8_subtile) {
14042 TEST_REQUIRES_X86_SSE;
14043 for (uint32_t n = 16; n <= 24; n += 8) {
14044 for (size_t k = 1; k <= 20; k += 5) {
14045 for (uint32_t m = 1; m <= 4; m++) {
14046 GemmMicrokernelTester()
14047 .mr(4)
14048 .nr(8)
14049 .kr(1)
14050 .sr(4)
14051 .m(m)
14052 .n(n)
14053 .k(k)
14054 .iterations(1)
14055 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14056 }
14057 }
14058 }
14059 }
14060
14061 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cm_subtile) {
14062 TEST_REQUIRES_X86_SSE;
14063 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014064 for (uint32_t n = 1; n <= 8; n++) {
14065 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014066 GemmMicrokernelTester()
14067 .mr(4)
14068 .nr(8)
14069 .kr(1)
14070 .sr(4)
14071 .m(m)
14072 .n(n)
14073 .k(k)
14074 .cm_stride(11)
14075 .iterations(1)
14076 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14077 }
14078 }
14079 }
14080 }
14081
14082 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, qmin) {
14083 TEST_REQUIRES_X86_SSE;
14084 GemmMicrokernelTester()
14085 .mr(4)
14086 .nr(8)
14087 .kr(1)
14088 .sr(4)
14089 .m(4)
14090 .n(8)
14091 .k(4)
14092 .qmin(128)
14093 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14094 }
14095
14096 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, qmax) {
14097 TEST_REQUIRES_X86_SSE;
14098 GemmMicrokernelTester()
14099 .mr(4)
14100 .nr(8)
14101 .kr(1)
14102 .sr(4)
14103 .m(4)
14104 .n(8)
14105 .k(4)
14106 .qmax(128)
14107 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14108 }
14109
14110 TEST(F32_GEMMINC_MINMAX_4X8S4__SSE, strided_cm) {
14111 TEST_REQUIRES_X86_SSE;
14112 GemmMicrokernelTester()
14113 .mr(4)
14114 .nr(8)
14115 .kr(1)
14116 .sr(4)
14117 .m(4)
14118 .n(8)
14119 .k(4)
14120 .cm_stride(11)
14121 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
14122 }
14123#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14124
14125
14126#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14127 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4) {
14128 TEST_REQUIRES_X86_SSE;
14129 GemmMicrokernelTester()
14130 .mr(5)
14131 .nr(8)
14132 .kr(1)
14133 .sr(4)
14134 .m(5)
14135 .n(8)
14136 .k(4)
14137 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14138 }
14139
14140 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cn) {
14141 TEST_REQUIRES_X86_SSE;
14142 GemmMicrokernelTester()
14143 .mr(5)
14144 .nr(8)
14145 .kr(1)
14146 .sr(4)
14147 .m(5)
14148 .n(8)
14149 .k(4)
14150 .cn_stride(11)
14151 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14152 }
14153
14154 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_strided_a) {
14155 TEST_REQUIRES_X86_SSE;
14156 GemmMicrokernelTester()
14157 .mr(5)
14158 .nr(8)
14159 .kr(1)
14160 .sr(4)
14161 .m(5)
14162 .n(8)
14163 .k(4)
14164 .a_stride(7)
14165 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14166 }
14167
14168 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile) {
14169 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014170 for (uint32_t n = 1; n <= 8; n++) {
14171 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014172 GemmMicrokernelTester()
14173 .mr(5)
14174 .nr(8)
14175 .kr(1)
14176 .sr(4)
14177 .m(m)
14178 .n(n)
14179 .k(4)
14180 .iterations(1)
14181 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14182 }
14183 }
14184 }
14185
14186 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile_m) {
14187 TEST_REQUIRES_X86_SSE;
14188 for (uint32_t m = 1; m <= 5; m++) {
14189 GemmMicrokernelTester()
14190 .mr(5)
14191 .nr(8)
14192 .kr(1)
14193 .sr(4)
14194 .m(m)
14195 .n(8)
14196 .k(4)
14197 .iterations(1)
14198 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14199 }
14200 }
14201
14202 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_eq_4_subtile_n) {
14203 TEST_REQUIRES_X86_SSE;
14204 for (uint32_t n = 1; n <= 8; n++) {
14205 GemmMicrokernelTester()
14206 .mr(5)
14207 .nr(8)
14208 .kr(1)
14209 .sr(4)
14210 .m(5)
14211 .n(n)
14212 .k(4)
14213 .iterations(1)
14214 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14215 }
14216 }
14217
14218 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4) {
14219 TEST_REQUIRES_X86_SSE;
14220 for (size_t k = 1; k < 4; k++) {
14221 GemmMicrokernelTester()
14222 .mr(5)
14223 .nr(8)
14224 .kr(1)
14225 .sr(4)
14226 .m(5)
14227 .n(8)
14228 .k(k)
14229 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14230 }
14231 }
14232
14233 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4_strided_a) {
14234 TEST_REQUIRES_X86_SSE;
14235 for (size_t k = 1; k < 4; k++) {
14236 GemmMicrokernelTester()
14237 .mr(5)
14238 .nr(8)
14239 .kr(1)
14240 .sr(4)
14241 .m(5)
14242 .n(8)
14243 .k(k)
14244 .a_stride(7)
14245 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14246 }
14247 }
14248
14249 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_lt_4_subtile) {
14250 TEST_REQUIRES_X86_SSE;
14251 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014252 for (uint32_t n = 1; n <= 8; n++) {
14253 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014254 GemmMicrokernelTester()
14255 .mr(5)
14256 .nr(8)
14257 .kr(1)
14258 .sr(4)
14259 .m(m)
14260 .n(n)
14261 .k(k)
14262 .iterations(1)
14263 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14264 }
14265 }
14266 }
14267 }
14268
14269 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4) {
14270 TEST_REQUIRES_X86_SSE;
14271 for (size_t k = 5; k < 8; k++) {
14272 GemmMicrokernelTester()
14273 .mr(5)
14274 .nr(8)
14275 .kr(1)
14276 .sr(4)
14277 .m(5)
14278 .n(8)
14279 .k(k)
14280 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14281 }
14282 }
14283
14284 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4_strided_a) {
14285 TEST_REQUIRES_X86_SSE;
14286 for (size_t k = 5; k < 8; k++) {
14287 GemmMicrokernelTester()
14288 .mr(5)
14289 .nr(8)
14290 .kr(1)
14291 .sr(4)
14292 .m(5)
14293 .n(8)
14294 .k(k)
14295 .a_stride(11)
14296 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14297 }
14298 }
14299
14300 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_gt_4_subtile) {
14301 TEST_REQUIRES_X86_SSE;
14302 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014303 for (uint32_t n = 1; n <= 8; n++) {
14304 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014305 GemmMicrokernelTester()
14306 .mr(5)
14307 .nr(8)
14308 .kr(1)
14309 .sr(4)
14310 .m(m)
14311 .n(n)
14312 .k(k)
14313 .iterations(1)
14314 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14315 }
14316 }
14317 }
14318 }
14319
14320 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4) {
14321 TEST_REQUIRES_X86_SSE;
14322 for (size_t k = 8; k <= 40; k += 4) {
14323 GemmMicrokernelTester()
14324 .mr(5)
14325 .nr(8)
14326 .kr(1)
14327 .sr(4)
14328 .m(5)
14329 .n(8)
14330 .k(k)
14331 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14332 }
14333 }
14334
14335 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4_strided_a) {
14336 TEST_REQUIRES_X86_SSE;
14337 for (size_t k = 8; k <= 40; k += 4) {
14338 GemmMicrokernelTester()
14339 .mr(5)
14340 .nr(8)
14341 .kr(1)
14342 .sr(4)
14343 .m(5)
14344 .n(8)
14345 .k(k)
14346 .a_stride(43)
14347 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14348 }
14349 }
14350
14351 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, k_div_4_subtile) {
14352 TEST_REQUIRES_X86_SSE;
14353 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014354 for (uint32_t n = 1; n <= 8; n++) {
14355 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014356 GemmMicrokernelTester()
14357 .mr(5)
14358 .nr(8)
14359 .kr(1)
14360 .sr(4)
14361 .m(m)
14362 .n(n)
14363 .k(k)
14364 .iterations(1)
14365 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14366 }
14367 }
14368 }
14369 }
14370
14371 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8) {
14372 TEST_REQUIRES_X86_SSE;
14373 for (uint32_t n = 9; n < 16; n++) {
14374 for (size_t k = 1; k <= 20; k += 5) {
14375 GemmMicrokernelTester()
14376 .mr(5)
14377 .nr(8)
14378 .kr(1)
14379 .sr(4)
14380 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014381 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014382 .k(k)
14383 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14384 }
14385 }
14386 }
14387
14388 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_strided_cn) {
14389 TEST_REQUIRES_X86_SSE;
14390 for (uint32_t n = 9; n < 16; n++) {
14391 for (size_t k = 1; k <= 20; k += 5) {
14392 GemmMicrokernelTester()
14393 .mr(5)
14394 .nr(8)
14395 .kr(1)
14396 .sr(4)
14397 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014398 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014399 .k(k)
14400 .cn_stride(11)
14401 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14402 }
14403 }
14404 }
14405
14406 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_strided_a) {
14407 TEST_REQUIRES_X86_SSE;
14408 for (uint32_t n = 9; n < 16; n++) {
14409 for (size_t k = 1; k <= 20; k += 5) {
14410 GemmMicrokernelTester()
14411 .mr(5)
14412 .nr(8)
14413 .kr(1)
14414 .sr(4)
14415 .m(5)
14416 .n(n)
14417 .k(k)
14418 .a_stride(23)
14419 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14420 }
14421 }
14422 }
14423
14424 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_gt_8_subtile) {
14425 TEST_REQUIRES_X86_SSE;
14426 for (uint32_t n = 9; n < 16; n++) {
14427 for (size_t k = 1; k <= 20; k += 5) {
14428 for (uint32_t m = 1; m <= 5; m++) {
14429 GemmMicrokernelTester()
14430 .mr(5)
14431 .nr(8)
14432 .kr(1)
14433 .sr(4)
14434 .m(m)
14435 .n(n)
14436 .k(k)
14437 .iterations(1)
14438 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14439 }
14440 }
14441 }
14442 }
14443
14444 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8) {
14445 TEST_REQUIRES_X86_SSE;
14446 for (uint32_t n = 16; n <= 24; n += 8) {
14447 for (size_t k = 1; k <= 20; k += 5) {
14448 GemmMicrokernelTester()
14449 .mr(5)
14450 .nr(8)
14451 .kr(1)
14452 .sr(4)
14453 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014454 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014455 .k(k)
14456 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14457 }
14458 }
14459 }
14460
14461 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_strided_cn) {
14462 TEST_REQUIRES_X86_SSE;
14463 for (uint32_t n = 16; n <= 24; n += 8) {
14464 for (size_t k = 1; k <= 20; k += 5) {
14465 GemmMicrokernelTester()
14466 .mr(5)
14467 .nr(8)
14468 .kr(1)
14469 .sr(4)
14470 .m(5)
14471 .n(n)
14472 .k(k)
14473 .cn_stride(11)
14474 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14475 }
14476 }
14477 }
14478
14479 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_strided_a) {
14480 TEST_REQUIRES_X86_SSE;
14481 for (uint32_t n = 16; n <= 24; n += 8) {
14482 for (size_t k = 1; k <= 20; k += 5) {
14483 GemmMicrokernelTester()
14484 .mr(5)
14485 .nr(8)
14486 .kr(1)
14487 .sr(4)
14488 .m(5)
14489 .n(n)
14490 .k(k)
14491 .a_stride(23)
14492 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14493 }
14494 }
14495 }
14496
14497 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, n_div_8_subtile) {
14498 TEST_REQUIRES_X86_SSE;
14499 for (uint32_t n = 16; n <= 24; n += 8) {
14500 for (size_t k = 1; k <= 20; k += 5) {
14501 for (uint32_t m = 1; m <= 5; m++) {
14502 GemmMicrokernelTester()
14503 .mr(5)
14504 .nr(8)
14505 .kr(1)
14506 .sr(4)
14507 .m(m)
14508 .n(n)
14509 .k(k)
14510 .iterations(1)
14511 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14512 }
14513 }
14514 }
14515 }
14516
14517 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cm_subtile) {
14518 TEST_REQUIRES_X86_SSE;
14519 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014520 for (uint32_t n = 1; n <= 8; n++) {
14521 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014522 GemmMicrokernelTester()
14523 .mr(5)
14524 .nr(8)
14525 .kr(1)
14526 .sr(4)
14527 .m(m)
14528 .n(n)
14529 .k(k)
14530 .cm_stride(11)
14531 .iterations(1)
14532 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14533 }
14534 }
14535 }
14536 }
14537
14538 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, qmin) {
14539 TEST_REQUIRES_X86_SSE;
14540 GemmMicrokernelTester()
14541 .mr(5)
14542 .nr(8)
14543 .kr(1)
14544 .sr(4)
14545 .m(5)
14546 .n(8)
14547 .k(4)
14548 .qmin(128)
14549 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14550 }
14551
14552 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, qmax) {
14553 TEST_REQUIRES_X86_SSE;
14554 GemmMicrokernelTester()
14555 .mr(5)
14556 .nr(8)
14557 .kr(1)
14558 .sr(4)
14559 .m(5)
14560 .n(8)
14561 .k(4)
14562 .qmax(128)
14563 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14564 }
14565
14566 TEST(F32_GEMMINC_MINMAX_5X8S4__SSE, strided_cm) {
14567 TEST_REQUIRES_X86_SSE;
14568 GemmMicrokernelTester()
14569 .mr(5)
14570 .nr(8)
14571 .kr(1)
14572 .sr(4)
14573 .m(5)
14574 .n(8)
14575 .k(4)
14576 .cm_stride(11)
14577 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
14578 }
14579#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14580
14581
14582#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14583 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4) {
14584 TEST_REQUIRES_X86_SSE2;
14585 GemmMicrokernelTester()
14586 .mr(1)
14587 .nr(8)
14588 .kr(1)
14589 .sr(1)
14590 .m(1)
14591 .n(8)
14592 .k(4)
14593 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14594 }
14595
14596 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cn) {
14597 TEST_REQUIRES_X86_SSE2;
14598 GemmMicrokernelTester()
14599 .mr(1)
14600 .nr(8)
14601 .kr(1)
14602 .sr(1)
14603 .m(1)
14604 .n(8)
14605 .k(4)
14606 .cn_stride(11)
14607 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14608 }
14609
14610 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_strided_a) {
14611 TEST_REQUIRES_X86_SSE2;
14612 GemmMicrokernelTester()
14613 .mr(1)
14614 .nr(8)
14615 .kr(1)
14616 .sr(1)
14617 .m(1)
14618 .n(8)
14619 .k(4)
14620 .a_stride(7)
14621 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14622 }
14623
14624 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile) {
14625 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014626 for (uint32_t n = 1; n <= 8; n++) {
14627 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014628 GemmMicrokernelTester()
14629 .mr(1)
14630 .nr(8)
14631 .kr(1)
14632 .sr(1)
14633 .m(m)
14634 .n(n)
14635 .k(4)
14636 .iterations(1)
14637 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14638 }
14639 }
14640 }
14641
14642 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_m) {
14643 TEST_REQUIRES_X86_SSE2;
14644 for (uint32_t m = 1; m <= 1; m++) {
14645 GemmMicrokernelTester()
14646 .mr(1)
14647 .nr(8)
14648 .kr(1)
14649 .sr(1)
14650 .m(m)
14651 .n(8)
14652 .k(4)
14653 .iterations(1)
14654 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14655 }
14656 }
14657
14658 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_n) {
14659 TEST_REQUIRES_X86_SSE2;
14660 for (uint32_t n = 1; n <= 8; n++) {
14661 GemmMicrokernelTester()
14662 .mr(1)
14663 .nr(8)
14664 .kr(1)
14665 .sr(1)
14666 .m(1)
14667 .n(n)
14668 .k(4)
14669 .iterations(1)
14670 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14671 }
14672 }
14673
14674 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4) {
14675 TEST_REQUIRES_X86_SSE2;
14676 for (size_t k = 1; k < 4; k++) {
14677 GemmMicrokernelTester()
14678 .mr(1)
14679 .nr(8)
14680 .kr(1)
14681 .sr(1)
14682 .m(1)
14683 .n(8)
14684 .k(k)
14685 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14686 }
14687 }
14688
14689 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4_strided_a) {
14690 TEST_REQUIRES_X86_SSE2;
14691 for (size_t k = 1; k < 4; k++) {
14692 GemmMicrokernelTester()
14693 .mr(1)
14694 .nr(8)
14695 .kr(1)
14696 .sr(1)
14697 .m(1)
14698 .n(8)
14699 .k(k)
14700 .a_stride(7)
14701 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14702 }
14703 }
14704
14705 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_lt_4_subtile) {
14706 TEST_REQUIRES_X86_SSE2;
14707 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014708 for (uint32_t n = 1; n <= 8; n++) {
14709 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014710 GemmMicrokernelTester()
14711 .mr(1)
14712 .nr(8)
14713 .kr(1)
14714 .sr(1)
14715 .m(m)
14716 .n(n)
14717 .k(k)
14718 .iterations(1)
14719 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14720 }
14721 }
14722 }
14723 }
14724
14725 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4) {
14726 TEST_REQUIRES_X86_SSE2;
14727 for (size_t k = 5; k < 8; k++) {
14728 GemmMicrokernelTester()
14729 .mr(1)
14730 .nr(8)
14731 .kr(1)
14732 .sr(1)
14733 .m(1)
14734 .n(8)
14735 .k(k)
14736 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14737 }
14738 }
14739
14740 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4_strided_a) {
14741 TEST_REQUIRES_X86_SSE2;
14742 for (size_t k = 5; k < 8; k++) {
14743 GemmMicrokernelTester()
14744 .mr(1)
14745 .nr(8)
14746 .kr(1)
14747 .sr(1)
14748 .m(1)
14749 .n(8)
14750 .k(k)
14751 .a_stride(11)
14752 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14753 }
14754 }
14755
14756 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_gt_4_subtile) {
14757 TEST_REQUIRES_X86_SSE2;
14758 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014759 for (uint32_t n = 1; n <= 8; n++) {
14760 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014761 GemmMicrokernelTester()
14762 .mr(1)
14763 .nr(8)
14764 .kr(1)
14765 .sr(1)
14766 .m(m)
14767 .n(n)
14768 .k(k)
14769 .iterations(1)
14770 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14771 }
14772 }
14773 }
14774 }
14775
14776 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4) {
14777 TEST_REQUIRES_X86_SSE2;
14778 for (size_t k = 8; k <= 40; k += 4) {
14779 GemmMicrokernelTester()
14780 .mr(1)
14781 .nr(8)
14782 .kr(1)
14783 .sr(1)
14784 .m(1)
14785 .n(8)
14786 .k(k)
14787 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14788 }
14789 }
14790
14791 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4_strided_a) {
14792 TEST_REQUIRES_X86_SSE2;
14793 for (size_t k = 8; k <= 40; k += 4) {
14794 GemmMicrokernelTester()
14795 .mr(1)
14796 .nr(8)
14797 .kr(1)
14798 .sr(1)
14799 .m(1)
14800 .n(8)
14801 .k(k)
14802 .a_stride(43)
14803 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14804 }
14805 }
14806
14807 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, k_div_4_subtile) {
14808 TEST_REQUIRES_X86_SSE2;
14809 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014810 for (uint32_t n = 1; n <= 8; n++) {
14811 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014812 GemmMicrokernelTester()
14813 .mr(1)
14814 .nr(8)
14815 .kr(1)
14816 .sr(1)
14817 .m(m)
14818 .n(n)
14819 .k(k)
14820 .iterations(1)
14821 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14822 }
14823 }
14824 }
14825 }
14826
14827 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8) {
14828 TEST_REQUIRES_X86_SSE2;
14829 for (uint32_t n = 9; n < 16; n++) {
14830 for (size_t k = 1; k <= 20; k += 5) {
14831 GemmMicrokernelTester()
14832 .mr(1)
14833 .nr(8)
14834 .kr(1)
14835 .sr(1)
14836 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014837 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014838 .k(k)
14839 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14840 }
14841 }
14842 }
14843
14844 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_cn) {
14845 TEST_REQUIRES_X86_SSE2;
14846 for (uint32_t n = 9; n < 16; n++) {
14847 for (size_t k = 1; k <= 20; k += 5) {
14848 GemmMicrokernelTester()
14849 .mr(1)
14850 .nr(8)
14851 .kr(1)
14852 .sr(1)
14853 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014854 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014855 .k(k)
14856 .cn_stride(11)
14857 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14858 }
14859 }
14860 }
14861
14862 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_a) {
14863 TEST_REQUIRES_X86_SSE2;
14864 for (uint32_t n = 9; n < 16; n++) {
14865 for (size_t k = 1; k <= 20; k += 5) {
14866 GemmMicrokernelTester()
14867 .mr(1)
14868 .nr(8)
14869 .kr(1)
14870 .sr(1)
14871 .m(1)
14872 .n(n)
14873 .k(k)
14874 .a_stride(23)
14875 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14876 }
14877 }
14878 }
14879
14880 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_gt_8_subtile) {
14881 TEST_REQUIRES_X86_SSE2;
14882 for (uint32_t n = 9; n < 16; n++) {
14883 for (size_t k = 1; k <= 20; k += 5) {
14884 for (uint32_t m = 1; m <= 1; m++) {
14885 GemmMicrokernelTester()
14886 .mr(1)
14887 .nr(8)
14888 .kr(1)
14889 .sr(1)
14890 .m(m)
14891 .n(n)
14892 .k(k)
14893 .iterations(1)
14894 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14895 }
14896 }
14897 }
14898 }
14899
14900 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8) {
14901 TEST_REQUIRES_X86_SSE2;
14902 for (uint32_t n = 16; n <= 24; n += 8) {
14903 for (size_t k = 1; k <= 20; k += 5) {
14904 GemmMicrokernelTester()
14905 .mr(1)
14906 .nr(8)
14907 .kr(1)
14908 .sr(1)
14909 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014910 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014911 .k(k)
14912 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14913 }
14914 }
14915 }
14916
14917 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_strided_cn) {
14918 TEST_REQUIRES_X86_SSE2;
14919 for (uint32_t n = 16; n <= 24; n += 8) {
14920 for (size_t k = 1; k <= 20; k += 5) {
14921 GemmMicrokernelTester()
14922 .mr(1)
14923 .nr(8)
14924 .kr(1)
14925 .sr(1)
14926 .m(1)
14927 .n(n)
14928 .k(k)
14929 .cn_stride(11)
14930 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14931 }
14932 }
14933 }
14934
14935 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_strided_a) {
14936 TEST_REQUIRES_X86_SSE2;
14937 for (uint32_t n = 16; n <= 24; n += 8) {
14938 for (size_t k = 1; k <= 20; k += 5) {
14939 GemmMicrokernelTester()
14940 .mr(1)
14941 .nr(8)
14942 .kr(1)
14943 .sr(1)
14944 .m(1)
14945 .n(n)
14946 .k(k)
14947 .a_stride(23)
14948 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14949 }
14950 }
14951 }
14952
14953 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, n_div_8_subtile) {
14954 TEST_REQUIRES_X86_SSE2;
14955 for (uint32_t n = 16; n <= 24; n += 8) {
14956 for (size_t k = 1; k <= 20; k += 5) {
14957 for (uint32_t m = 1; m <= 1; m++) {
14958 GemmMicrokernelTester()
14959 .mr(1)
14960 .nr(8)
14961 .kr(1)
14962 .sr(1)
14963 .m(m)
14964 .n(n)
14965 .k(k)
14966 .iterations(1)
14967 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14968 }
14969 }
14970 }
14971 }
14972
14973 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cm_subtile) {
14974 TEST_REQUIRES_X86_SSE2;
14975 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014976 for (uint32_t n = 1; n <= 8; n++) {
14977 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014978 GemmMicrokernelTester()
14979 .mr(1)
14980 .nr(8)
14981 .kr(1)
14982 .sr(1)
14983 .m(m)
14984 .n(n)
14985 .k(k)
14986 .cm_stride(11)
14987 .iterations(1)
14988 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
14989 }
14990 }
14991 }
14992 }
14993
14994 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, qmin) {
14995 TEST_REQUIRES_X86_SSE2;
14996 GemmMicrokernelTester()
14997 .mr(1)
14998 .nr(8)
14999 .kr(1)
15000 .sr(1)
15001 .m(1)
15002 .n(8)
15003 .k(4)
15004 .qmin(128)
15005 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15006 }
15007
15008 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, qmax) {
15009 TEST_REQUIRES_X86_SSE2;
15010 GemmMicrokernelTester()
15011 .mr(1)
15012 .nr(8)
15013 .kr(1)
15014 .sr(1)
15015 .m(1)
15016 .n(8)
15017 .k(4)
15018 .qmax(128)
15019 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15020 }
15021
15022 TEST(F32_GEMMINC_MINMAX_1X8__SSE2_DUP, strided_cm) {
15023 TEST_REQUIRES_X86_SSE2;
15024 GemmMicrokernelTester()
15025 .mr(1)
15026 .nr(8)
15027 .kr(1)
15028 .sr(1)
15029 .m(1)
15030 .n(8)
15031 .k(4)
15032 .cm_stride(11)
15033 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15034 }
15035#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15036
15037
15038#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15039 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4) {
15040 TEST_REQUIRES_X86_SSE2;
15041 GemmMicrokernelTester()
15042 .mr(5)
15043 .nr(8)
15044 .kr(1)
15045 .sr(1)
15046 .m(5)
15047 .n(8)
15048 .k(4)
15049 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15050 }
15051
15052 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cn) {
15053 TEST_REQUIRES_X86_SSE2;
15054 GemmMicrokernelTester()
15055 .mr(5)
15056 .nr(8)
15057 .kr(1)
15058 .sr(1)
15059 .m(5)
15060 .n(8)
15061 .k(4)
15062 .cn_stride(11)
15063 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15064 }
15065
15066 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_strided_a) {
15067 TEST_REQUIRES_X86_SSE2;
15068 GemmMicrokernelTester()
15069 .mr(5)
15070 .nr(8)
15071 .kr(1)
15072 .sr(1)
15073 .m(5)
15074 .n(8)
15075 .k(4)
15076 .a_stride(7)
15077 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15078 }
15079
15080 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile) {
15081 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015082 for (uint32_t n = 1; n <= 8; n++) {
15083 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015084 GemmMicrokernelTester()
15085 .mr(5)
15086 .nr(8)
15087 .kr(1)
15088 .sr(1)
15089 .m(m)
15090 .n(n)
15091 .k(4)
15092 .iterations(1)
15093 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15094 }
15095 }
15096 }
15097
15098 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_m) {
15099 TEST_REQUIRES_X86_SSE2;
15100 for (uint32_t m = 1; m <= 5; m++) {
15101 GemmMicrokernelTester()
15102 .mr(5)
15103 .nr(8)
15104 .kr(1)
15105 .sr(1)
15106 .m(m)
15107 .n(8)
15108 .k(4)
15109 .iterations(1)
15110 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15111 }
15112 }
15113
15114 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_n) {
15115 TEST_REQUIRES_X86_SSE2;
15116 for (uint32_t n = 1; n <= 8; n++) {
15117 GemmMicrokernelTester()
15118 .mr(5)
15119 .nr(8)
15120 .kr(1)
15121 .sr(1)
15122 .m(5)
15123 .n(n)
15124 .k(4)
15125 .iterations(1)
15126 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15127 }
15128 }
15129
15130 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4) {
15131 TEST_REQUIRES_X86_SSE2;
15132 for (size_t k = 1; k < 4; k++) {
15133 GemmMicrokernelTester()
15134 .mr(5)
15135 .nr(8)
15136 .kr(1)
15137 .sr(1)
15138 .m(5)
15139 .n(8)
15140 .k(k)
15141 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15142 }
15143 }
15144
15145 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4_strided_a) {
15146 TEST_REQUIRES_X86_SSE2;
15147 for (size_t k = 1; k < 4; k++) {
15148 GemmMicrokernelTester()
15149 .mr(5)
15150 .nr(8)
15151 .kr(1)
15152 .sr(1)
15153 .m(5)
15154 .n(8)
15155 .k(k)
15156 .a_stride(7)
15157 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15158 }
15159 }
15160
15161 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_lt_4_subtile) {
15162 TEST_REQUIRES_X86_SSE2;
15163 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015164 for (uint32_t n = 1; n <= 8; n++) {
15165 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015166 GemmMicrokernelTester()
15167 .mr(5)
15168 .nr(8)
15169 .kr(1)
15170 .sr(1)
15171 .m(m)
15172 .n(n)
15173 .k(k)
15174 .iterations(1)
15175 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15176 }
15177 }
15178 }
15179 }
15180
15181 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4) {
15182 TEST_REQUIRES_X86_SSE2;
15183 for (size_t k = 5; k < 8; k++) {
15184 GemmMicrokernelTester()
15185 .mr(5)
15186 .nr(8)
15187 .kr(1)
15188 .sr(1)
15189 .m(5)
15190 .n(8)
15191 .k(k)
15192 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15193 }
15194 }
15195
15196 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4_strided_a) {
15197 TEST_REQUIRES_X86_SSE2;
15198 for (size_t k = 5; k < 8; k++) {
15199 GemmMicrokernelTester()
15200 .mr(5)
15201 .nr(8)
15202 .kr(1)
15203 .sr(1)
15204 .m(5)
15205 .n(8)
15206 .k(k)
15207 .a_stride(11)
15208 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15209 }
15210 }
15211
15212 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_gt_4_subtile) {
15213 TEST_REQUIRES_X86_SSE2;
15214 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015215 for (uint32_t n = 1; n <= 8; n++) {
15216 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015217 GemmMicrokernelTester()
15218 .mr(5)
15219 .nr(8)
15220 .kr(1)
15221 .sr(1)
15222 .m(m)
15223 .n(n)
15224 .k(k)
15225 .iterations(1)
15226 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15227 }
15228 }
15229 }
15230 }
15231
15232 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4) {
15233 TEST_REQUIRES_X86_SSE2;
15234 for (size_t k = 8; k <= 40; k += 4) {
15235 GemmMicrokernelTester()
15236 .mr(5)
15237 .nr(8)
15238 .kr(1)
15239 .sr(1)
15240 .m(5)
15241 .n(8)
15242 .k(k)
15243 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15244 }
15245 }
15246
15247 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4_strided_a) {
15248 TEST_REQUIRES_X86_SSE2;
15249 for (size_t k = 8; k <= 40; k += 4) {
15250 GemmMicrokernelTester()
15251 .mr(5)
15252 .nr(8)
15253 .kr(1)
15254 .sr(1)
15255 .m(5)
15256 .n(8)
15257 .k(k)
15258 .a_stride(43)
15259 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15260 }
15261 }
15262
15263 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, k_div_4_subtile) {
15264 TEST_REQUIRES_X86_SSE2;
15265 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015266 for (uint32_t n = 1; n <= 8; n++) {
15267 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015268 GemmMicrokernelTester()
15269 .mr(5)
15270 .nr(8)
15271 .kr(1)
15272 .sr(1)
15273 .m(m)
15274 .n(n)
15275 .k(k)
15276 .iterations(1)
15277 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15278 }
15279 }
15280 }
15281 }
15282
15283 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8) {
15284 TEST_REQUIRES_X86_SSE2;
15285 for (uint32_t n = 9; n < 16; n++) {
15286 for (size_t k = 1; k <= 20; k += 5) {
15287 GemmMicrokernelTester()
15288 .mr(5)
15289 .nr(8)
15290 .kr(1)
15291 .sr(1)
15292 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015293 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015294 .k(k)
15295 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15296 }
15297 }
15298 }
15299
15300 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_cn) {
15301 TEST_REQUIRES_X86_SSE2;
15302 for (uint32_t n = 9; n < 16; n++) {
15303 for (size_t k = 1; k <= 20; k += 5) {
15304 GemmMicrokernelTester()
15305 .mr(5)
15306 .nr(8)
15307 .kr(1)
15308 .sr(1)
15309 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015310 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015311 .k(k)
15312 .cn_stride(11)
15313 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15314 }
15315 }
15316 }
15317
15318 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_a) {
15319 TEST_REQUIRES_X86_SSE2;
15320 for (uint32_t n = 9; n < 16; n++) {
15321 for (size_t k = 1; k <= 20; k += 5) {
15322 GemmMicrokernelTester()
15323 .mr(5)
15324 .nr(8)
15325 .kr(1)
15326 .sr(1)
15327 .m(5)
15328 .n(n)
15329 .k(k)
15330 .a_stride(23)
15331 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15332 }
15333 }
15334 }
15335
15336 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_gt_8_subtile) {
15337 TEST_REQUIRES_X86_SSE2;
15338 for (uint32_t n = 9; n < 16; n++) {
15339 for (size_t k = 1; k <= 20; k += 5) {
15340 for (uint32_t m = 1; m <= 5; m++) {
15341 GemmMicrokernelTester()
15342 .mr(5)
15343 .nr(8)
15344 .kr(1)
15345 .sr(1)
15346 .m(m)
15347 .n(n)
15348 .k(k)
15349 .iterations(1)
15350 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15351 }
15352 }
15353 }
15354 }
15355
15356 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8) {
15357 TEST_REQUIRES_X86_SSE2;
15358 for (uint32_t n = 16; n <= 24; n += 8) {
15359 for (size_t k = 1; k <= 20; k += 5) {
15360 GemmMicrokernelTester()
15361 .mr(5)
15362 .nr(8)
15363 .kr(1)
15364 .sr(1)
15365 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015366 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015367 .k(k)
15368 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15369 }
15370 }
15371 }
15372
15373 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_strided_cn) {
15374 TEST_REQUIRES_X86_SSE2;
15375 for (uint32_t n = 16; n <= 24; n += 8) {
15376 for (size_t k = 1; k <= 20; k += 5) {
15377 GemmMicrokernelTester()
15378 .mr(5)
15379 .nr(8)
15380 .kr(1)
15381 .sr(1)
15382 .m(5)
15383 .n(n)
15384 .k(k)
15385 .cn_stride(11)
15386 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15387 }
15388 }
15389 }
15390
15391 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_strided_a) {
15392 TEST_REQUIRES_X86_SSE2;
15393 for (uint32_t n = 16; n <= 24; n += 8) {
15394 for (size_t k = 1; k <= 20; k += 5) {
15395 GemmMicrokernelTester()
15396 .mr(5)
15397 .nr(8)
15398 .kr(1)
15399 .sr(1)
15400 .m(5)
15401 .n(n)
15402 .k(k)
15403 .a_stride(23)
15404 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15405 }
15406 }
15407 }
15408
15409 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, n_div_8_subtile) {
15410 TEST_REQUIRES_X86_SSE2;
15411 for (uint32_t n = 16; n <= 24; n += 8) {
15412 for (size_t k = 1; k <= 20; k += 5) {
15413 for (uint32_t m = 1; m <= 5; m++) {
15414 GemmMicrokernelTester()
15415 .mr(5)
15416 .nr(8)
15417 .kr(1)
15418 .sr(1)
15419 .m(m)
15420 .n(n)
15421 .k(k)
15422 .iterations(1)
15423 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15424 }
15425 }
15426 }
15427 }
15428
15429 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cm_subtile) {
15430 TEST_REQUIRES_X86_SSE2;
15431 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015432 for (uint32_t n = 1; n <= 8; n++) {
15433 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015434 GemmMicrokernelTester()
15435 .mr(5)
15436 .nr(8)
15437 .kr(1)
15438 .sr(1)
15439 .m(m)
15440 .n(n)
15441 .k(k)
15442 .cm_stride(11)
15443 .iterations(1)
15444 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15445 }
15446 }
15447 }
15448 }
15449
15450 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, qmin) {
15451 TEST_REQUIRES_X86_SSE2;
15452 GemmMicrokernelTester()
15453 .mr(5)
15454 .nr(8)
15455 .kr(1)
15456 .sr(1)
15457 .m(5)
15458 .n(8)
15459 .k(4)
15460 .qmin(128)
15461 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15462 }
15463
15464 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, qmax) {
15465 TEST_REQUIRES_X86_SSE2;
15466 GemmMicrokernelTester()
15467 .mr(5)
15468 .nr(8)
15469 .kr(1)
15470 .sr(1)
15471 .m(5)
15472 .n(8)
15473 .k(4)
15474 .qmax(128)
15475 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15476 }
15477
15478 TEST(F32_GEMMINC_MINMAX_5X8__SSE2_DUP, strided_cm) {
15479 TEST_REQUIRES_X86_SSE2;
15480 GemmMicrokernelTester()
15481 .mr(5)
15482 .nr(8)
15483 .kr(1)
15484 .sr(1)
15485 .m(5)
15486 .n(8)
15487 .k(4)
15488 .cm_stride(11)
15489 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
15490 }
15491#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15492
15493
15494#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15495 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1) {
15496 TEST_REQUIRES_X86_AVX;
15497 GemmMicrokernelTester()
15498 .mr(4)
15499 .nr(8)
15500 .kr(1)
15501 .sr(1)
15502 .m(4)
15503 .n(8)
15504 .k(1)
15505 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15506 }
15507
15508 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cn) {
15509 TEST_REQUIRES_X86_AVX;
15510 GemmMicrokernelTester()
15511 .mr(4)
15512 .nr(8)
15513 .kr(1)
15514 .sr(1)
15515 .m(4)
15516 .n(8)
15517 .k(1)
15518 .cn_stride(11)
15519 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15520 }
15521
15522 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_strided_a) {
15523 TEST_REQUIRES_X86_AVX;
15524 GemmMicrokernelTester()
15525 .mr(4)
15526 .nr(8)
15527 .kr(1)
15528 .sr(1)
15529 .m(4)
15530 .n(8)
15531 .k(1)
15532 .a_stride(3)
15533 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15534 }
15535
15536 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile) {
15537 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015538 for (uint32_t n = 1; n <= 8; n++) {
15539 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015540 GemmMicrokernelTester()
15541 .mr(4)
15542 .nr(8)
15543 .kr(1)
15544 .sr(1)
15545 .m(m)
15546 .n(n)
15547 .k(1)
15548 .iterations(1)
15549 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15550 }
15551 }
15552 }
15553
15554 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_m) {
15555 TEST_REQUIRES_X86_AVX;
15556 for (uint32_t m = 1; m <= 4; m++) {
15557 GemmMicrokernelTester()
15558 .mr(4)
15559 .nr(8)
15560 .kr(1)
15561 .sr(1)
15562 .m(m)
15563 .n(8)
15564 .k(1)
15565 .iterations(1)
15566 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15567 }
15568 }
15569
15570 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_n) {
15571 TEST_REQUIRES_X86_AVX;
15572 for (uint32_t n = 1; n <= 8; n++) {
15573 GemmMicrokernelTester()
15574 .mr(4)
15575 .nr(8)
15576 .kr(1)
15577 .sr(1)
15578 .m(4)
15579 .n(n)
15580 .k(1)
15581 .iterations(1)
15582 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15583 }
15584 }
15585
15586 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1) {
15587 TEST_REQUIRES_X86_AVX;
15588 for (size_t k = 2; k < 10; k++) {
15589 GemmMicrokernelTester()
15590 .mr(4)
15591 .nr(8)
15592 .kr(1)
15593 .sr(1)
15594 .m(4)
15595 .n(8)
15596 .k(k)
15597 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15598 }
15599 }
15600
15601 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1_strided_a) {
15602 TEST_REQUIRES_X86_AVX;
15603 for (size_t k = 2; k < 10; k++) {
15604 GemmMicrokernelTester()
15605 .mr(4)
15606 .nr(8)
15607 .kr(1)
15608 .sr(1)
15609 .m(4)
15610 .n(8)
15611 .k(k)
15612 .a_stride(11)
15613 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15614 }
15615 }
15616
15617 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, k_gt_1_subtile) {
15618 TEST_REQUIRES_X86_AVX;
15619 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015620 for (uint32_t n = 1; n <= 8; n++) {
15621 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015622 GemmMicrokernelTester()
15623 .mr(4)
15624 .nr(8)
15625 .kr(1)
15626 .sr(1)
15627 .m(m)
15628 .n(n)
15629 .k(k)
15630 .iterations(1)
15631 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15632 }
15633 }
15634 }
15635 }
15636
15637 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8) {
15638 TEST_REQUIRES_X86_AVX;
15639 for (uint32_t n = 9; n < 16; n++) {
15640 for (size_t k = 1; k <= 5; k += 2) {
15641 GemmMicrokernelTester()
15642 .mr(4)
15643 .nr(8)
15644 .kr(1)
15645 .sr(1)
15646 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015647 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015648 .k(k)
15649 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15650 }
15651 }
15652 }
15653
15654 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_cn) {
15655 TEST_REQUIRES_X86_AVX;
15656 for (uint32_t n = 9; n < 16; n++) {
15657 for (size_t k = 1; k <= 5; k += 2) {
15658 GemmMicrokernelTester()
15659 .mr(4)
15660 .nr(8)
15661 .kr(1)
15662 .sr(1)
15663 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015664 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015665 .k(k)
15666 .cn_stride(11)
15667 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15668 }
15669 }
15670 }
15671
15672 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_a) {
15673 TEST_REQUIRES_X86_AVX;
15674 for (uint32_t n = 9; n < 16; n++) {
15675 for (size_t k = 1; k <= 5; k += 2) {
15676 GemmMicrokernelTester()
15677 .mr(4)
15678 .nr(8)
15679 .kr(1)
15680 .sr(1)
15681 .m(4)
15682 .n(n)
15683 .k(k)
15684 .a_stride(7)
15685 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15686 }
15687 }
15688 }
15689
15690 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_gt_8_subtile) {
15691 TEST_REQUIRES_X86_AVX;
15692 for (uint32_t n = 9; n < 16; n++) {
15693 for (size_t k = 1; k <= 5; k += 2) {
15694 for (uint32_t m = 1; m <= 4; m++) {
15695 GemmMicrokernelTester()
15696 .mr(4)
15697 .nr(8)
15698 .kr(1)
15699 .sr(1)
15700 .m(m)
15701 .n(n)
15702 .k(k)
15703 .iterations(1)
15704 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15705 }
15706 }
15707 }
15708 }
15709
15710 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8) {
15711 TEST_REQUIRES_X86_AVX;
15712 for (uint32_t n = 16; n <= 24; n += 8) {
15713 for (size_t k = 1; k <= 5; k += 2) {
15714 GemmMicrokernelTester()
15715 .mr(4)
15716 .nr(8)
15717 .kr(1)
15718 .sr(1)
15719 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015720 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015721 .k(k)
15722 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15723 }
15724 }
15725 }
15726
15727 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_cn) {
15728 TEST_REQUIRES_X86_AVX;
15729 for (uint32_t n = 16; n <= 24; n += 8) {
15730 for (size_t k = 1; k <= 5; k += 2) {
15731 GemmMicrokernelTester()
15732 .mr(4)
15733 .nr(8)
15734 .kr(1)
15735 .sr(1)
15736 .m(4)
15737 .n(n)
15738 .k(k)
15739 .cn_stride(11)
15740 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15741 }
15742 }
15743 }
15744
15745 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_a) {
15746 TEST_REQUIRES_X86_AVX;
15747 for (uint32_t n = 16; n <= 24; n += 8) {
15748 for (size_t k = 1; k <= 5; k += 2) {
15749 GemmMicrokernelTester()
15750 .mr(4)
15751 .nr(8)
15752 .kr(1)
15753 .sr(1)
15754 .m(4)
15755 .n(n)
15756 .k(k)
15757 .a_stride(7)
15758 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15759 }
15760 }
15761 }
15762
15763 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, n_div_8_subtile) {
15764 TEST_REQUIRES_X86_AVX;
15765 for (uint32_t n = 16; n <= 24; n += 8) {
15766 for (size_t k = 1; k <= 5; k += 2) {
15767 for (uint32_t m = 1; m <= 4; m++) {
15768 GemmMicrokernelTester()
15769 .mr(4)
15770 .nr(8)
15771 .kr(1)
15772 .sr(1)
15773 .m(m)
15774 .n(n)
15775 .k(k)
15776 .iterations(1)
15777 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15778 }
15779 }
15780 }
15781 }
15782
15783 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cm_subtile) {
15784 TEST_REQUIRES_X86_AVX;
15785 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015786 for (uint32_t n = 1; n <= 8; n++) {
15787 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015788 GemmMicrokernelTester()
15789 .mr(4)
15790 .nr(8)
15791 .kr(1)
15792 .sr(1)
15793 .m(m)
15794 .n(n)
15795 .k(k)
15796 .cm_stride(11)
15797 .iterations(1)
15798 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15799 }
15800 }
15801 }
15802 }
15803
15804 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, qmin) {
15805 TEST_REQUIRES_X86_AVX;
15806 GemmMicrokernelTester()
15807 .mr(4)
15808 .nr(8)
15809 .kr(1)
15810 .sr(1)
15811 .m(4)
15812 .n(8)
15813 .k(1)
15814 .qmin(128)
15815 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15816 }
15817
15818 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, qmax) {
15819 TEST_REQUIRES_X86_AVX;
15820 GemmMicrokernelTester()
15821 .mr(4)
15822 .nr(8)
15823 .kr(1)
15824 .sr(1)
15825 .m(4)
15826 .n(8)
15827 .k(1)
15828 .qmax(128)
15829 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15830 }
15831
15832 TEST(F32_GEMMINC_MINMAX_4X8__AVX_BROADCAST, strided_cm) {
15833 TEST_REQUIRES_X86_AVX;
15834 GemmMicrokernelTester()
15835 .mr(4)
15836 .nr(8)
15837 .kr(1)
15838 .sr(1)
15839 .m(4)
15840 .n(8)
15841 .k(1)
15842 .cm_stride(11)
15843 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15844 }
15845#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15846
15847
15848#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15849 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1) {
15850 TEST_REQUIRES_X86_AVX;
15851 GemmMicrokernelTester()
15852 .mr(7)
15853 .nr(8)
15854 .kr(1)
15855 .sr(1)
15856 .m(7)
15857 .n(8)
15858 .k(1)
15859 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15860 }
15861
15862 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cn) {
15863 TEST_REQUIRES_X86_AVX;
15864 GemmMicrokernelTester()
15865 .mr(7)
15866 .nr(8)
15867 .kr(1)
15868 .sr(1)
15869 .m(7)
15870 .n(8)
15871 .k(1)
15872 .cn_stride(11)
15873 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15874 }
15875
15876 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_strided_a) {
15877 TEST_REQUIRES_X86_AVX;
15878 GemmMicrokernelTester()
15879 .mr(7)
15880 .nr(8)
15881 .kr(1)
15882 .sr(1)
15883 .m(7)
15884 .n(8)
15885 .k(1)
15886 .a_stride(3)
15887 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15888 }
15889
15890 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile) {
15891 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015892 for (uint32_t n = 1; n <= 8; n++) {
15893 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015894 GemmMicrokernelTester()
15895 .mr(7)
15896 .nr(8)
15897 .kr(1)
15898 .sr(1)
15899 .m(m)
15900 .n(n)
15901 .k(1)
15902 .iterations(1)
15903 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15904 }
15905 }
15906 }
15907
15908 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_m) {
15909 TEST_REQUIRES_X86_AVX;
15910 for (uint32_t m = 1; m <= 7; m++) {
15911 GemmMicrokernelTester()
15912 .mr(7)
15913 .nr(8)
15914 .kr(1)
15915 .sr(1)
15916 .m(m)
15917 .n(8)
15918 .k(1)
15919 .iterations(1)
15920 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15921 }
15922 }
15923
15924 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_n) {
15925 TEST_REQUIRES_X86_AVX;
15926 for (uint32_t n = 1; n <= 8; n++) {
15927 GemmMicrokernelTester()
15928 .mr(7)
15929 .nr(8)
15930 .kr(1)
15931 .sr(1)
15932 .m(7)
15933 .n(n)
15934 .k(1)
15935 .iterations(1)
15936 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15937 }
15938 }
15939
15940 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1) {
15941 TEST_REQUIRES_X86_AVX;
15942 for (size_t k = 2; k < 10; k++) {
15943 GemmMicrokernelTester()
15944 .mr(7)
15945 .nr(8)
15946 .kr(1)
15947 .sr(1)
15948 .m(7)
15949 .n(8)
15950 .k(k)
15951 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15952 }
15953 }
15954
15955 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1_strided_a) {
15956 TEST_REQUIRES_X86_AVX;
15957 for (size_t k = 2; k < 10; k++) {
15958 GemmMicrokernelTester()
15959 .mr(7)
15960 .nr(8)
15961 .kr(1)
15962 .sr(1)
15963 .m(7)
15964 .n(8)
15965 .k(k)
15966 .a_stride(11)
15967 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15968 }
15969 }
15970
15971 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, k_gt_1_subtile) {
15972 TEST_REQUIRES_X86_AVX;
15973 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015974 for (uint32_t n = 1; n <= 8; n++) {
15975 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015976 GemmMicrokernelTester()
15977 .mr(7)
15978 .nr(8)
15979 .kr(1)
15980 .sr(1)
15981 .m(m)
15982 .n(n)
15983 .k(k)
15984 .iterations(1)
15985 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
15986 }
15987 }
15988 }
15989 }
15990
15991 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8) {
15992 TEST_REQUIRES_X86_AVX;
15993 for (uint32_t n = 9; n < 16; n++) {
15994 for (size_t k = 1; k <= 5; k += 2) {
15995 GemmMicrokernelTester()
15996 .mr(7)
15997 .nr(8)
15998 .kr(1)
15999 .sr(1)
16000 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016001 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016002 .k(k)
16003 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16004 }
16005 }
16006 }
16007
16008 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_cn) {
16009 TEST_REQUIRES_X86_AVX;
16010 for (uint32_t n = 9; n < 16; n++) {
16011 for (size_t k = 1; k <= 5; k += 2) {
16012 GemmMicrokernelTester()
16013 .mr(7)
16014 .nr(8)
16015 .kr(1)
16016 .sr(1)
16017 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016018 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016019 .k(k)
16020 .cn_stride(11)
16021 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16022 }
16023 }
16024 }
16025
16026 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_a) {
16027 TEST_REQUIRES_X86_AVX;
16028 for (uint32_t n = 9; n < 16; n++) {
16029 for (size_t k = 1; k <= 5; k += 2) {
16030 GemmMicrokernelTester()
16031 .mr(7)
16032 .nr(8)
16033 .kr(1)
16034 .sr(1)
16035 .m(7)
16036 .n(n)
16037 .k(k)
16038 .a_stride(7)
16039 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16040 }
16041 }
16042 }
16043
16044 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_gt_8_subtile) {
16045 TEST_REQUIRES_X86_AVX;
16046 for (uint32_t n = 9; n < 16; n++) {
16047 for (size_t k = 1; k <= 5; k += 2) {
16048 for (uint32_t m = 1; m <= 7; m++) {
16049 GemmMicrokernelTester()
16050 .mr(7)
16051 .nr(8)
16052 .kr(1)
16053 .sr(1)
16054 .m(m)
16055 .n(n)
16056 .k(k)
16057 .iterations(1)
16058 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16059 }
16060 }
16061 }
16062 }
16063
16064 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8) {
16065 TEST_REQUIRES_X86_AVX;
16066 for (uint32_t n = 16; n <= 24; n += 8) {
16067 for (size_t k = 1; k <= 5; k += 2) {
16068 GemmMicrokernelTester()
16069 .mr(7)
16070 .nr(8)
16071 .kr(1)
16072 .sr(1)
16073 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016074 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016075 .k(k)
16076 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16077 }
16078 }
16079 }
16080
16081 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_cn) {
16082 TEST_REQUIRES_X86_AVX;
16083 for (uint32_t n = 16; n <= 24; n += 8) {
16084 for (size_t k = 1; k <= 5; k += 2) {
16085 GemmMicrokernelTester()
16086 .mr(7)
16087 .nr(8)
16088 .kr(1)
16089 .sr(1)
16090 .m(7)
16091 .n(n)
16092 .k(k)
16093 .cn_stride(11)
16094 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16095 }
16096 }
16097 }
16098
16099 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_a) {
16100 TEST_REQUIRES_X86_AVX;
16101 for (uint32_t n = 16; n <= 24; n += 8) {
16102 for (size_t k = 1; k <= 5; k += 2) {
16103 GemmMicrokernelTester()
16104 .mr(7)
16105 .nr(8)
16106 .kr(1)
16107 .sr(1)
16108 .m(7)
16109 .n(n)
16110 .k(k)
16111 .a_stride(7)
16112 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16113 }
16114 }
16115 }
16116
16117 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, n_div_8_subtile) {
16118 TEST_REQUIRES_X86_AVX;
16119 for (uint32_t n = 16; n <= 24; n += 8) {
16120 for (size_t k = 1; k <= 5; k += 2) {
16121 for (uint32_t m = 1; m <= 7; m++) {
16122 GemmMicrokernelTester()
16123 .mr(7)
16124 .nr(8)
16125 .kr(1)
16126 .sr(1)
16127 .m(m)
16128 .n(n)
16129 .k(k)
16130 .iterations(1)
16131 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16132 }
16133 }
16134 }
16135 }
16136
16137 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cm_subtile) {
16138 TEST_REQUIRES_X86_AVX;
16139 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016140 for (uint32_t n = 1; n <= 8; n++) {
16141 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016142 GemmMicrokernelTester()
16143 .mr(7)
16144 .nr(8)
16145 .kr(1)
16146 .sr(1)
16147 .m(m)
16148 .n(n)
16149 .k(k)
16150 .cm_stride(11)
16151 .iterations(1)
16152 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16153 }
16154 }
16155 }
16156 }
16157
16158 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, qmin) {
16159 TEST_REQUIRES_X86_AVX;
16160 GemmMicrokernelTester()
16161 .mr(7)
16162 .nr(8)
16163 .kr(1)
16164 .sr(1)
16165 .m(7)
16166 .n(8)
16167 .k(1)
16168 .qmin(128)
16169 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16170 }
16171
16172 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, qmax) {
16173 TEST_REQUIRES_X86_AVX;
16174 GemmMicrokernelTester()
16175 .mr(7)
16176 .nr(8)
16177 .kr(1)
16178 .sr(1)
16179 .m(7)
16180 .n(8)
16181 .k(1)
16182 .qmax(128)
16183 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16184 }
16185
16186 TEST(F32_GEMMINC_MINMAX_7X8__AVX_BROADCAST, strided_cm) {
16187 TEST_REQUIRES_X86_AVX;
16188 GemmMicrokernelTester()
16189 .mr(7)
16190 .nr(8)
16191 .kr(1)
16192 .sr(1)
16193 .m(7)
16194 .n(8)
16195 .k(1)
16196 .cm_stride(11)
16197 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
16198 }
16199#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16200
16201
16202#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16203 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1) {
16204 TEST_REQUIRES_X86_AVX;
16205 GemmMicrokernelTester()
16206 .mr(3)
16207 .nr(16)
16208 .kr(1)
16209 .sr(1)
16210 .m(3)
16211 .n(16)
16212 .k(1)
16213 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16214 }
16215
16216 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cn) {
16217 TEST_REQUIRES_X86_AVX;
16218 GemmMicrokernelTester()
16219 .mr(3)
16220 .nr(16)
16221 .kr(1)
16222 .sr(1)
16223 .m(3)
16224 .n(16)
16225 .k(1)
16226 .cn_stride(19)
16227 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16228 }
16229
16230 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_strided_a) {
16231 TEST_REQUIRES_X86_AVX;
16232 GemmMicrokernelTester()
16233 .mr(3)
16234 .nr(16)
16235 .kr(1)
16236 .sr(1)
16237 .m(3)
16238 .n(16)
16239 .k(1)
16240 .a_stride(3)
16241 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16242 }
16243
16244 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile) {
16245 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016246 for (uint32_t n = 1; n <= 16; n++) {
16247 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016248 GemmMicrokernelTester()
16249 .mr(3)
16250 .nr(16)
16251 .kr(1)
16252 .sr(1)
16253 .m(m)
16254 .n(n)
16255 .k(1)
16256 .iterations(1)
16257 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16258 }
16259 }
16260 }
16261
16262 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_m) {
16263 TEST_REQUIRES_X86_AVX;
16264 for (uint32_t m = 1; m <= 3; m++) {
16265 GemmMicrokernelTester()
16266 .mr(3)
16267 .nr(16)
16268 .kr(1)
16269 .sr(1)
16270 .m(m)
16271 .n(16)
16272 .k(1)
16273 .iterations(1)
16274 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16275 }
16276 }
16277
16278 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_n) {
16279 TEST_REQUIRES_X86_AVX;
16280 for (uint32_t n = 1; n <= 16; n++) {
16281 GemmMicrokernelTester()
16282 .mr(3)
16283 .nr(16)
16284 .kr(1)
16285 .sr(1)
16286 .m(3)
16287 .n(n)
16288 .k(1)
16289 .iterations(1)
16290 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16291 }
16292 }
16293
16294 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1) {
16295 TEST_REQUIRES_X86_AVX;
16296 for (size_t k = 2; k < 10; k++) {
16297 GemmMicrokernelTester()
16298 .mr(3)
16299 .nr(16)
16300 .kr(1)
16301 .sr(1)
16302 .m(3)
16303 .n(16)
16304 .k(k)
16305 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16306 }
16307 }
16308
16309 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1_strided_a) {
16310 TEST_REQUIRES_X86_AVX;
16311 for (size_t k = 2; k < 10; k++) {
16312 GemmMicrokernelTester()
16313 .mr(3)
16314 .nr(16)
16315 .kr(1)
16316 .sr(1)
16317 .m(3)
16318 .n(16)
16319 .k(k)
16320 .a_stride(11)
16321 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16322 }
16323 }
16324
16325 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, k_gt_1_subtile) {
16326 TEST_REQUIRES_X86_AVX;
16327 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016328 for (uint32_t n = 1; n <= 16; n++) {
16329 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016330 GemmMicrokernelTester()
16331 .mr(3)
16332 .nr(16)
16333 .kr(1)
16334 .sr(1)
16335 .m(m)
16336 .n(n)
16337 .k(k)
16338 .iterations(1)
16339 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16340 }
16341 }
16342 }
16343 }
16344
16345 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16) {
16346 TEST_REQUIRES_X86_AVX;
16347 for (uint32_t n = 17; n < 32; n++) {
16348 for (size_t k = 1; k <= 5; k += 2) {
16349 GemmMicrokernelTester()
16350 .mr(3)
16351 .nr(16)
16352 .kr(1)
16353 .sr(1)
16354 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016355 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016356 .k(k)
16357 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16358 }
16359 }
16360 }
16361
16362 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_cn) {
16363 TEST_REQUIRES_X86_AVX;
16364 for (uint32_t n = 17; n < 32; n++) {
16365 for (size_t k = 1; k <= 5; k += 2) {
16366 GemmMicrokernelTester()
16367 .mr(3)
16368 .nr(16)
16369 .kr(1)
16370 .sr(1)
16371 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016372 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016373 .k(k)
16374 .cn_stride(19)
16375 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16376 }
16377 }
16378 }
16379
16380 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_a) {
16381 TEST_REQUIRES_X86_AVX;
16382 for (uint32_t n = 17; n < 32; n++) {
16383 for (size_t k = 1; k <= 5; k += 2) {
16384 GemmMicrokernelTester()
16385 .mr(3)
16386 .nr(16)
16387 .kr(1)
16388 .sr(1)
16389 .m(3)
16390 .n(n)
16391 .k(k)
16392 .a_stride(7)
16393 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16394 }
16395 }
16396 }
16397
16398 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_gt_16_subtile) {
16399 TEST_REQUIRES_X86_AVX;
16400 for (uint32_t n = 17; n < 32; n++) {
16401 for (size_t k = 1; k <= 5; k += 2) {
16402 for (uint32_t m = 1; m <= 3; m++) {
16403 GemmMicrokernelTester()
16404 .mr(3)
16405 .nr(16)
16406 .kr(1)
16407 .sr(1)
16408 .m(m)
16409 .n(n)
16410 .k(k)
16411 .iterations(1)
16412 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16413 }
16414 }
16415 }
16416 }
16417
16418 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16) {
16419 TEST_REQUIRES_X86_AVX;
16420 for (uint32_t n = 32; n <= 48; n += 16) {
16421 for (size_t k = 1; k <= 5; k += 2) {
16422 GemmMicrokernelTester()
16423 .mr(3)
16424 .nr(16)
16425 .kr(1)
16426 .sr(1)
16427 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016428 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016429 .k(k)
16430 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16431 }
16432 }
16433 }
16434
16435 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_cn) {
16436 TEST_REQUIRES_X86_AVX;
16437 for (uint32_t n = 32; n <= 48; n += 16) {
16438 for (size_t k = 1; k <= 5; k += 2) {
16439 GemmMicrokernelTester()
16440 .mr(3)
16441 .nr(16)
16442 .kr(1)
16443 .sr(1)
16444 .m(3)
16445 .n(n)
16446 .k(k)
16447 .cn_stride(19)
16448 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16449 }
16450 }
16451 }
16452
16453 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_a) {
16454 TEST_REQUIRES_X86_AVX;
16455 for (uint32_t n = 32; n <= 48; n += 16) {
16456 for (size_t k = 1; k <= 5; k += 2) {
16457 GemmMicrokernelTester()
16458 .mr(3)
16459 .nr(16)
16460 .kr(1)
16461 .sr(1)
16462 .m(3)
16463 .n(n)
16464 .k(k)
16465 .a_stride(7)
16466 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16467 }
16468 }
16469 }
16470
16471 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, n_div_16_subtile) {
16472 TEST_REQUIRES_X86_AVX;
16473 for (uint32_t n = 32; n <= 48; n += 16) {
16474 for (size_t k = 1; k <= 5; k += 2) {
16475 for (uint32_t m = 1; m <= 3; m++) {
16476 GemmMicrokernelTester()
16477 .mr(3)
16478 .nr(16)
16479 .kr(1)
16480 .sr(1)
16481 .m(m)
16482 .n(n)
16483 .k(k)
16484 .iterations(1)
16485 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16486 }
16487 }
16488 }
16489 }
16490
16491 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cm_subtile) {
16492 TEST_REQUIRES_X86_AVX;
16493 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016494 for (uint32_t n = 1; n <= 16; n++) {
16495 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016496 GemmMicrokernelTester()
16497 .mr(3)
16498 .nr(16)
16499 .kr(1)
16500 .sr(1)
16501 .m(m)
16502 .n(n)
16503 .k(k)
16504 .cm_stride(19)
16505 .iterations(1)
16506 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16507 }
16508 }
16509 }
16510 }
16511
16512 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, qmin) {
16513 TEST_REQUIRES_X86_AVX;
16514 GemmMicrokernelTester()
16515 .mr(3)
16516 .nr(16)
16517 .kr(1)
16518 .sr(1)
16519 .m(3)
16520 .n(16)
16521 .k(1)
16522 .qmin(128)
16523 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16524 }
16525
16526 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, qmax) {
16527 TEST_REQUIRES_X86_AVX;
16528 GemmMicrokernelTester()
16529 .mr(3)
16530 .nr(16)
16531 .kr(1)
16532 .sr(1)
16533 .m(3)
16534 .n(16)
16535 .k(1)
16536 .qmax(128)
16537 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16538 }
16539
16540 TEST(F32_GEMMINC_MINMAX_3X16__AVX_BROADCAST, strided_cm) {
16541 TEST_REQUIRES_X86_AVX;
16542 GemmMicrokernelTester()
16543 .mr(3)
16544 .nr(16)
16545 .kr(1)
16546 .sr(1)
16547 .m(3)
16548 .n(16)
16549 .k(1)
16550 .cm_stride(19)
16551 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
16552 }
16553#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16554
16555
16556#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16557 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1) {
16558 TEST_REQUIRES_X86_FMA3;
16559 GemmMicrokernelTester()
16560 .mr(1)
16561 .nr(8)
16562 .kr(1)
16563 .sr(1)
16564 .m(1)
16565 .n(8)
16566 .k(1)
16567 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16568 }
16569
16570 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cn) {
16571 TEST_REQUIRES_X86_FMA3;
16572 GemmMicrokernelTester()
16573 .mr(1)
16574 .nr(8)
16575 .kr(1)
16576 .sr(1)
16577 .m(1)
16578 .n(8)
16579 .k(1)
16580 .cn_stride(11)
16581 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16582 }
16583
16584 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_strided_a) {
16585 TEST_REQUIRES_X86_FMA3;
16586 GemmMicrokernelTester()
16587 .mr(1)
16588 .nr(8)
16589 .kr(1)
16590 .sr(1)
16591 .m(1)
16592 .n(8)
16593 .k(1)
16594 .a_stride(3)
16595 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16596 }
16597
16598 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile) {
16599 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016600 for (uint32_t n = 1; n <= 8; n++) {
16601 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016602 GemmMicrokernelTester()
16603 .mr(1)
16604 .nr(8)
16605 .kr(1)
16606 .sr(1)
16607 .m(m)
16608 .n(n)
16609 .k(1)
16610 .iterations(1)
16611 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16612 }
16613 }
16614 }
16615
16616 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
16617 TEST_REQUIRES_X86_FMA3;
16618 for (uint32_t m = 1; m <= 1; m++) {
16619 GemmMicrokernelTester()
16620 .mr(1)
16621 .nr(8)
16622 .kr(1)
16623 .sr(1)
16624 .m(m)
16625 .n(8)
16626 .k(1)
16627 .iterations(1)
16628 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16629 }
16630 }
16631
16632 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
16633 TEST_REQUIRES_X86_FMA3;
16634 for (uint32_t n = 1; n <= 8; n++) {
16635 GemmMicrokernelTester()
16636 .mr(1)
16637 .nr(8)
16638 .kr(1)
16639 .sr(1)
16640 .m(1)
16641 .n(n)
16642 .k(1)
16643 .iterations(1)
16644 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16645 }
16646 }
16647
16648 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1) {
16649 TEST_REQUIRES_X86_FMA3;
16650 for (size_t k = 2; k < 10; k++) {
16651 GemmMicrokernelTester()
16652 .mr(1)
16653 .nr(8)
16654 .kr(1)
16655 .sr(1)
16656 .m(1)
16657 .n(8)
16658 .k(k)
16659 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16660 }
16661 }
16662
16663 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_strided_a) {
16664 TEST_REQUIRES_X86_FMA3;
16665 for (size_t k = 2; k < 10; k++) {
16666 GemmMicrokernelTester()
16667 .mr(1)
16668 .nr(8)
16669 .kr(1)
16670 .sr(1)
16671 .m(1)
16672 .n(8)
16673 .k(k)
16674 .a_stride(11)
16675 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16676 }
16677 }
16678
16679 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_subtile) {
16680 TEST_REQUIRES_X86_FMA3;
16681 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016682 for (uint32_t n = 1; n <= 8; n++) {
16683 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016684 GemmMicrokernelTester()
16685 .mr(1)
16686 .nr(8)
16687 .kr(1)
16688 .sr(1)
16689 .m(m)
16690 .n(n)
16691 .k(k)
16692 .iterations(1)
16693 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16694 }
16695 }
16696 }
16697 }
16698
16699 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8) {
16700 TEST_REQUIRES_X86_FMA3;
16701 for (uint32_t n = 9; n < 16; n++) {
16702 for (size_t k = 1; k <= 5; k += 2) {
16703 GemmMicrokernelTester()
16704 .mr(1)
16705 .nr(8)
16706 .kr(1)
16707 .sr(1)
16708 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016709 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016710 .k(k)
16711 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16712 }
16713 }
16714 }
16715
16716 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
16717 TEST_REQUIRES_X86_FMA3;
16718 for (uint32_t n = 9; n < 16; n++) {
16719 for (size_t k = 1; k <= 5; k += 2) {
16720 GemmMicrokernelTester()
16721 .mr(1)
16722 .nr(8)
16723 .kr(1)
16724 .sr(1)
16725 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016726 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016727 .k(k)
16728 .cn_stride(11)
16729 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16730 }
16731 }
16732 }
16733
16734 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_a) {
16735 TEST_REQUIRES_X86_FMA3;
16736 for (uint32_t n = 9; n < 16; n++) {
16737 for (size_t k = 1; k <= 5; k += 2) {
16738 GemmMicrokernelTester()
16739 .mr(1)
16740 .nr(8)
16741 .kr(1)
16742 .sr(1)
16743 .m(1)
16744 .n(n)
16745 .k(k)
16746 .a_stride(7)
16747 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16748 }
16749 }
16750 }
16751
16752 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_subtile) {
16753 TEST_REQUIRES_X86_FMA3;
16754 for (uint32_t n = 9; n < 16; n++) {
16755 for (size_t k = 1; k <= 5; k += 2) {
16756 for (uint32_t m = 1; m <= 1; m++) {
16757 GemmMicrokernelTester()
16758 .mr(1)
16759 .nr(8)
16760 .kr(1)
16761 .sr(1)
16762 .m(m)
16763 .n(n)
16764 .k(k)
16765 .iterations(1)
16766 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16767 }
16768 }
16769 }
16770 }
16771
16772 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8) {
16773 TEST_REQUIRES_X86_FMA3;
16774 for (uint32_t n = 16; n <= 24; n += 8) {
16775 for (size_t k = 1; k <= 5; k += 2) {
16776 GemmMicrokernelTester()
16777 .mr(1)
16778 .nr(8)
16779 .kr(1)
16780 .sr(1)
16781 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016782 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016783 .k(k)
16784 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16785 }
16786 }
16787 }
16788
16789 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_cn) {
16790 TEST_REQUIRES_X86_FMA3;
16791 for (uint32_t n = 16; n <= 24; n += 8) {
16792 for (size_t k = 1; k <= 5; k += 2) {
16793 GemmMicrokernelTester()
16794 .mr(1)
16795 .nr(8)
16796 .kr(1)
16797 .sr(1)
16798 .m(1)
16799 .n(n)
16800 .k(k)
16801 .cn_stride(11)
16802 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16803 }
16804 }
16805 }
16806
16807 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_a) {
16808 TEST_REQUIRES_X86_FMA3;
16809 for (uint32_t n = 16; n <= 24; n += 8) {
16810 for (size_t k = 1; k <= 5; k += 2) {
16811 GemmMicrokernelTester()
16812 .mr(1)
16813 .nr(8)
16814 .kr(1)
16815 .sr(1)
16816 .m(1)
16817 .n(n)
16818 .k(k)
16819 .a_stride(7)
16820 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16821 }
16822 }
16823 }
16824
16825 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, n_div_8_subtile) {
16826 TEST_REQUIRES_X86_FMA3;
16827 for (uint32_t n = 16; n <= 24; n += 8) {
16828 for (size_t k = 1; k <= 5; k += 2) {
16829 for (uint32_t m = 1; m <= 1; m++) {
16830 GemmMicrokernelTester()
16831 .mr(1)
16832 .nr(8)
16833 .kr(1)
16834 .sr(1)
16835 .m(m)
16836 .n(n)
16837 .k(k)
16838 .iterations(1)
16839 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16840 }
16841 }
16842 }
16843 }
16844
16845 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cm_subtile) {
16846 TEST_REQUIRES_X86_FMA3;
16847 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016848 for (uint32_t n = 1; n <= 8; n++) {
16849 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016850 GemmMicrokernelTester()
16851 .mr(1)
16852 .nr(8)
16853 .kr(1)
16854 .sr(1)
16855 .m(m)
16856 .n(n)
16857 .k(k)
16858 .cm_stride(11)
16859 .iterations(1)
16860 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16861 }
16862 }
16863 }
16864 }
16865
16866 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, qmin) {
16867 TEST_REQUIRES_X86_FMA3;
16868 GemmMicrokernelTester()
16869 .mr(1)
16870 .nr(8)
16871 .kr(1)
16872 .sr(1)
16873 .m(1)
16874 .n(8)
16875 .k(1)
16876 .qmin(128)
16877 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16878 }
16879
16880 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, qmax) {
16881 TEST_REQUIRES_X86_FMA3;
16882 GemmMicrokernelTester()
16883 .mr(1)
16884 .nr(8)
16885 .kr(1)
16886 .sr(1)
16887 .m(1)
16888 .n(8)
16889 .k(1)
16890 .qmax(128)
16891 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16892 }
16893
16894 TEST(F32_GEMMINC_MINMAX_1X8__FMA3_BROADCAST, strided_cm) {
16895 TEST_REQUIRES_X86_FMA3;
16896 GemmMicrokernelTester()
16897 .mr(1)
16898 .nr(8)
16899 .kr(1)
16900 .sr(1)
16901 .m(1)
16902 .n(8)
16903 .k(1)
16904 .cm_stride(11)
16905 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16906 }
16907#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16908
16909
16910#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16911 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1) {
16912 TEST_REQUIRES_X86_FMA3;
16913 GemmMicrokernelTester()
16914 .mr(4)
16915 .nr(8)
16916 .kr(1)
16917 .sr(1)
16918 .m(4)
16919 .n(8)
16920 .k(1)
16921 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16922 }
16923
16924 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cn) {
16925 TEST_REQUIRES_X86_FMA3;
16926 GemmMicrokernelTester()
16927 .mr(4)
16928 .nr(8)
16929 .kr(1)
16930 .sr(1)
16931 .m(4)
16932 .n(8)
16933 .k(1)
16934 .cn_stride(11)
16935 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16936 }
16937
16938 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_strided_a) {
16939 TEST_REQUIRES_X86_FMA3;
16940 GemmMicrokernelTester()
16941 .mr(4)
16942 .nr(8)
16943 .kr(1)
16944 .sr(1)
16945 .m(4)
16946 .n(8)
16947 .k(1)
16948 .a_stride(3)
16949 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16950 }
16951
16952 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile) {
16953 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016954 for (uint32_t n = 1; n <= 8; n++) {
16955 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016956 GemmMicrokernelTester()
16957 .mr(4)
16958 .nr(8)
16959 .kr(1)
16960 .sr(1)
16961 .m(m)
16962 .n(n)
16963 .k(1)
16964 .iterations(1)
16965 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16966 }
16967 }
16968 }
16969
16970 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
16971 TEST_REQUIRES_X86_FMA3;
16972 for (uint32_t m = 1; m <= 4; m++) {
16973 GemmMicrokernelTester()
16974 .mr(4)
16975 .nr(8)
16976 .kr(1)
16977 .sr(1)
16978 .m(m)
16979 .n(8)
16980 .k(1)
16981 .iterations(1)
16982 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16983 }
16984 }
16985
16986 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
16987 TEST_REQUIRES_X86_FMA3;
16988 for (uint32_t n = 1; n <= 8; n++) {
16989 GemmMicrokernelTester()
16990 .mr(4)
16991 .nr(8)
16992 .kr(1)
16993 .sr(1)
16994 .m(4)
16995 .n(n)
16996 .k(1)
16997 .iterations(1)
16998 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
16999 }
17000 }
17001
17002 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1) {
17003 TEST_REQUIRES_X86_FMA3;
17004 for (size_t k = 2; k < 10; k++) {
17005 GemmMicrokernelTester()
17006 .mr(4)
17007 .nr(8)
17008 .kr(1)
17009 .sr(1)
17010 .m(4)
17011 .n(8)
17012 .k(k)
17013 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17014 }
17015 }
17016
17017 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_strided_a) {
17018 TEST_REQUIRES_X86_FMA3;
17019 for (size_t k = 2; k < 10; k++) {
17020 GemmMicrokernelTester()
17021 .mr(4)
17022 .nr(8)
17023 .kr(1)
17024 .sr(1)
17025 .m(4)
17026 .n(8)
17027 .k(k)
17028 .a_stride(11)
17029 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17030 }
17031 }
17032
17033 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_subtile) {
17034 TEST_REQUIRES_X86_FMA3;
17035 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017036 for (uint32_t n = 1; n <= 8; n++) {
17037 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017038 GemmMicrokernelTester()
17039 .mr(4)
17040 .nr(8)
17041 .kr(1)
17042 .sr(1)
17043 .m(m)
17044 .n(n)
17045 .k(k)
17046 .iterations(1)
17047 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17048 }
17049 }
17050 }
17051 }
17052
17053 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8) {
17054 TEST_REQUIRES_X86_FMA3;
17055 for (uint32_t n = 9; n < 16; n++) {
17056 for (size_t k = 1; k <= 5; k += 2) {
17057 GemmMicrokernelTester()
17058 .mr(4)
17059 .nr(8)
17060 .kr(1)
17061 .sr(1)
17062 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017063 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017064 .k(k)
17065 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17066 }
17067 }
17068 }
17069
17070 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
17071 TEST_REQUIRES_X86_FMA3;
17072 for (uint32_t n = 9; n < 16; n++) {
17073 for (size_t k = 1; k <= 5; k += 2) {
17074 GemmMicrokernelTester()
17075 .mr(4)
17076 .nr(8)
17077 .kr(1)
17078 .sr(1)
17079 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017080 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017081 .k(k)
17082 .cn_stride(11)
17083 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17084 }
17085 }
17086 }
17087
17088 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_a) {
17089 TEST_REQUIRES_X86_FMA3;
17090 for (uint32_t n = 9; n < 16; n++) {
17091 for (size_t k = 1; k <= 5; k += 2) {
17092 GemmMicrokernelTester()
17093 .mr(4)
17094 .nr(8)
17095 .kr(1)
17096 .sr(1)
17097 .m(4)
17098 .n(n)
17099 .k(k)
17100 .a_stride(7)
17101 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17102 }
17103 }
17104 }
17105
17106 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_subtile) {
17107 TEST_REQUIRES_X86_FMA3;
17108 for (uint32_t n = 9; n < 16; n++) {
17109 for (size_t k = 1; k <= 5; k += 2) {
17110 for (uint32_t m = 1; m <= 4; m++) {
17111 GemmMicrokernelTester()
17112 .mr(4)
17113 .nr(8)
17114 .kr(1)
17115 .sr(1)
17116 .m(m)
17117 .n(n)
17118 .k(k)
17119 .iterations(1)
17120 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17121 }
17122 }
17123 }
17124 }
17125
17126 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8) {
17127 TEST_REQUIRES_X86_FMA3;
17128 for (uint32_t n = 16; n <= 24; n += 8) {
17129 for (size_t k = 1; k <= 5; k += 2) {
17130 GemmMicrokernelTester()
17131 .mr(4)
17132 .nr(8)
17133 .kr(1)
17134 .sr(1)
17135 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017136 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017137 .k(k)
17138 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17139 }
17140 }
17141 }
17142
17143 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_cn) {
17144 TEST_REQUIRES_X86_FMA3;
17145 for (uint32_t n = 16; n <= 24; n += 8) {
17146 for (size_t k = 1; k <= 5; k += 2) {
17147 GemmMicrokernelTester()
17148 .mr(4)
17149 .nr(8)
17150 .kr(1)
17151 .sr(1)
17152 .m(4)
17153 .n(n)
17154 .k(k)
17155 .cn_stride(11)
17156 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17157 }
17158 }
17159 }
17160
17161 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_a) {
17162 TEST_REQUIRES_X86_FMA3;
17163 for (uint32_t n = 16; n <= 24; n += 8) {
17164 for (size_t k = 1; k <= 5; k += 2) {
17165 GemmMicrokernelTester()
17166 .mr(4)
17167 .nr(8)
17168 .kr(1)
17169 .sr(1)
17170 .m(4)
17171 .n(n)
17172 .k(k)
17173 .a_stride(7)
17174 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17175 }
17176 }
17177 }
17178
17179 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, n_div_8_subtile) {
17180 TEST_REQUIRES_X86_FMA3;
17181 for (uint32_t n = 16; n <= 24; n += 8) {
17182 for (size_t k = 1; k <= 5; k += 2) {
17183 for (uint32_t m = 1; m <= 4; m++) {
17184 GemmMicrokernelTester()
17185 .mr(4)
17186 .nr(8)
17187 .kr(1)
17188 .sr(1)
17189 .m(m)
17190 .n(n)
17191 .k(k)
17192 .iterations(1)
17193 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17194 }
17195 }
17196 }
17197 }
17198
17199 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cm_subtile) {
17200 TEST_REQUIRES_X86_FMA3;
17201 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017202 for (uint32_t n = 1; n <= 8; n++) {
17203 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017204 GemmMicrokernelTester()
17205 .mr(4)
17206 .nr(8)
17207 .kr(1)
17208 .sr(1)
17209 .m(m)
17210 .n(n)
17211 .k(k)
17212 .cm_stride(11)
17213 .iterations(1)
17214 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17215 }
17216 }
17217 }
17218 }
17219
17220 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, qmin) {
17221 TEST_REQUIRES_X86_FMA3;
17222 GemmMicrokernelTester()
17223 .mr(4)
17224 .nr(8)
17225 .kr(1)
17226 .sr(1)
17227 .m(4)
17228 .n(8)
17229 .k(1)
17230 .qmin(128)
17231 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17232 }
17233
17234 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, qmax) {
17235 TEST_REQUIRES_X86_FMA3;
17236 GemmMicrokernelTester()
17237 .mr(4)
17238 .nr(8)
17239 .kr(1)
17240 .sr(1)
17241 .m(4)
17242 .n(8)
17243 .k(1)
17244 .qmax(128)
17245 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17246 }
17247
17248 TEST(F32_GEMMINC_MINMAX_4X8__FMA3_BROADCAST, strided_cm) {
17249 TEST_REQUIRES_X86_FMA3;
17250 GemmMicrokernelTester()
17251 .mr(4)
17252 .nr(8)
17253 .kr(1)
17254 .sr(1)
17255 .m(4)
17256 .n(8)
17257 .k(1)
17258 .cm_stride(11)
17259 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17260 }
17261#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17262
17263
17264#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17265 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1) {
17266 TEST_REQUIRES_X86_FMA3;
17267 GemmMicrokernelTester()
17268 .mr(5)
17269 .nr(8)
17270 .kr(1)
17271 .sr(1)
17272 .m(5)
17273 .n(8)
17274 .k(1)
17275 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17276 }
17277
17278 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cn) {
17279 TEST_REQUIRES_X86_FMA3;
17280 GemmMicrokernelTester()
17281 .mr(5)
17282 .nr(8)
17283 .kr(1)
17284 .sr(1)
17285 .m(5)
17286 .n(8)
17287 .k(1)
17288 .cn_stride(11)
17289 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17290 }
17291
17292 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_strided_a) {
17293 TEST_REQUIRES_X86_FMA3;
17294 GemmMicrokernelTester()
17295 .mr(5)
17296 .nr(8)
17297 .kr(1)
17298 .sr(1)
17299 .m(5)
17300 .n(8)
17301 .k(1)
17302 .a_stride(3)
17303 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17304 }
17305
17306 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile) {
17307 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017308 for (uint32_t n = 1; n <= 8; n++) {
17309 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017310 GemmMicrokernelTester()
17311 .mr(5)
17312 .nr(8)
17313 .kr(1)
17314 .sr(1)
17315 .m(m)
17316 .n(n)
17317 .k(1)
17318 .iterations(1)
17319 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17320 }
17321 }
17322 }
17323
17324 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
17325 TEST_REQUIRES_X86_FMA3;
17326 for (uint32_t m = 1; m <= 5; m++) {
17327 GemmMicrokernelTester()
17328 .mr(5)
17329 .nr(8)
17330 .kr(1)
17331 .sr(1)
17332 .m(m)
17333 .n(8)
17334 .k(1)
17335 .iterations(1)
17336 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17337 }
17338 }
17339
17340 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
17341 TEST_REQUIRES_X86_FMA3;
17342 for (uint32_t n = 1; n <= 8; n++) {
17343 GemmMicrokernelTester()
17344 .mr(5)
17345 .nr(8)
17346 .kr(1)
17347 .sr(1)
17348 .m(5)
17349 .n(n)
17350 .k(1)
17351 .iterations(1)
17352 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17353 }
17354 }
17355
17356 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1) {
17357 TEST_REQUIRES_X86_FMA3;
17358 for (size_t k = 2; k < 10; k++) {
17359 GemmMicrokernelTester()
17360 .mr(5)
17361 .nr(8)
17362 .kr(1)
17363 .sr(1)
17364 .m(5)
17365 .n(8)
17366 .k(k)
17367 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17368 }
17369 }
17370
17371 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_strided_a) {
17372 TEST_REQUIRES_X86_FMA3;
17373 for (size_t k = 2; k < 10; k++) {
17374 GemmMicrokernelTester()
17375 .mr(5)
17376 .nr(8)
17377 .kr(1)
17378 .sr(1)
17379 .m(5)
17380 .n(8)
17381 .k(k)
17382 .a_stride(11)
17383 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17384 }
17385 }
17386
17387 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_subtile) {
17388 TEST_REQUIRES_X86_FMA3;
17389 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017390 for (uint32_t n = 1; n <= 8; n++) {
17391 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017392 GemmMicrokernelTester()
17393 .mr(5)
17394 .nr(8)
17395 .kr(1)
17396 .sr(1)
17397 .m(m)
17398 .n(n)
17399 .k(k)
17400 .iterations(1)
17401 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17402 }
17403 }
17404 }
17405 }
17406
17407 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8) {
17408 TEST_REQUIRES_X86_FMA3;
17409 for (uint32_t n = 9; n < 16; n++) {
17410 for (size_t k = 1; k <= 5; k += 2) {
17411 GemmMicrokernelTester()
17412 .mr(5)
17413 .nr(8)
17414 .kr(1)
17415 .sr(1)
17416 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017417 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017418 .k(k)
17419 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17420 }
17421 }
17422 }
17423
17424 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
17425 TEST_REQUIRES_X86_FMA3;
17426 for (uint32_t n = 9; n < 16; n++) {
17427 for (size_t k = 1; k <= 5; k += 2) {
17428 GemmMicrokernelTester()
17429 .mr(5)
17430 .nr(8)
17431 .kr(1)
17432 .sr(1)
17433 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017435 .k(k)
17436 .cn_stride(11)
17437 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17438 }
17439 }
17440 }
17441
17442 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_a) {
17443 TEST_REQUIRES_X86_FMA3;
17444 for (uint32_t n = 9; n < 16; n++) {
17445 for (size_t k = 1; k <= 5; k += 2) {
17446 GemmMicrokernelTester()
17447 .mr(5)
17448 .nr(8)
17449 .kr(1)
17450 .sr(1)
17451 .m(5)
17452 .n(n)
17453 .k(k)
17454 .a_stride(7)
17455 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17456 }
17457 }
17458 }
17459
17460 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_subtile) {
17461 TEST_REQUIRES_X86_FMA3;
17462 for (uint32_t n = 9; n < 16; n++) {
17463 for (size_t k = 1; k <= 5; k += 2) {
17464 for (uint32_t m = 1; m <= 5; m++) {
17465 GemmMicrokernelTester()
17466 .mr(5)
17467 .nr(8)
17468 .kr(1)
17469 .sr(1)
17470 .m(m)
17471 .n(n)
17472 .k(k)
17473 .iterations(1)
17474 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17475 }
17476 }
17477 }
17478 }
17479
17480 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8) {
17481 TEST_REQUIRES_X86_FMA3;
17482 for (uint32_t n = 16; n <= 24; n += 8) {
17483 for (size_t k = 1; k <= 5; k += 2) {
17484 GemmMicrokernelTester()
17485 .mr(5)
17486 .nr(8)
17487 .kr(1)
17488 .sr(1)
17489 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017491 .k(k)
17492 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17493 }
17494 }
17495 }
17496
17497 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_cn) {
17498 TEST_REQUIRES_X86_FMA3;
17499 for (uint32_t n = 16; n <= 24; n += 8) {
17500 for (size_t k = 1; k <= 5; k += 2) {
17501 GemmMicrokernelTester()
17502 .mr(5)
17503 .nr(8)
17504 .kr(1)
17505 .sr(1)
17506 .m(5)
17507 .n(n)
17508 .k(k)
17509 .cn_stride(11)
17510 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17511 }
17512 }
17513 }
17514
17515 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_a) {
17516 TEST_REQUIRES_X86_FMA3;
17517 for (uint32_t n = 16; n <= 24; n += 8) {
17518 for (size_t k = 1; k <= 5; k += 2) {
17519 GemmMicrokernelTester()
17520 .mr(5)
17521 .nr(8)
17522 .kr(1)
17523 .sr(1)
17524 .m(5)
17525 .n(n)
17526 .k(k)
17527 .a_stride(7)
17528 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17529 }
17530 }
17531 }
17532
17533 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, n_div_8_subtile) {
17534 TEST_REQUIRES_X86_FMA3;
17535 for (uint32_t n = 16; n <= 24; n += 8) {
17536 for (size_t k = 1; k <= 5; k += 2) {
17537 for (uint32_t m = 1; m <= 5; m++) {
17538 GemmMicrokernelTester()
17539 .mr(5)
17540 .nr(8)
17541 .kr(1)
17542 .sr(1)
17543 .m(m)
17544 .n(n)
17545 .k(k)
17546 .iterations(1)
17547 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17548 }
17549 }
17550 }
17551 }
17552
17553 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cm_subtile) {
17554 TEST_REQUIRES_X86_FMA3;
17555 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017556 for (uint32_t n = 1; n <= 8; n++) {
17557 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017558 GemmMicrokernelTester()
17559 .mr(5)
17560 .nr(8)
17561 .kr(1)
17562 .sr(1)
17563 .m(m)
17564 .n(n)
17565 .k(k)
17566 .cm_stride(11)
17567 .iterations(1)
17568 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17569 }
17570 }
17571 }
17572 }
17573
17574 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, qmin) {
17575 TEST_REQUIRES_X86_FMA3;
17576 GemmMicrokernelTester()
17577 .mr(5)
17578 .nr(8)
17579 .kr(1)
17580 .sr(1)
17581 .m(5)
17582 .n(8)
17583 .k(1)
17584 .qmin(128)
17585 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17586 }
17587
17588 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, qmax) {
17589 TEST_REQUIRES_X86_FMA3;
17590 GemmMicrokernelTester()
17591 .mr(5)
17592 .nr(8)
17593 .kr(1)
17594 .sr(1)
17595 .m(5)
17596 .n(8)
17597 .k(1)
17598 .qmax(128)
17599 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17600 }
17601
17602 TEST(F32_GEMMINC_MINMAX_5X8__FMA3_BROADCAST, strided_cm) {
17603 TEST_REQUIRES_X86_FMA3;
17604 GemmMicrokernelTester()
17605 .mr(5)
17606 .nr(8)
17607 .kr(1)
17608 .sr(1)
17609 .m(5)
17610 .n(8)
17611 .k(1)
17612 .cm_stride(11)
17613 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17614 }
17615#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17616
17617
17618#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17619 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1) {
17620 TEST_REQUIRES_X86_FMA3;
17621 GemmMicrokernelTester()
17622 .mr(1)
17623 .nr(16)
17624 .kr(1)
17625 .sr(1)
17626 .m(1)
17627 .n(16)
17628 .k(1)
17629 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17630 }
17631
17632 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cn) {
17633 TEST_REQUIRES_X86_FMA3;
17634 GemmMicrokernelTester()
17635 .mr(1)
17636 .nr(16)
17637 .kr(1)
17638 .sr(1)
17639 .m(1)
17640 .n(16)
17641 .k(1)
17642 .cn_stride(19)
17643 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17644 }
17645
17646 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_strided_a) {
17647 TEST_REQUIRES_X86_FMA3;
17648 GemmMicrokernelTester()
17649 .mr(1)
17650 .nr(16)
17651 .kr(1)
17652 .sr(1)
17653 .m(1)
17654 .n(16)
17655 .k(1)
17656 .a_stride(3)
17657 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17658 }
17659
17660 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile) {
17661 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017662 for (uint32_t n = 1; n <= 16; n++) {
17663 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017664 GemmMicrokernelTester()
17665 .mr(1)
17666 .nr(16)
17667 .kr(1)
17668 .sr(1)
17669 .m(m)
17670 .n(n)
17671 .k(1)
17672 .iterations(1)
17673 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17674 }
17675 }
17676 }
17677
17678 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
17679 TEST_REQUIRES_X86_FMA3;
17680 for (uint32_t m = 1; m <= 1; m++) {
17681 GemmMicrokernelTester()
17682 .mr(1)
17683 .nr(16)
17684 .kr(1)
17685 .sr(1)
17686 .m(m)
17687 .n(16)
17688 .k(1)
17689 .iterations(1)
17690 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17691 }
17692 }
17693
17694 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
17695 TEST_REQUIRES_X86_FMA3;
17696 for (uint32_t n = 1; n <= 16; n++) {
17697 GemmMicrokernelTester()
17698 .mr(1)
17699 .nr(16)
17700 .kr(1)
17701 .sr(1)
17702 .m(1)
17703 .n(n)
17704 .k(1)
17705 .iterations(1)
17706 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17707 }
17708 }
17709
17710 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1) {
17711 TEST_REQUIRES_X86_FMA3;
17712 for (size_t k = 2; k < 10; k++) {
17713 GemmMicrokernelTester()
17714 .mr(1)
17715 .nr(16)
17716 .kr(1)
17717 .sr(1)
17718 .m(1)
17719 .n(16)
17720 .k(k)
17721 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17722 }
17723 }
17724
17725 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_strided_a) {
17726 TEST_REQUIRES_X86_FMA3;
17727 for (size_t k = 2; k < 10; k++) {
17728 GemmMicrokernelTester()
17729 .mr(1)
17730 .nr(16)
17731 .kr(1)
17732 .sr(1)
17733 .m(1)
17734 .n(16)
17735 .k(k)
17736 .a_stride(11)
17737 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17738 }
17739 }
17740
17741 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_subtile) {
17742 TEST_REQUIRES_X86_FMA3;
17743 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017744 for (uint32_t n = 1; n <= 16; n++) {
17745 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017746 GemmMicrokernelTester()
17747 .mr(1)
17748 .nr(16)
17749 .kr(1)
17750 .sr(1)
17751 .m(m)
17752 .n(n)
17753 .k(k)
17754 .iterations(1)
17755 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17756 }
17757 }
17758 }
17759 }
17760
17761 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16) {
17762 TEST_REQUIRES_X86_FMA3;
17763 for (uint32_t n = 17; n < 32; n++) {
17764 for (size_t k = 1; k <= 5; k += 2) {
17765 GemmMicrokernelTester()
17766 .mr(1)
17767 .nr(16)
17768 .kr(1)
17769 .sr(1)
17770 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017771 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017772 .k(k)
17773 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17774 }
17775 }
17776 }
17777
17778 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
17779 TEST_REQUIRES_X86_FMA3;
17780 for (uint32_t n = 17; n < 32; n++) {
17781 for (size_t k = 1; k <= 5; k += 2) {
17782 GemmMicrokernelTester()
17783 .mr(1)
17784 .nr(16)
17785 .kr(1)
17786 .sr(1)
17787 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017788 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017789 .k(k)
17790 .cn_stride(19)
17791 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17792 }
17793 }
17794 }
17795
17796 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_a) {
17797 TEST_REQUIRES_X86_FMA3;
17798 for (uint32_t n = 17; n < 32; n++) {
17799 for (size_t k = 1; k <= 5; k += 2) {
17800 GemmMicrokernelTester()
17801 .mr(1)
17802 .nr(16)
17803 .kr(1)
17804 .sr(1)
17805 .m(1)
17806 .n(n)
17807 .k(k)
17808 .a_stride(7)
17809 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17810 }
17811 }
17812 }
17813
17814 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_subtile) {
17815 TEST_REQUIRES_X86_FMA3;
17816 for (uint32_t n = 17; n < 32; n++) {
17817 for (size_t k = 1; k <= 5; k += 2) {
17818 for (uint32_t m = 1; m <= 1; m++) {
17819 GemmMicrokernelTester()
17820 .mr(1)
17821 .nr(16)
17822 .kr(1)
17823 .sr(1)
17824 .m(m)
17825 .n(n)
17826 .k(k)
17827 .iterations(1)
17828 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17829 }
17830 }
17831 }
17832 }
17833
17834 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16) {
17835 TEST_REQUIRES_X86_FMA3;
17836 for (uint32_t n = 32; n <= 48; n += 16) {
17837 for (size_t k = 1; k <= 5; k += 2) {
17838 GemmMicrokernelTester()
17839 .mr(1)
17840 .nr(16)
17841 .kr(1)
17842 .sr(1)
17843 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017844 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017845 .k(k)
17846 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17847 }
17848 }
17849 }
17850
17851 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_cn) {
17852 TEST_REQUIRES_X86_FMA3;
17853 for (uint32_t n = 32; n <= 48; n += 16) {
17854 for (size_t k = 1; k <= 5; k += 2) {
17855 GemmMicrokernelTester()
17856 .mr(1)
17857 .nr(16)
17858 .kr(1)
17859 .sr(1)
17860 .m(1)
17861 .n(n)
17862 .k(k)
17863 .cn_stride(19)
17864 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17865 }
17866 }
17867 }
17868
17869 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_a) {
17870 TEST_REQUIRES_X86_FMA3;
17871 for (uint32_t n = 32; n <= 48; n += 16) {
17872 for (size_t k = 1; k <= 5; k += 2) {
17873 GemmMicrokernelTester()
17874 .mr(1)
17875 .nr(16)
17876 .kr(1)
17877 .sr(1)
17878 .m(1)
17879 .n(n)
17880 .k(k)
17881 .a_stride(7)
17882 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17883 }
17884 }
17885 }
17886
17887 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, n_div_16_subtile) {
17888 TEST_REQUIRES_X86_FMA3;
17889 for (uint32_t n = 32; n <= 48; n += 16) {
17890 for (size_t k = 1; k <= 5; k += 2) {
17891 for (uint32_t m = 1; m <= 1; m++) {
17892 GemmMicrokernelTester()
17893 .mr(1)
17894 .nr(16)
17895 .kr(1)
17896 .sr(1)
17897 .m(m)
17898 .n(n)
17899 .k(k)
17900 .iterations(1)
17901 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17902 }
17903 }
17904 }
17905 }
17906
17907 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cm_subtile) {
17908 TEST_REQUIRES_X86_FMA3;
17909 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017910 for (uint32_t n = 1; n <= 16; n++) {
17911 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017912 GemmMicrokernelTester()
17913 .mr(1)
17914 .nr(16)
17915 .kr(1)
17916 .sr(1)
17917 .m(m)
17918 .n(n)
17919 .k(k)
17920 .cm_stride(19)
17921 .iterations(1)
17922 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17923 }
17924 }
17925 }
17926 }
17927
17928 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, qmin) {
17929 TEST_REQUIRES_X86_FMA3;
17930 GemmMicrokernelTester()
17931 .mr(1)
17932 .nr(16)
17933 .kr(1)
17934 .sr(1)
17935 .m(1)
17936 .n(16)
17937 .k(1)
17938 .qmin(128)
17939 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17940 }
17941
17942 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, qmax) {
17943 TEST_REQUIRES_X86_FMA3;
17944 GemmMicrokernelTester()
17945 .mr(1)
17946 .nr(16)
17947 .kr(1)
17948 .sr(1)
17949 .m(1)
17950 .n(16)
17951 .k(1)
17952 .qmax(128)
17953 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17954 }
17955
17956 TEST(F32_GEMMINC_MINMAX_1X16__FMA3_BROADCAST, strided_cm) {
17957 TEST_REQUIRES_X86_FMA3;
17958 GemmMicrokernelTester()
17959 .mr(1)
17960 .nr(16)
17961 .kr(1)
17962 .sr(1)
17963 .m(1)
17964 .n(16)
17965 .k(1)
17966 .cm_stride(19)
17967 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17968 }
17969#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17970
17971
17972#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17973 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1) {
17974 TEST_REQUIRES_X86_FMA3;
17975 GemmMicrokernelTester()
17976 .mr(3)
17977 .nr(16)
17978 .kr(1)
17979 .sr(1)
17980 .m(3)
17981 .n(16)
17982 .k(1)
17983 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17984 }
17985
17986 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cn) {
17987 TEST_REQUIRES_X86_FMA3;
17988 GemmMicrokernelTester()
17989 .mr(3)
17990 .nr(16)
17991 .kr(1)
17992 .sr(1)
17993 .m(3)
17994 .n(16)
17995 .k(1)
17996 .cn_stride(19)
17997 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
17998 }
17999
18000 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_strided_a) {
18001 TEST_REQUIRES_X86_FMA3;
18002 GemmMicrokernelTester()
18003 .mr(3)
18004 .nr(16)
18005 .kr(1)
18006 .sr(1)
18007 .m(3)
18008 .n(16)
18009 .k(1)
18010 .a_stride(3)
18011 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18012 }
18013
18014 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile) {
18015 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018016 for (uint32_t n = 1; n <= 16; n++) {
18017 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018018 GemmMicrokernelTester()
18019 .mr(3)
18020 .nr(16)
18021 .kr(1)
18022 .sr(1)
18023 .m(m)
18024 .n(n)
18025 .k(1)
18026 .iterations(1)
18027 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18028 }
18029 }
18030 }
18031
18032 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
18033 TEST_REQUIRES_X86_FMA3;
18034 for (uint32_t m = 1; m <= 3; m++) {
18035 GemmMicrokernelTester()
18036 .mr(3)
18037 .nr(16)
18038 .kr(1)
18039 .sr(1)
18040 .m(m)
18041 .n(16)
18042 .k(1)
18043 .iterations(1)
18044 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18045 }
18046 }
18047
18048 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
18049 TEST_REQUIRES_X86_FMA3;
18050 for (uint32_t n = 1; n <= 16; n++) {
18051 GemmMicrokernelTester()
18052 .mr(3)
18053 .nr(16)
18054 .kr(1)
18055 .sr(1)
18056 .m(3)
18057 .n(n)
18058 .k(1)
18059 .iterations(1)
18060 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18061 }
18062 }
18063
18064 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1) {
18065 TEST_REQUIRES_X86_FMA3;
18066 for (size_t k = 2; k < 10; k++) {
18067 GemmMicrokernelTester()
18068 .mr(3)
18069 .nr(16)
18070 .kr(1)
18071 .sr(1)
18072 .m(3)
18073 .n(16)
18074 .k(k)
18075 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18076 }
18077 }
18078
18079 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_strided_a) {
18080 TEST_REQUIRES_X86_FMA3;
18081 for (size_t k = 2; k < 10; k++) {
18082 GemmMicrokernelTester()
18083 .mr(3)
18084 .nr(16)
18085 .kr(1)
18086 .sr(1)
18087 .m(3)
18088 .n(16)
18089 .k(k)
18090 .a_stride(11)
18091 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18092 }
18093 }
18094
18095 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_subtile) {
18096 TEST_REQUIRES_X86_FMA3;
18097 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018098 for (uint32_t n = 1; n <= 16; n++) {
18099 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018100 GemmMicrokernelTester()
18101 .mr(3)
18102 .nr(16)
18103 .kr(1)
18104 .sr(1)
18105 .m(m)
18106 .n(n)
18107 .k(k)
18108 .iterations(1)
18109 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18110 }
18111 }
18112 }
18113 }
18114
18115 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16) {
18116 TEST_REQUIRES_X86_FMA3;
18117 for (uint32_t n = 17; n < 32; n++) {
18118 for (size_t k = 1; k <= 5; k += 2) {
18119 GemmMicrokernelTester()
18120 .mr(3)
18121 .nr(16)
18122 .kr(1)
18123 .sr(1)
18124 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018125 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018126 .k(k)
18127 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18128 }
18129 }
18130 }
18131
18132 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
18133 TEST_REQUIRES_X86_FMA3;
18134 for (uint32_t n = 17; n < 32; n++) {
18135 for (size_t k = 1; k <= 5; k += 2) {
18136 GemmMicrokernelTester()
18137 .mr(3)
18138 .nr(16)
18139 .kr(1)
18140 .sr(1)
18141 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018142 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018143 .k(k)
18144 .cn_stride(19)
18145 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18146 }
18147 }
18148 }
18149
18150 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_a) {
18151 TEST_REQUIRES_X86_FMA3;
18152 for (uint32_t n = 17; n < 32; n++) {
18153 for (size_t k = 1; k <= 5; k += 2) {
18154 GemmMicrokernelTester()
18155 .mr(3)
18156 .nr(16)
18157 .kr(1)
18158 .sr(1)
18159 .m(3)
18160 .n(n)
18161 .k(k)
18162 .a_stride(7)
18163 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18164 }
18165 }
18166 }
18167
18168 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_subtile) {
18169 TEST_REQUIRES_X86_FMA3;
18170 for (uint32_t n = 17; n < 32; n++) {
18171 for (size_t k = 1; k <= 5; k += 2) {
18172 for (uint32_t m = 1; m <= 3; m++) {
18173 GemmMicrokernelTester()
18174 .mr(3)
18175 .nr(16)
18176 .kr(1)
18177 .sr(1)
18178 .m(m)
18179 .n(n)
18180 .k(k)
18181 .iterations(1)
18182 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18183 }
18184 }
18185 }
18186 }
18187
18188 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16) {
18189 TEST_REQUIRES_X86_FMA3;
18190 for (uint32_t n = 32; n <= 48; n += 16) {
18191 for (size_t k = 1; k <= 5; k += 2) {
18192 GemmMicrokernelTester()
18193 .mr(3)
18194 .nr(16)
18195 .kr(1)
18196 .sr(1)
18197 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018198 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018199 .k(k)
18200 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18201 }
18202 }
18203 }
18204
18205 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_cn) {
18206 TEST_REQUIRES_X86_FMA3;
18207 for (uint32_t n = 32; n <= 48; n += 16) {
18208 for (size_t k = 1; k <= 5; k += 2) {
18209 GemmMicrokernelTester()
18210 .mr(3)
18211 .nr(16)
18212 .kr(1)
18213 .sr(1)
18214 .m(3)
18215 .n(n)
18216 .k(k)
18217 .cn_stride(19)
18218 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18219 }
18220 }
18221 }
18222
18223 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_a) {
18224 TEST_REQUIRES_X86_FMA3;
18225 for (uint32_t n = 32; n <= 48; n += 16) {
18226 for (size_t k = 1; k <= 5; k += 2) {
18227 GemmMicrokernelTester()
18228 .mr(3)
18229 .nr(16)
18230 .kr(1)
18231 .sr(1)
18232 .m(3)
18233 .n(n)
18234 .k(k)
18235 .a_stride(7)
18236 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18237 }
18238 }
18239 }
18240
18241 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, n_div_16_subtile) {
18242 TEST_REQUIRES_X86_FMA3;
18243 for (uint32_t n = 32; n <= 48; n += 16) {
18244 for (size_t k = 1; k <= 5; k += 2) {
18245 for (uint32_t m = 1; m <= 3; m++) {
18246 GemmMicrokernelTester()
18247 .mr(3)
18248 .nr(16)
18249 .kr(1)
18250 .sr(1)
18251 .m(m)
18252 .n(n)
18253 .k(k)
18254 .iterations(1)
18255 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18256 }
18257 }
18258 }
18259 }
18260
18261 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cm_subtile) {
18262 TEST_REQUIRES_X86_FMA3;
18263 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018264 for (uint32_t n = 1; n <= 16; n++) {
18265 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018266 GemmMicrokernelTester()
18267 .mr(3)
18268 .nr(16)
18269 .kr(1)
18270 .sr(1)
18271 .m(m)
18272 .n(n)
18273 .k(k)
18274 .cm_stride(19)
18275 .iterations(1)
18276 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18277 }
18278 }
18279 }
18280 }
18281
18282 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, qmin) {
18283 TEST_REQUIRES_X86_FMA3;
18284 GemmMicrokernelTester()
18285 .mr(3)
18286 .nr(16)
18287 .kr(1)
18288 .sr(1)
18289 .m(3)
18290 .n(16)
18291 .k(1)
18292 .qmin(128)
18293 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18294 }
18295
18296 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, qmax) {
18297 TEST_REQUIRES_X86_FMA3;
18298 GemmMicrokernelTester()
18299 .mr(3)
18300 .nr(16)
18301 .kr(1)
18302 .sr(1)
18303 .m(3)
18304 .n(16)
18305 .k(1)
18306 .qmax(128)
18307 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18308 }
18309
18310 TEST(F32_GEMMINC_MINMAX_3X16__FMA3_BROADCAST, strided_cm) {
18311 TEST_REQUIRES_X86_FMA3;
18312 GemmMicrokernelTester()
18313 .mr(3)
18314 .nr(16)
18315 .kr(1)
18316 .sr(1)
18317 .m(3)
18318 .n(16)
18319 .k(1)
18320 .cm_stride(19)
18321 .Test(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
18322 }
18323#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18324
18325
18326#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18327 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1) {
18328 TEST_REQUIRES_X86_AVX512F;
18329 GemmMicrokernelTester()
18330 .mr(1)
18331 .nr(16)
18332 .kr(1)
18333 .sr(1)
18334 .m(1)
18335 .n(16)
18336 .k(1)
18337 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18338 }
18339
18340 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cn) {
18341 TEST_REQUIRES_X86_AVX512F;
18342 GemmMicrokernelTester()
18343 .mr(1)
18344 .nr(16)
18345 .kr(1)
18346 .sr(1)
18347 .m(1)
18348 .n(16)
18349 .k(1)
18350 .cn_stride(19)
18351 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18352 }
18353
18354 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
18355 TEST_REQUIRES_X86_AVX512F;
18356 GemmMicrokernelTester()
18357 .mr(1)
18358 .nr(16)
18359 .kr(1)
18360 .sr(1)
18361 .m(1)
18362 .n(16)
18363 .k(1)
18364 .a_stride(3)
18365 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18366 }
18367
18368 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile) {
18369 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018370 for (uint32_t n = 1; n <= 16; n++) {
18371 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018372 GemmMicrokernelTester()
18373 .mr(1)
18374 .nr(16)
18375 .kr(1)
18376 .sr(1)
18377 .m(m)
18378 .n(n)
18379 .k(1)
18380 .iterations(1)
18381 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18382 }
18383 }
18384 }
18385
18386 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
18387 TEST_REQUIRES_X86_AVX512F;
18388 for (uint32_t m = 1; m <= 1; m++) {
18389 GemmMicrokernelTester()
18390 .mr(1)
18391 .nr(16)
18392 .kr(1)
18393 .sr(1)
18394 .m(m)
18395 .n(16)
18396 .k(1)
18397 .iterations(1)
18398 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18399 }
18400 }
18401
18402 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
18403 TEST_REQUIRES_X86_AVX512F;
18404 for (uint32_t n = 1; n <= 16; n++) {
18405 GemmMicrokernelTester()
18406 .mr(1)
18407 .nr(16)
18408 .kr(1)
18409 .sr(1)
18410 .m(1)
18411 .n(n)
18412 .k(1)
18413 .iterations(1)
18414 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18415 }
18416 }
18417
18418 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1) {
18419 TEST_REQUIRES_X86_AVX512F;
18420 for (size_t k = 2; k < 10; k++) {
18421 GemmMicrokernelTester()
18422 .mr(1)
18423 .nr(16)
18424 .kr(1)
18425 .sr(1)
18426 .m(1)
18427 .n(16)
18428 .k(k)
18429 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18430 }
18431 }
18432
18433 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
18434 TEST_REQUIRES_X86_AVX512F;
18435 for (size_t k = 2; k < 10; k++) {
18436 GemmMicrokernelTester()
18437 .mr(1)
18438 .nr(16)
18439 .kr(1)
18440 .sr(1)
18441 .m(1)
18442 .n(16)
18443 .k(k)
18444 .a_stride(11)
18445 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18446 }
18447 }
18448
18449 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_subtile) {
18450 TEST_REQUIRES_X86_AVX512F;
18451 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018452 for (uint32_t n = 1; n <= 16; n++) {
18453 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018454 GemmMicrokernelTester()
18455 .mr(1)
18456 .nr(16)
18457 .kr(1)
18458 .sr(1)
18459 .m(m)
18460 .n(n)
18461 .k(k)
18462 .iterations(1)
18463 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18464 }
18465 }
18466 }
18467 }
18468
18469 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16) {
18470 TEST_REQUIRES_X86_AVX512F;
18471 for (uint32_t n = 17; n < 32; n++) {
18472 for (size_t k = 1; k <= 5; k += 2) {
18473 GemmMicrokernelTester()
18474 .mr(1)
18475 .nr(16)
18476 .kr(1)
18477 .sr(1)
18478 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018479 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018480 .k(k)
18481 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18482 }
18483 }
18484 }
18485
18486 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
18487 TEST_REQUIRES_X86_AVX512F;
18488 for (uint32_t n = 17; n < 32; n++) {
18489 for (size_t k = 1; k <= 5; k += 2) {
18490 GemmMicrokernelTester()
18491 .mr(1)
18492 .nr(16)
18493 .kr(1)
18494 .sr(1)
18495 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018496 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018497 .k(k)
18498 .cn_stride(19)
18499 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18500 }
18501 }
18502 }
18503
18504 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
18505 TEST_REQUIRES_X86_AVX512F;
18506 for (uint32_t n = 17; n < 32; n++) {
18507 for (size_t k = 1; k <= 5; k += 2) {
18508 GemmMicrokernelTester()
18509 .mr(1)
18510 .nr(16)
18511 .kr(1)
18512 .sr(1)
18513 .m(1)
18514 .n(n)
18515 .k(k)
18516 .a_stride(7)
18517 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18518 }
18519 }
18520 }
18521
18522 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_subtile) {
18523 TEST_REQUIRES_X86_AVX512F;
18524 for (uint32_t n = 17; n < 32; n++) {
18525 for (size_t k = 1; k <= 5; k += 2) {
18526 for (uint32_t m = 1; m <= 1; m++) {
18527 GemmMicrokernelTester()
18528 .mr(1)
18529 .nr(16)
18530 .kr(1)
18531 .sr(1)
18532 .m(m)
18533 .n(n)
18534 .k(k)
18535 .iterations(1)
18536 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18537 }
18538 }
18539 }
18540 }
18541
18542 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16) {
18543 TEST_REQUIRES_X86_AVX512F;
18544 for (uint32_t n = 32; n <= 48; n += 16) {
18545 for (size_t k = 1; k <= 5; k += 2) {
18546 GemmMicrokernelTester()
18547 .mr(1)
18548 .nr(16)
18549 .kr(1)
18550 .sr(1)
18551 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018552 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018553 .k(k)
18554 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18555 }
18556 }
18557 }
18558
18559 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
18560 TEST_REQUIRES_X86_AVX512F;
18561 for (uint32_t n = 32; n <= 48; n += 16) {
18562 for (size_t k = 1; k <= 5; k += 2) {
18563 GemmMicrokernelTester()
18564 .mr(1)
18565 .nr(16)
18566 .kr(1)
18567 .sr(1)
18568 .m(1)
18569 .n(n)
18570 .k(k)
18571 .cn_stride(19)
18572 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18573 }
18574 }
18575 }
18576
18577 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_a) {
18578 TEST_REQUIRES_X86_AVX512F;
18579 for (uint32_t n = 32; n <= 48; n += 16) {
18580 for (size_t k = 1; k <= 5; k += 2) {
18581 GemmMicrokernelTester()
18582 .mr(1)
18583 .nr(16)
18584 .kr(1)
18585 .sr(1)
18586 .m(1)
18587 .n(n)
18588 .k(k)
18589 .a_stride(7)
18590 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18591 }
18592 }
18593 }
18594
18595 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_subtile) {
18596 TEST_REQUIRES_X86_AVX512F;
18597 for (uint32_t n = 32; n <= 48; n += 16) {
18598 for (size_t k = 1; k <= 5; k += 2) {
18599 for (uint32_t m = 1; m <= 1; m++) {
18600 GemmMicrokernelTester()
18601 .mr(1)
18602 .nr(16)
18603 .kr(1)
18604 .sr(1)
18605 .m(m)
18606 .n(n)
18607 .k(k)
18608 .iterations(1)
18609 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18610 }
18611 }
18612 }
18613 }
18614
18615 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cm_subtile) {
18616 TEST_REQUIRES_X86_AVX512F;
18617 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018618 for (uint32_t n = 1; n <= 16; n++) {
18619 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018620 GemmMicrokernelTester()
18621 .mr(1)
18622 .nr(16)
18623 .kr(1)
18624 .sr(1)
18625 .m(m)
18626 .n(n)
18627 .k(k)
18628 .cm_stride(19)
18629 .iterations(1)
18630 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18631 }
18632 }
18633 }
18634 }
18635
18636 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, qmin) {
18637 TEST_REQUIRES_X86_AVX512F;
18638 GemmMicrokernelTester()
18639 .mr(1)
18640 .nr(16)
18641 .kr(1)
18642 .sr(1)
18643 .m(1)
18644 .n(16)
18645 .k(1)
18646 .qmin(128)
18647 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18648 }
18649
18650 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, qmax) {
18651 TEST_REQUIRES_X86_AVX512F;
18652 GemmMicrokernelTester()
18653 .mr(1)
18654 .nr(16)
18655 .kr(1)
18656 .sr(1)
18657 .m(1)
18658 .n(16)
18659 .k(1)
18660 .qmax(128)
18661 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18662 }
18663
18664 TEST(F32_GEMMINC_MINMAX_1X16__AVX512F_BROADCAST, strided_cm) {
18665 TEST_REQUIRES_X86_AVX512F;
18666 GemmMicrokernelTester()
18667 .mr(1)
18668 .nr(16)
18669 .kr(1)
18670 .sr(1)
18671 .m(1)
18672 .n(16)
18673 .k(1)
18674 .cm_stride(19)
18675 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18676 }
18677#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18678
18679
18680#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18681 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1) {
18682 TEST_REQUIRES_X86_AVX512F;
18683 GemmMicrokernelTester()
18684 .mr(6)
18685 .nr(16)
18686 .kr(1)
18687 .sr(1)
18688 .m(6)
18689 .n(16)
18690 .k(1)
18691 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18692 }
18693
18694 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cn) {
18695 TEST_REQUIRES_X86_AVX512F;
18696 GemmMicrokernelTester()
18697 .mr(6)
18698 .nr(16)
18699 .kr(1)
18700 .sr(1)
18701 .m(6)
18702 .n(16)
18703 .k(1)
18704 .cn_stride(19)
18705 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18706 }
18707
18708 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
18709 TEST_REQUIRES_X86_AVX512F;
18710 GemmMicrokernelTester()
18711 .mr(6)
18712 .nr(16)
18713 .kr(1)
18714 .sr(1)
18715 .m(6)
18716 .n(16)
18717 .k(1)
18718 .a_stride(3)
18719 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18720 }
18721
18722 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile) {
18723 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018724 for (uint32_t n = 1; n <= 16; n++) {
18725 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018726 GemmMicrokernelTester()
18727 .mr(6)
18728 .nr(16)
18729 .kr(1)
18730 .sr(1)
18731 .m(m)
18732 .n(n)
18733 .k(1)
18734 .iterations(1)
18735 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18736 }
18737 }
18738 }
18739
18740 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
18741 TEST_REQUIRES_X86_AVX512F;
18742 for (uint32_t m = 1; m <= 6; m++) {
18743 GemmMicrokernelTester()
18744 .mr(6)
18745 .nr(16)
18746 .kr(1)
18747 .sr(1)
18748 .m(m)
18749 .n(16)
18750 .k(1)
18751 .iterations(1)
18752 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18753 }
18754 }
18755
18756 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
18757 TEST_REQUIRES_X86_AVX512F;
18758 for (uint32_t n = 1; n <= 16; n++) {
18759 GemmMicrokernelTester()
18760 .mr(6)
18761 .nr(16)
18762 .kr(1)
18763 .sr(1)
18764 .m(6)
18765 .n(n)
18766 .k(1)
18767 .iterations(1)
18768 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18769 }
18770 }
18771
18772 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1) {
18773 TEST_REQUIRES_X86_AVX512F;
18774 for (size_t k = 2; k < 10; k++) {
18775 GemmMicrokernelTester()
18776 .mr(6)
18777 .nr(16)
18778 .kr(1)
18779 .sr(1)
18780 .m(6)
18781 .n(16)
18782 .k(k)
18783 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18784 }
18785 }
18786
18787 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
18788 TEST_REQUIRES_X86_AVX512F;
18789 for (size_t k = 2; k < 10; k++) {
18790 GemmMicrokernelTester()
18791 .mr(6)
18792 .nr(16)
18793 .kr(1)
18794 .sr(1)
18795 .m(6)
18796 .n(16)
18797 .k(k)
18798 .a_stride(11)
18799 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18800 }
18801 }
18802
18803 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_subtile) {
18804 TEST_REQUIRES_X86_AVX512F;
18805 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018806 for (uint32_t n = 1; n <= 16; n++) {
18807 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018808 GemmMicrokernelTester()
18809 .mr(6)
18810 .nr(16)
18811 .kr(1)
18812 .sr(1)
18813 .m(m)
18814 .n(n)
18815 .k(k)
18816 .iterations(1)
18817 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18818 }
18819 }
18820 }
18821 }
18822
18823 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16) {
18824 TEST_REQUIRES_X86_AVX512F;
18825 for (uint32_t n = 17; n < 32; n++) {
18826 for (size_t k = 1; k <= 5; k += 2) {
18827 GemmMicrokernelTester()
18828 .mr(6)
18829 .nr(16)
18830 .kr(1)
18831 .sr(1)
18832 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018833 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018834 .k(k)
18835 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18836 }
18837 }
18838 }
18839
18840 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
18841 TEST_REQUIRES_X86_AVX512F;
18842 for (uint32_t n = 17; n < 32; n++) {
18843 for (size_t k = 1; k <= 5; k += 2) {
18844 GemmMicrokernelTester()
18845 .mr(6)
18846 .nr(16)
18847 .kr(1)
18848 .sr(1)
18849 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018850 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018851 .k(k)
18852 .cn_stride(19)
18853 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18854 }
18855 }
18856 }
18857
18858 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
18859 TEST_REQUIRES_X86_AVX512F;
18860 for (uint32_t n = 17; n < 32; n++) {
18861 for (size_t k = 1; k <= 5; k += 2) {
18862 GemmMicrokernelTester()
18863 .mr(6)
18864 .nr(16)
18865 .kr(1)
18866 .sr(1)
18867 .m(6)
18868 .n(n)
18869 .k(k)
18870 .a_stride(7)
18871 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18872 }
18873 }
18874 }
18875
18876 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_subtile) {
18877 TEST_REQUIRES_X86_AVX512F;
18878 for (uint32_t n = 17; n < 32; n++) {
18879 for (size_t k = 1; k <= 5; k += 2) {
18880 for (uint32_t m = 1; m <= 6; m++) {
18881 GemmMicrokernelTester()
18882 .mr(6)
18883 .nr(16)
18884 .kr(1)
18885 .sr(1)
18886 .m(m)
18887 .n(n)
18888 .k(k)
18889 .iterations(1)
18890 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18891 }
18892 }
18893 }
18894 }
18895
18896 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16) {
18897 TEST_REQUIRES_X86_AVX512F;
18898 for (uint32_t n = 32; n <= 48; n += 16) {
18899 for (size_t k = 1; k <= 5; k += 2) {
18900 GemmMicrokernelTester()
18901 .mr(6)
18902 .nr(16)
18903 .kr(1)
18904 .sr(1)
18905 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018907 .k(k)
18908 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18909 }
18910 }
18911 }
18912
18913 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
18914 TEST_REQUIRES_X86_AVX512F;
18915 for (uint32_t n = 32; n <= 48; n += 16) {
18916 for (size_t k = 1; k <= 5; k += 2) {
18917 GemmMicrokernelTester()
18918 .mr(6)
18919 .nr(16)
18920 .kr(1)
18921 .sr(1)
18922 .m(6)
18923 .n(n)
18924 .k(k)
18925 .cn_stride(19)
18926 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18927 }
18928 }
18929 }
18930
18931 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_a) {
18932 TEST_REQUIRES_X86_AVX512F;
18933 for (uint32_t n = 32; n <= 48; n += 16) {
18934 for (size_t k = 1; k <= 5; k += 2) {
18935 GemmMicrokernelTester()
18936 .mr(6)
18937 .nr(16)
18938 .kr(1)
18939 .sr(1)
18940 .m(6)
18941 .n(n)
18942 .k(k)
18943 .a_stride(7)
18944 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18945 }
18946 }
18947 }
18948
18949 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_subtile) {
18950 TEST_REQUIRES_X86_AVX512F;
18951 for (uint32_t n = 32; n <= 48; n += 16) {
18952 for (size_t k = 1; k <= 5; k += 2) {
18953 for (uint32_t m = 1; m <= 6; m++) {
18954 GemmMicrokernelTester()
18955 .mr(6)
18956 .nr(16)
18957 .kr(1)
18958 .sr(1)
18959 .m(m)
18960 .n(n)
18961 .k(k)
18962 .iterations(1)
18963 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18964 }
18965 }
18966 }
18967 }
18968
18969 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cm_subtile) {
18970 TEST_REQUIRES_X86_AVX512F;
18971 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018972 for (uint32_t n = 1; n <= 16; n++) {
18973 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018974 GemmMicrokernelTester()
18975 .mr(6)
18976 .nr(16)
18977 .kr(1)
18978 .sr(1)
18979 .m(m)
18980 .n(n)
18981 .k(k)
18982 .cm_stride(19)
18983 .iterations(1)
18984 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
18985 }
18986 }
18987 }
18988 }
18989
18990 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, qmin) {
18991 TEST_REQUIRES_X86_AVX512F;
18992 GemmMicrokernelTester()
18993 .mr(6)
18994 .nr(16)
18995 .kr(1)
18996 .sr(1)
18997 .m(6)
18998 .n(16)
18999 .k(1)
19000 .qmin(128)
19001 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19002 }
19003
19004 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, qmax) {
19005 TEST_REQUIRES_X86_AVX512F;
19006 GemmMicrokernelTester()
19007 .mr(6)
19008 .nr(16)
19009 .kr(1)
19010 .sr(1)
19011 .m(6)
19012 .n(16)
19013 .k(1)
19014 .qmax(128)
19015 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19016 }
19017
19018 TEST(F32_GEMMINC_MINMAX_6X16__AVX512F_BROADCAST, strided_cm) {
19019 TEST_REQUIRES_X86_AVX512F;
19020 GemmMicrokernelTester()
19021 .mr(6)
19022 .nr(16)
19023 .kr(1)
19024 .sr(1)
19025 .m(6)
19026 .n(16)
19027 .k(1)
19028 .cm_stride(19)
19029 .Test(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19030 }
19031#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19032
19033
19034#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19035 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1) {
19036 TEST_REQUIRES_X86_AVX512F;
19037 GemmMicrokernelTester()
19038 .mr(7)
19039 .nr(16)
19040 .kr(1)
19041 .sr(1)
19042 .m(7)
19043 .n(16)
19044 .k(1)
19045 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19046 }
19047
19048 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cn) {
19049 TEST_REQUIRES_X86_AVX512F;
19050 GemmMicrokernelTester()
19051 .mr(7)
19052 .nr(16)
19053 .kr(1)
19054 .sr(1)
19055 .m(7)
19056 .n(16)
19057 .k(1)
19058 .cn_stride(19)
19059 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19060 }
19061
19062 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
19063 TEST_REQUIRES_X86_AVX512F;
19064 GemmMicrokernelTester()
19065 .mr(7)
19066 .nr(16)
19067 .kr(1)
19068 .sr(1)
19069 .m(7)
19070 .n(16)
19071 .k(1)
19072 .a_stride(3)
19073 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19074 }
19075
19076 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile) {
19077 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019078 for (uint32_t n = 1; n <= 16; n++) {
19079 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019080 GemmMicrokernelTester()
19081 .mr(7)
19082 .nr(16)
19083 .kr(1)
19084 .sr(1)
19085 .m(m)
19086 .n(n)
19087 .k(1)
19088 .iterations(1)
19089 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19090 }
19091 }
19092 }
19093
19094 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
19095 TEST_REQUIRES_X86_AVX512F;
19096 for (uint32_t m = 1; m <= 7; m++) {
19097 GemmMicrokernelTester()
19098 .mr(7)
19099 .nr(16)
19100 .kr(1)
19101 .sr(1)
19102 .m(m)
19103 .n(16)
19104 .k(1)
19105 .iterations(1)
19106 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19107 }
19108 }
19109
19110 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
19111 TEST_REQUIRES_X86_AVX512F;
19112 for (uint32_t n = 1; n <= 16; n++) {
19113 GemmMicrokernelTester()
19114 .mr(7)
19115 .nr(16)
19116 .kr(1)
19117 .sr(1)
19118 .m(7)
19119 .n(n)
19120 .k(1)
19121 .iterations(1)
19122 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19123 }
19124 }
19125
19126 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1) {
19127 TEST_REQUIRES_X86_AVX512F;
19128 for (size_t k = 2; k < 10; k++) {
19129 GemmMicrokernelTester()
19130 .mr(7)
19131 .nr(16)
19132 .kr(1)
19133 .sr(1)
19134 .m(7)
19135 .n(16)
19136 .k(k)
19137 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19138 }
19139 }
19140
19141 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
19142 TEST_REQUIRES_X86_AVX512F;
19143 for (size_t k = 2; k < 10; k++) {
19144 GemmMicrokernelTester()
19145 .mr(7)
19146 .nr(16)
19147 .kr(1)
19148 .sr(1)
19149 .m(7)
19150 .n(16)
19151 .k(k)
19152 .a_stride(11)
19153 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19154 }
19155 }
19156
19157 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_subtile) {
19158 TEST_REQUIRES_X86_AVX512F;
19159 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019160 for (uint32_t n = 1; n <= 16; n++) {
19161 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019162 GemmMicrokernelTester()
19163 .mr(7)
19164 .nr(16)
19165 .kr(1)
19166 .sr(1)
19167 .m(m)
19168 .n(n)
19169 .k(k)
19170 .iterations(1)
19171 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19172 }
19173 }
19174 }
19175 }
19176
19177 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16) {
19178 TEST_REQUIRES_X86_AVX512F;
19179 for (uint32_t n = 17; n < 32; n++) {
19180 for (size_t k = 1; k <= 5; k += 2) {
19181 GemmMicrokernelTester()
19182 .mr(7)
19183 .nr(16)
19184 .kr(1)
19185 .sr(1)
19186 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019187 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019188 .k(k)
19189 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19190 }
19191 }
19192 }
19193
19194 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
19195 TEST_REQUIRES_X86_AVX512F;
19196 for (uint32_t n = 17; n < 32; n++) {
19197 for (size_t k = 1; k <= 5; k += 2) {
19198 GemmMicrokernelTester()
19199 .mr(7)
19200 .nr(16)
19201 .kr(1)
19202 .sr(1)
19203 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019204 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019205 .k(k)
19206 .cn_stride(19)
19207 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19208 }
19209 }
19210 }
19211
19212 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
19213 TEST_REQUIRES_X86_AVX512F;
19214 for (uint32_t n = 17; n < 32; n++) {
19215 for (size_t k = 1; k <= 5; k += 2) {
19216 GemmMicrokernelTester()
19217 .mr(7)
19218 .nr(16)
19219 .kr(1)
19220 .sr(1)
19221 .m(7)
19222 .n(n)
19223 .k(k)
19224 .a_stride(7)
19225 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19226 }
19227 }
19228 }
19229
19230 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_subtile) {
19231 TEST_REQUIRES_X86_AVX512F;
19232 for (uint32_t n = 17; n < 32; n++) {
19233 for (size_t k = 1; k <= 5; k += 2) {
19234 for (uint32_t m = 1; m <= 7; m++) {
19235 GemmMicrokernelTester()
19236 .mr(7)
19237 .nr(16)
19238 .kr(1)
19239 .sr(1)
19240 .m(m)
19241 .n(n)
19242 .k(k)
19243 .iterations(1)
19244 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19245 }
19246 }
19247 }
19248 }
19249
19250 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16) {
19251 TEST_REQUIRES_X86_AVX512F;
19252 for (uint32_t n = 32; n <= 48; n += 16) {
19253 for (size_t k = 1; k <= 5; k += 2) {
19254 GemmMicrokernelTester()
19255 .mr(7)
19256 .nr(16)
19257 .kr(1)
19258 .sr(1)
19259 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019260 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019261 .k(k)
19262 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19263 }
19264 }
19265 }
19266
19267 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
19268 TEST_REQUIRES_X86_AVX512F;
19269 for (uint32_t n = 32; n <= 48; n += 16) {
19270 for (size_t k = 1; k <= 5; k += 2) {
19271 GemmMicrokernelTester()
19272 .mr(7)
19273 .nr(16)
19274 .kr(1)
19275 .sr(1)
19276 .m(7)
19277 .n(n)
19278 .k(k)
19279 .cn_stride(19)
19280 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19281 }
19282 }
19283 }
19284
19285 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_a) {
19286 TEST_REQUIRES_X86_AVX512F;
19287 for (uint32_t n = 32; n <= 48; n += 16) {
19288 for (size_t k = 1; k <= 5; k += 2) {
19289 GemmMicrokernelTester()
19290 .mr(7)
19291 .nr(16)
19292 .kr(1)
19293 .sr(1)
19294 .m(7)
19295 .n(n)
19296 .k(k)
19297 .a_stride(7)
19298 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19299 }
19300 }
19301 }
19302
19303 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_subtile) {
19304 TEST_REQUIRES_X86_AVX512F;
19305 for (uint32_t n = 32; n <= 48; n += 16) {
19306 for (size_t k = 1; k <= 5; k += 2) {
19307 for (uint32_t m = 1; m <= 7; m++) {
19308 GemmMicrokernelTester()
19309 .mr(7)
19310 .nr(16)
19311 .kr(1)
19312 .sr(1)
19313 .m(m)
19314 .n(n)
19315 .k(k)
19316 .iterations(1)
19317 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19318 }
19319 }
19320 }
19321 }
19322
19323 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cm_subtile) {
19324 TEST_REQUIRES_X86_AVX512F;
19325 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019326 for (uint32_t n = 1; n <= 16; n++) {
19327 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019328 GemmMicrokernelTester()
19329 .mr(7)
19330 .nr(16)
19331 .kr(1)
19332 .sr(1)
19333 .m(m)
19334 .n(n)
19335 .k(k)
19336 .cm_stride(19)
19337 .iterations(1)
19338 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19339 }
19340 }
19341 }
19342 }
19343
19344 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, qmin) {
19345 TEST_REQUIRES_X86_AVX512F;
19346 GemmMicrokernelTester()
19347 .mr(7)
19348 .nr(16)
19349 .kr(1)
19350 .sr(1)
19351 .m(7)
19352 .n(16)
19353 .k(1)
19354 .qmin(128)
19355 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19356 }
19357
19358 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, qmax) {
19359 TEST_REQUIRES_X86_AVX512F;
19360 GemmMicrokernelTester()
19361 .mr(7)
19362 .nr(16)
19363 .kr(1)
19364 .sr(1)
19365 .m(7)
19366 .n(16)
19367 .k(1)
19368 .qmax(128)
19369 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19370 }
19371
19372 TEST(F32_GEMMINC_MINMAX_7X16__AVX512F_BROADCAST, strided_cm) {
19373 TEST_REQUIRES_X86_AVX512F;
19374 GemmMicrokernelTester()
19375 .mr(7)
19376 .nr(16)
19377 .kr(1)
19378 .sr(1)
19379 .m(7)
19380 .n(16)
19381 .k(1)
19382 .cm_stride(19)
19383 .Test(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19384 }
19385#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19386
19387
19388#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19389 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1) {
19390 TEST_REQUIRES_X86_AVX512F;
19391 GemmMicrokernelTester()
19392 .mr(8)
19393 .nr(16)
19394 .kr(1)
19395 .sr(1)
19396 .m(8)
19397 .n(16)
19398 .k(1)
19399 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19400 }
19401
19402 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cn) {
19403 TEST_REQUIRES_X86_AVX512F;
19404 GemmMicrokernelTester()
19405 .mr(8)
19406 .nr(16)
19407 .kr(1)
19408 .sr(1)
19409 .m(8)
19410 .n(16)
19411 .k(1)
19412 .cn_stride(19)
19413 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19414 }
19415
19416 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
19417 TEST_REQUIRES_X86_AVX512F;
19418 GemmMicrokernelTester()
19419 .mr(8)
19420 .nr(16)
19421 .kr(1)
19422 .sr(1)
19423 .m(8)
19424 .n(16)
19425 .k(1)
19426 .a_stride(3)
19427 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19428 }
19429
19430 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile) {
19431 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019432 for (uint32_t n = 1; n <= 16; n++) {
19433 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019434 GemmMicrokernelTester()
19435 .mr(8)
19436 .nr(16)
19437 .kr(1)
19438 .sr(1)
19439 .m(m)
19440 .n(n)
19441 .k(1)
19442 .iterations(1)
19443 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19444 }
19445 }
19446 }
19447
19448 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
19449 TEST_REQUIRES_X86_AVX512F;
19450 for (uint32_t m = 1; m <= 8; m++) {
19451 GemmMicrokernelTester()
19452 .mr(8)
19453 .nr(16)
19454 .kr(1)
19455 .sr(1)
19456 .m(m)
19457 .n(16)
19458 .k(1)
19459 .iterations(1)
19460 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19461 }
19462 }
19463
19464 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
19465 TEST_REQUIRES_X86_AVX512F;
19466 for (uint32_t n = 1; n <= 16; n++) {
19467 GemmMicrokernelTester()
19468 .mr(8)
19469 .nr(16)
19470 .kr(1)
19471 .sr(1)
19472 .m(8)
19473 .n(n)
19474 .k(1)
19475 .iterations(1)
19476 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19477 }
19478 }
19479
19480 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1) {
19481 TEST_REQUIRES_X86_AVX512F;
19482 for (size_t k = 2; k < 10; k++) {
19483 GemmMicrokernelTester()
19484 .mr(8)
19485 .nr(16)
19486 .kr(1)
19487 .sr(1)
19488 .m(8)
19489 .n(16)
19490 .k(k)
19491 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19492 }
19493 }
19494
19495 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
19496 TEST_REQUIRES_X86_AVX512F;
19497 for (size_t k = 2; k < 10; k++) {
19498 GemmMicrokernelTester()
19499 .mr(8)
19500 .nr(16)
19501 .kr(1)
19502 .sr(1)
19503 .m(8)
19504 .n(16)
19505 .k(k)
19506 .a_stride(11)
19507 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19508 }
19509 }
19510
19511 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_subtile) {
19512 TEST_REQUIRES_X86_AVX512F;
19513 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019514 for (uint32_t n = 1; n <= 16; n++) {
19515 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019516 GemmMicrokernelTester()
19517 .mr(8)
19518 .nr(16)
19519 .kr(1)
19520 .sr(1)
19521 .m(m)
19522 .n(n)
19523 .k(k)
19524 .iterations(1)
19525 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19526 }
19527 }
19528 }
19529 }
19530
19531 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16) {
19532 TEST_REQUIRES_X86_AVX512F;
19533 for (uint32_t n = 17; n < 32; n++) {
19534 for (size_t k = 1; k <= 5; k += 2) {
19535 GemmMicrokernelTester()
19536 .mr(8)
19537 .nr(16)
19538 .kr(1)
19539 .sr(1)
19540 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019541 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019542 .k(k)
19543 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19544 }
19545 }
19546 }
19547
19548 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
19549 TEST_REQUIRES_X86_AVX512F;
19550 for (uint32_t n = 17; n < 32; n++) {
19551 for (size_t k = 1; k <= 5; k += 2) {
19552 GemmMicrokernelTester()
19553 .mr(8)
19554 .nr(16)
19555 .kr(1)
19556 .sr(1)
19557 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019558 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019559 .k(k)
19560 .cn_stride(19)
19561 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19562 }
19563 }
19564 }
19565
19566 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
19567 TEST_REQUIRES_X86_AVX512F;
19568 for (uint32_t n = 17; n < 32; n++) {
19569 for (size_t k = 1; k <= 5; k += 2) {
19570 GemmMicrokernelTester()
19571 .mr(8)
19572 .nr(16)
19573 .kr(1)
19574 .sr(1)
19575 .m(8)
19576 .n(n)
19577 .k(k)
19578 .a_stride(7)
19579 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19580 }
19581 }
19582 }
19583
19584 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_subtile) {
19585 TEST_REQUIRES_X86_AVX512F;
19586 for (uint32_t n = 17; n < 32; n++) {
19587 for (size_t k = 1; k <= 5; k += 2) {
19588 for (uint32_t m = 1; m <= 8; m++) {
19589 GemmMicrokernelTester()
19590 .mr(8)
19591 .nr(16)
19592 .kr(1)
19593 .sr(1)
19594 .m(m)
19595 .n(n)
19596 .k(k)
19597 .iterations(1)
19598 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19599 }
19600 }
19601 }
19602 }
19603
19604 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16) {
19605 TEST_REQUIRES_X86_AVX512F;
19606 for (uint32_t n = 32; n <= 48; n += 16) {
19607 for (size_t k = 1; k <= 5; k += 2) {
19608 GemmMicrokernelTester()
19609 .mr(8)
19610 .nr(16)
19611 .kr(1)
19612 .sr(1)
19613 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019614 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019615 .k(k)
19616 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19617 }
19618 }
19619 }
19620
19621 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
19622 TEST_REQUIRES_X86_AVX512F;
19623 for (uint32_t n = 32; n <= 48; n += 16) {
19624 for (size_t k = 1; k <= 5; k += 2) {
19625 GemmMicrokernelTester()
19626 .mr(8)
19627 .nr(16)
19628 .kr(1)
19629 .sr(1)
19630 .m(8)
19631 .n(n)
19632 .k(k)
19633 .cn_stride(19)
19634 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19635 }
19636 }
19637 }
19638
19639 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_a) {
19640 TEST_REQUIRES_X86_AVX512F;
19641 for (uint32_t n = 32; n <= 48; n += 16) {
19642 for (size_t k = 1; k <= 5; k += 2) {
19643 GemmMicrokernelTester()
19644 .mr(8)
19645 .nr(16)
19646 .kr(1)
19647 .sr(1)
19648 .m(8)
19649 .n(n)
19650 .k(k)
19651 .a_stride(7)
19652 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19653 }
19654 }
19655 }
19656
19657 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_subtile) {
19658 TEST_REQUIRES_X86_AVX512F;
19659 for (uint32_t n = 32; n <= 48; n += 16) {
19660 for (size_t k = 1; k <= 5; k += 2) {
19661 for (uint32_t m = 1; m <= 8; m++) {
19662 GemmMicrokernelTester()
19663 .mr(8)
19664 .nr(16)
19665 .kr(1)
19666 .sr(1)
19667 .m(m)
19668 .n(n)
19669 .k(k)
19670 .iterations(1)
19671 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19672 }
19673 }
19674 }
19675 }
19676
19677 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cm_subtile) {
19678 TEST_REQUIRES_X86_AVX512F;
19679 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019680 for (uint32_t n = 1; n <= 16; n++) {
19681 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019682 GemmMicrokernelTester()
19683 .mr(8)
19684 .nr(16)
19685 .kr(1)
19686 .sr(1)
19687 .m(m)
19688 .n(n)
19689 .k(k)
19690 .cm_stride(19)
19691 .iterations(1)
19692 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19693 }
19694 }
19695 }
19696 }
19697
19698 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, qmin) {
19699 TEST_REQUIRES_X86_AVX512F;
19700 GemmMicrokernelTester()
19701 .mr(8)
19702 .nr(16)
19703 .kr(1)
19704 .sr(1)
19705 .m(8)
19706 .n(16)
19707 .k(1)
19708 .qmin(128)
19709 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19710 }
19711
19712 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, qmax) {
19713 TEST_REQUIRES_X86_AVX512F;
19714 GemmMicrokernelTester()
19715 .mr(8)
19716 .nr(16)
19717 .kr(1)
19718 .sr(1)
19719 .m(8)
19720 .n(16)
19721 .k(1)
19722 .qmax(128)
19723 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19724 }
19725
19726 TEST(F32_GEMMINC_MINMAX_8X16__AVX512F_BROADCAST, strided_cm) {
19727 TEST_REQUIRES_X86_AVX512F;
19728 GemmMicrokernelTester()
19729 .mr(8)
19730 .nr(16)
19731 .kr(1)
19732 .sr(1)
19733 .m(8)
19734 .n(16)
19735 .k(1)
19736 .cm_stride(19)
19737 .Test(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
19738 }
19739#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19740
19741
19742#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19743 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
19744 GemmMicrokernelTester()
19745 .mr(1)
19746 .nr(8)
19747 .kr(1)
19748 .sr(1)
19749 .m(1)
19750 .n(8)
19751 .k(1)
19752 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19753 }
19754
19755 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
19756 GemmMicrokernelTester()
19757 .mr(1)
19758 .nr(8)
19759 .kr(1)
19760 .sr(1)
19761 .m(1)
19762 .n(8)
19763 .k(1)
19764 .cn_stride(11)
19765 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19766 }
19767
19768 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
19769 GemmMicrokernelTester()
19770 .mr(1)
19771 .nr(8)
19772 .kr(1)
19773 .sr(1)
19774 .m(1)
19775 .n(8)
19776 .k(1)
19777 .a_stride(3)
19778 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19779 }
19780
19781 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019782 for (uint32_t n = 1; n <= 8; n++) {
19783 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019784 GemmMicrokernelTester()
19785 .mr(1)
19786 .nr(8)
19787 .kr(1)
19788 .sr(1)
19789 .m(m)
19790 .n(n)
19791 .k(1)
19792 .iterations(1)
19793 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19794 }
19795 }
19796 }
19797
19798 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
19799 for (uint32_t m = 1; m <= 1; m++) {
19800 GemmMicrokernelTester()
19801 .mr(1)
19802 .nr(8)
19803 .kr(1)
19804 .sr(1)
19805 .m(m)
19806 .n(8)
19807 .k(1)
19808 .iterations(1)
19809 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19810 }
19811 }
19812
19813 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
19814 for (uint32_t n = 1; n <= 8; n++) {
19815 GemmMicrokernelTester()
19816 .mr(1)
19817 .nr(8)
19818 .kr(1)
19819 .sr(1)
19820 .m(1)
19821 .n(n)
19822 .k(1)
19823 .iterations(1)
19824 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19825 }
19826 }
19827
19828 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
19829 for (size_t k = 2; k < 10; k++) {
19830 GemmMicrokernelTester()
19831 .mr(1)
19832 .nr(8)
19833 .kr(1)
19834 .sr(1)
19835 .m(1)
19836 .n(8)
19837 .k(k)
19838 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19839 }
19840 }
19841
19842 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
19843 for (size_t k = 2; k < 10; k++) {
19844 GemmMicrokernelTester()
19845 .mr(1)
19846 .nr(8)
19847 .kr(1)
19848 .sr(1)
19849 .m(1)
19850 .n(8)
19851 .k(k)
19852 .a_stride(11)
19853 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19854 }
19855 }
19856
19857 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
19858 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019859 for (uint32_t n = 1; n <= 8; n++) {
19860 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019861 GemmMicrokernelTester()
19862 .mr(1)
19863 .nr(8)
19864 .kr(1)
19865 .sr(1)
19866 .m(m)
19867 .n(n)
19868 .k(k)
19869 .iterations(1)
19870 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19871 }
19872 }
19873 }
19874 }
19875
19876 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
19877 for (uint32_t n = 9; n < 16; n++) {
19878 for (size_t k = 1; k <= 5; k += 2) {
19879 GemmMicrokernelTester()
19880 .mr(1)
19881 .nr(8)
19882 .kr(1)
19883 .sr(1)
19884 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019885 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019886 .k(k)
19887 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19888 }
19889 }
19890 }
19891
19892 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
19893 for (uint32_t n = 9; n < 16; n++) {
19894 for (size_t k = 1; k <= 5; k += 2) {
19895 GemmMicrokernelTester()
19896 .mr(1)
19897 .nr(8)
19898 .kr(1)
19899 .sr(1)
19900 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019901 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019902 .k(k)
19903 .cn_stride(11)
19904 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19905 }
19906 }
19907 }
19908
19909 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
19910 for (uint32_t n = 9; n < 16; n++) {
19911 for (size_t k = 1; k <= 5; k += 2) {
19912 GemmMicrokernelTester()
19913 .mr(1)
19914 .nr(8)
19915 .kr(1)
19916 .sr(1)
19917 .m(1)
19918 .n(n)
19919 .k(k)
19920 .a_stride(7)
19921 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19922 }
19923 }
19924 }
19925
19926 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
19927 for (uint32_t n = 9; n < 16; n++) {
19928 for (size_t k = 1; k <= 5; k += 2) {
19929 for (uint32_t m = 1; m <= 1; m++) {
19930 GemmMicrokernelTester()
19931 .mr(1)
19932 .nr(8)
19933 .kr(1)
19934 .sr(1)
19935 .m(m)
19936 .n(n)
19937 .k(k)
19938 .iterations(1)
19939 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19940 }
19941 }
19942 }
19943 }
19944
19945 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
19946 for (uint32_t n = 16; n <= 24; n += 8) {
19947 for (size_t k = 1; k <= 5; k += 2) {
19948 GemmMicrokernelTester()
19949 .mr(1)
19950 .nr(8)
19951 .kr(1)
19952 .sr(1)
19953 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019954 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019955 .k(k)
19956 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19957 }
19958 }
19959 }
19960
19961 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
19962 for (uint32_t n = 16; n <= 24; n += 8) {
19963 for (size_t k = 1; k <= 5; k += 2) {
19964 GemmMicrokernelTester()
19965 .mr(1)
19966 .nr(8)
19967 .kr(1)
19968 .sr(1)
19969 .m(1)
19970 .n(n)
19971 .k(k)
19972 .cn_stride(11)
19973 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19974 }
19975 }
19976 }
19977
19978 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
19979 for (uint32_t n = 16; n <= 24; n += 8) {
19980 for (size_t k = 1; k <= 5; k += 2) {
19981 GemmMicrokernelTester()
19982 .mr(1)
19983 .nr(8)
19984 .kr(1)
19985 .sr(1)
19986 .m(1)
19987 .n(n)
19988 .k(k)
19989 .a_stride(7)
19990 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
19991 }
19992 }
19993 }
19994
19995 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
19996 for (uint32_t n = 16; n <= 24; n += 8) {
19997 for (size_t k = 1; k <= 5; k += 2) {
19998 for (uint32_t m = 1; m <= 1; m++) {
19999 GemmMicrokernelTester()
20000 .mr(1)
20001 .nr(8)
20002 .kr(1)
20003 .sr(1)
20004 .m(m)
20005 .n(n)
20006 .k(k)
20007 .iterations(1)
20008 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20009 }
20010 }
20011 }
20012 }
20013
20014 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
20015 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020016 for (uint32_t n = 1; n <= 8; n++) {
20017 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020018 GemmMicrokernelTester()
20019 .mr(1)
20020 .nr(8)
20021 .kr(1)
20022 .sr(1)
20023 .m(m)
20024 .n(n)
20025 .k(k)
20026 .cm_stride(11)
20027 .iterations(1)
20028 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20029 }
20030 }
20031 }
20032 }
20033
20034 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
20035 GemmMicrokernelTester()
20036 .mr(1)
20037 .nr(8)
20038 .kr(1)
20039 .sr(1)
20040 .m(1)
20041 .n(8)
20042 .k(1)
20043 .qmin(128)
20044 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20045 }
20046
20047 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
20048 GemmMicrokernelTester()
20049 .mr(1)
20050 .nr(8)
20051 .kr(1)
20052 .sr(1)
20053 .m(1)
20054 .n(8)
20055 .k(1)
20056 .qmax(128)
20057 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20058 }
20059
20060 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
20061 GemmMicrokernelTester()
20062 .mr(1)
20063 .nr(8)
20064 .kr(1)
20065 .sr(1)
20066 .m(1)
20067 .n(8)
20068 .k(1)
20069 .cm_stride(11)
20070 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20071 }
20072#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20073
20074
20075#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20076 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
20077 GemmMicrokernelTester()
20078 .mr(3)
20079 .nr(8)
20080 .kr(1)
20081 .sr(1)
20082 .m(3)
20083 .n(8)
20084 .k(1)
20085 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20086 }
20087
20088 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
20089 GemmMicrokernelTester()
20090 .mr(3)
20091 .nr(8)
20092 .kr(1)
20093 .sr(1)
20094 .m(3)
20095 .n(8)
20096 .k(1)
20097 .cn_stride(11)
20098 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20099 }
20100
20101 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
20102 GemmMicrokernelTester()
20103 .mr(3)
20104 .nr(8)
20105 .kr(1)
20106 .sr(1)
20107 .m(3)
20108 .n(8)
20109 .k(1)
20110 .a_stride(3)
20111 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20112 }
20113
20114 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020115 for (uint32_t n = 1; n <= 8; n++) {
20116 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020117 GemmMicrokernelTester()
20118 .mr(3)
20119 .nr(8)
20120 .kr(1)
20121 .sr(1)
20122 .m(m)
20123 .n(n)
20124 .k(1)
20125 .iterations(1)
20126 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20127 }
20128 }
20129 }
20130
20131 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
20132 for (uint32_t m = 1; m <= 3; m++) {
20133 GemmMicrokernelTester()
20134 .mr(3)
20135 .nr(8)
20136 .kr(1)
20137 .sr(1)
20138 .m(m)
20139 .n(8)
20140 .k(1)
20141 .iterations(1)
20142 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20143 }
20144 }
20145
20146 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
20147 for (uint32_t n = 1; n <= 8; n++) {
20148 GemmMicrokernelTester()
20149 .mr(3)
20150 .nr(8)
20151 .kr(1)
20152 .sr(1)
20153 .m(3)
20154 .n(n)
20155 .k(1)
20156 .iterations(1)
20157 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20158 }
20159 }
20160
20161 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
20162 for (size_t k = 2; k < 10; k++) {
20163 GemmMicrokernelTester()
20164 .mr(3)
20165 .nr(8)
20166 .kr(1)
20167 .sr(1)
20168 .m(3)
20169 .n(8)
20170 .k(k)
20171 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20172 }
20173 }
20174
20175 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
20176 for (size_t k = 2; k < 10; k++) {
20177 GemmMicrokernelTester()
20178 .mr(3)
20179 .nr(8)
20180 .kr(1)
20181 .sr(1)
20182 .m(3)
20183 .n(8)
20184 .k(k)
20185 .a_stride(11)
20186 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20187 }
20188 }
20189
20190 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
20191 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020192 for (uint32_t n = 1; n <= 8; n++) {
20193 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020194 GemmMicrokernelTester()
20195 .mr(3)
20196 .nr(8)
20197 .kr(1)
20198 .sr(1)
20199 .m(m)
20200 .n(n)
20201 .k(k)
20202 .iterations(1)
20203 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20204 }
20205 }
20206 }
20207 }
20208
20209 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
20210 for (uint32_t n = 9; n < 16; n++) {
20211 for (size_t k = 1; k <= 5; k += 2) {
20212 GemmMicrokernelTester()
20213 .mr(3)
20214 .nr(8)
20215 .kr(1)
20216 .sr(1)
20217 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020218 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020219 .k(k)
20220 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20221 }
20222 }
20223 }
20224
20225 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
20226 for (uint32_t n = 9; n < 16; n++) {
20227 for (size_t k = 1; k <= 5; k += 2) {
20228 GemmMicrokernelTester()
20229 .mr(3)
20230 .nr(8)
20231 .kr(1)
20232 .sr(1)
20233 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020234 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020235 .k(k)
20236 .cn_stride(11)
20237 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20238 }
20239 }
20240 }
20241
20242 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
20243 for (uint32_t n = 9; n < 16; n++) {
20244 for (size_t k = 1; k <= 5; k += 2) {
20245 GemmMicrokernelTester()
20246 .mr(3)
20247 .nr(8)
20248 .kr(1)
20249 .sr(1)
20250 .m(3)
20251 .n(n)
20252 .k(k)
20253 .a_stride(7)
20254 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20255 }
20256 }
20257 }
20258
20259 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
20260 for (uint32_t n = 9; n < 16; n++) {
20261 for (size_t k = 1; k <= 5; k += 2) {
20262 for (uint32_t m = 1; m <= 3; m++) {
20263 GemmMicrokernelTester()
20264 .mr(3)
20265 .nr(8)
20266 .kr(1)
20267 .sr(1)
20268 .m(m)
20269 .n(n)
20270 .k(k)
20271 .iterations(1)
20272 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20273 }
20274 }
20275 }
20276 }
20277
20278 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
20279 for (uint32_t n = 16; n <= 24; n += 8) {
20280 for (size_t k = 1; k <= 5; k += 2) {
20281 GemmMicrokernelTester()
20282 .mr(3)
20283 .nr(8)
20284 .kr(1)
20285 .sr(1)
20286 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020287 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020288 .k(k)
20289 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20290 }
20291 }
20292 }
20293
20294 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
20295 for (uint32_t n = 16; n <= 24; n += 8) {
20296 for (size_t k = 1; k <= 5; k += 2) {
20297 GemmMicrokernelTester()
20298 .mr(3)
20299 .nr(8)
20300 .kr(1)
20301 .sr(1)
20302 .m(3)
20303 .n(n)
20304 .k(k)
20305 .cn_stride(11)
20306 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20307 }
20308 }
20309 }
20310
20311 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
20312 for (uint32_t n = 16; n <= 24; n += 8) {
20313 for (size_t k = 1; k <= 5; k += 2) {
20314 GemmMicrokernelTester()
20315 .mr(3)
20316 .nr(8)
20317 .kr(1)
20318 .sr(1)
20319 .m(3)
20320 .n(n)
20321 .k(k)
20322 .a_stride(7)
20323 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20324 }
20325 }
20326 }
20327
20328 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
20329 for (uint32_t n = 16; n <= 24; n += 8) {
20330 for (size_t k = 1; k <= 5; k += 2) {
20331 for (uint32_t m = 1; m <= 3; m++) {
20332 GemmMicrokernelTester()
20333 .mr(3)
20334 .nr(8)
20335 .kr(1)
20336 .sr(1)
20337 .m(m)
20338 .n(n)
20339 .k(k)
20340 .iterations(1)
20341 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20342 }
20343 }
20344 }
20345 }
20346
20347 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
20348 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020349 for (uint32_t n = 1; n <= 8; n++) {
20350 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020351 GemmMicrokernelTester()
20352 .mr(3)
20353 .nr(8)
20354 .kr(1)
20355 .sr(1)
20356 .m(m)
20357 .n(n)
20358 .k(k)
20359 .cm_stride(11)
20360 .iterations(1)
20361 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20362 }
20363 }
20364 }
20365 }
20366
20367 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
20368 GemmMicrokernelTester()
20369 .mr(3)
20370 .nr(8)
20371 .kr(1)
20372 .sr(1)
20373 .m(3)
20374 .n(8)
20375 .k(1)
20376 .qmin(128)
20377 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20378 }
20379
20380 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
20381 GemmMicrokernelTester()
20382 .mr(3)
20383 .nr(8)
20384 .kr(1)
20385 .sr(1)
20386 .m(3)
20387 .n(8)
20388 .k(1)
20389 .qmax(128)
20390 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20391 }
20392
20393 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
20394 GemmMicrokernelTester()
20395 .mr(3)
20396 .nr(8)
20397 .kr(1)
20398 .sr(1)
20399 .m(3)
20400 .n(8)
20401 .k(1)
20402 .cm_stride(11)
20403 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20404 }
20405#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20406
20407
20408#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20409 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
20410 GemmMicrokernelTester()
20411 .mr(4)
20412 .nr(8)
20413 .kr(1)
20414 .sr(1)
20415 .m(4)
20416 .n(8)
20417 .k(1)
20418 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20419 }
20420
20421 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
20422 GemmMicrokernelTester()
20423 .mr(4)
20424 .nr(8)
20425 .kr(1)
20426 .sr(1)
20427 .m(4)
20428 .n(8)
20429 .k(1)
20430 .cn_stride(11)
20431 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20432 }
20433
20434 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
20435 GemmMicrokernelTester()
20436 .mr(4)
20437 .nr(8)
20438 .kr(1)
20439 .sr(1)
20440 .m(4)
20441 .n(8)
20442 .k(1)
20443 .a_stride(3)
20444 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20445 }
20446
20447 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020448 for (uint32_t n = 1; n <= 8; n++) {
20449 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020450 GemmMicrokernelTester()
20451 .mr(4)
20452 .nr(8)
20453 .kr(1)
20454 .sr(1)
20455 .m(m)
20456 .n(n)
20457 .k(1)
20458 .iterations(1)
20459 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20460 }
20461 }
20462 }
20463
20464 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
20465 for (uint32_t m = 1; m <= 4; m++) {
20466 GemmMicrokernelTester()
20467 .mr(4)
20468 .nr(8)
20469 .kr(1)
20470 .sr(1)
20471 .m(m)
20472 .n(8)
20473 .k(1)
20474 .iterations(1)
20475 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20476 }
20477 }
20478
20479 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
20480 for (uint32_t n = 1; n <= 8; n++) {
20481 GemmMicrokernelTester()
20482 .mr(4)
20483 .nr(8)
20484 .kr(1)
20485 .sr(1)
20486 .m(4)
20487 .n(n)
20488 .k(1)
20489 .iterations(1)
20490 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20491 }
20492 }
20493
20494 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
20495 for (size_t k = 2; k < 10; k++) {
20496 GemmMicrokernelTester()
20497 .mr(4)
20498 .nr(8)
20499 .kr(1)
20500 .sr(1)
20501 .m(4)
20502 .n(8)
20503 .k(k)
20504 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20505 }
20506 }
20507
20508 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
20509 for (size_t k = 2; k < 10; k++) {
20510 GemmMicrokernelTester()
20511 .mr(4)
20512 .nr(8)
20513 .kr(1)
20514 .sr(1)
20515 .m(4)
20516 .n(8)
20517 .k(k)
20518 .a_stride(11)
20519 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20520 }
20521 }
20522
20523 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
20524 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020525 for (uint32_t n = 1; n <= 8; n++) {
20526 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020527 GemmMicrokernelTester()
20528 .mr(4)
20529 .nr(8)
20530 .kr(1)
20531 .sr(1)
20532 .m(m)
20533 .n(n)
20534 .k(k)
20535 .iterations(1)
20536 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20537 }
20538 }
20539 }
20540 }
20541
20542 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
20543 for (uint32_t n = 9; n < 16; n++) {
20544 for (size_t k = 1; k <= 5; k += 2) {
20545 GemmMicrokernelTester()
20546 .mr(4)
20547 .nr(8)
20548 .kr(1)
20549 .sr(1)
20550 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020551 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020552 .k(k)
20553 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20554 }
20555 }
20556 }
20557
20558 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
20559 for (uint32_t n = 9; n < 16; n++) {
20560 for (size_t k = 1; k <= 5; k += 2) {
20561 GemmMicrokernelTester()
20562 .mr(4)
20563 .nr(8)
20564 .kr(1)
20565 .sr(1)
20566 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020567 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020568 .k(k)
20569 .cn_stride(11)
20570 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20571 }
20572 }
20573 }
20574
20575 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
20576 for (uint32_t n = 9; n < 16; n++) {
20577 for (size_t k = 1; k <= 5; k += 2) {
20578 GemmMicrokernelTester()
20579 .mr(4)
20580 .nr(8)
20581 .kr(1)
20582 .sr(1)
20583 .m(4)
20584 .n(n)
20585 .k(k)
20586 .a_stride(7)
20587 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20588 }
20589 }
20590 }
20591
20592 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
20593 for (uint32_t n = 9; n < 16; n++) {
20594 for (size_t k = 1; k <= 5; k += 2) {
20595 for (uint32_t m = 1; m <= 4; m++) {
20596 GemmMicrokernelTester()
20597 .mr(4)
20598 .nr(8)
20599 .kr(1)
20600 .sr(1)
20601 .m(m)
20602 .n(n)
20603 .k(k)
20604 .iterations(1)
20605 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20606 }
20607 }
20608 }
20609 }
20610
20611 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
20612 for (uint32_t n = 16; n <= 24; n += 8) {
20613 for (size_t k = 1; k <= 5; k += 2) {
20614 GemmMicrokernelTester()
20615 .mr(4)
20616 .nr(8)
20617 .kr(1)
20618 .sr(1)
20619 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020620 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020621 .k(k)
20622 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20623 }
20624 }
20625 }
20626
20627 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
20628 for (uint32_t n = 16; n <= 24; n += 8) {
20629 for (size_t k = 1; k <= 5; k += 2) {
20630 GemmMicrokernelTester()
20631 .mr(4)
20632 .nr(8)
20633 .kr(1)
20634 .sr(1)
20635 .m(4)
20636 .n(n)
20637 .k(k)
20638 .cn_stride(11)
20639 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20640 }
20641 }
20642 }
20643
20644 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
20645 for (uint32_t n = 16; n <= 24; n += 8) {
20646 for (size_t k = 1; k <= 5; k += 2) {
20647 GemmMicrokernelTester()
20648 .mr(4)
20649 .nr(8)
20650 .kr(1)
20651 .sr(1)
20652 .m(4)
20653 .n(n)
20654 .k(k)
20655 .a_stride(7)
20656 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20657 }
20658 }
20659 }
20660
20661 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
20662 for (uint32_t n = 16; n <= 24; n += 8) {
20663 for (size_t k = 1; k <= 5; k += 2) {
20664 for (uint32_t m = 1; m <= 4; m++) {
20665 GemmMicrokernelTester()
20666 .mr(4)
20667 .nr(8)
20668 .kr(1)
20669 .sr(1)
20670 .m(m)
20671 .n(n)
20672 .k(k)
20673 .iterations(1)
20674 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20675 }
20676 }
20677 }
20678 }
20679
20680 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
20681 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020682 for (uint32_t n = 1; n <= 8; n++) {
20683 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020684 GemmMicrokernelTester()
20685 .mr(4)
20686 .nr(8)
20687 .kr(1)
20688 .sr(1)
20689 .m(m)
20690 .n(n)
20691 .k(k)
20692 .cm_stride(11)
20693 .iterations(1)
20694 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20695 }
20696 }
20697 }
20698 }
20699
20700 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
20701 GemmMicrokernelTester()
20702 .mr(4)
20703 .nr(8)
20704 .kr(1)
20705 .sr(1)
20706 .m(4)
20707 .n(8)
20708 .k(1)
20709 .qmin(128)
20710 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20711 }
20712
20713 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
20714 GemmMicrokernelTester()
20715 .mr(4)
20716 .nr(8)
20717 .kr(1)
20718 .sr(1)
20719 .m(4)
20720 .n(8)
20721 .k(1)
20722 .qmax(128)
20723 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20724 }
20725
20726 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
20727 GemmMicrokernelTester()
20728 .mr(4)
20729 .nr(8)
20730 .kr(1)
20731 .sr(1)
20732 .m(4)
20733 .n(8)
20734 .k(1)
20735 .cm_stride(11)
20736 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20737 }
20738#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20739
20740
20741#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20742 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
20743 GemmMicrokernelTester()
20744 .mr(6)
20745 .nr(8)
20746 .kr(1)
20747 .sr(1)
20748 .m(6)
20749 .n(8)
20750 .k(1)
20751 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20752 }
20753
20754 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
20755 GemmMicrokernelTester()
20756 .mr(6)
20757 .nr(8)
20758 .kr(1)
20759 .sr(1)
20760 .m(6)
20761 .n(8)
20762 .k(1)
20763 .cn_stride(11)
20764 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20765 }
20766
20767 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
20768 GemmMicrokernelTester()
20769 .mr(6)
20770 .nr(8)
20771 .kr(1)
20772 .sr(1)
20773 .m(6)
20774 .n(8)
20775 .k(1)
20776 .a_stride(3)
20777 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20778 }
20779
20780 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020781 for (uint32_t n = 1; n <= 8; n++) {
20782 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020783 GemmMicrokernelTester()
20784 .mr(6)
20785 .nr(8)
20786 .kr(1)
20787 .sr(1)
20788 .m(m)
20789 .n(n)
20790 .k(1)
20791 .iterations(1)
20792 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20793 }
20794 }
20795 }
20796
20797 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
20798 for (uint32_t m = 1; m <= 6; m++) {
20799 GemmMicrokernelTester()
20800 .mr(6)
20801 .nr(8)
20802 .kr(1)
20803 .sr(1)
20804 .m(m)
20805 .n(8)
20806 .k(1)
20807 .iterations(1)
20808 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20809 }
20810 }
20811
20812 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
20813 for (uint32_t n = 1; n <= 8; n++) {
20814 GemmMicrokernelTester()
20815 .mr(6)
20816 .nr(8)
20817 .kr(1)
20818 .sr(1)
20819 .m(6)
20820 .n(n)
20821 .k(1)
20822 .iterations(1)
20823 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20824 }
20825 }
20826
20827 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
20828 for (size_t k = 2; k < 10; k++) {
20829 GemmMicrokernelTester()
20830 .mr(6)
20831 .nr(8)
20832 .kr(1)
20833 .sr(1)
20834 .m(6)
20835 .n(8)
20836 .k(k)
20837 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20838 }
20839 }
20840
20841 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
20842 for (size_t k = 2; k < 10; k++) {
20843 GemmMicrokernelTester()
20844 .mr(6)
20845 .nr(8)
20846 .kr(1)
20847 .sr(1)
20848 .m(6)
20849 .n(8)
20850 .k(k)
20851 .a_stride(11)
20852 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20853 }
20854 }
20855
20856 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
20857 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020858 for (uint32_t n = 1; n <= 8; n++) {
20859 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020860 GemmMicrokernelTester()
20861 .mr(6)
20862 .nr(8)
20863 .kr(1)
20864 .sr(1)
20865 .m(m)
20866 .n(n)
20867 .k(k)
20868 .iterations(1)
20869 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20870 }
20871 }
20872 }
20873 }
20874
20875 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
20876 for (uint32_t n = 9; n < 16; n++) {
20877 for (size_t k = 1; k <= 5; k += 2) {
20878 GemmMicrokernelTester()
20879 .mr(6)
20880 .nr(8)
20881 .kr(1)
20882 .sr(1)
20883 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020884 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020885 .k(k)
20886 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20887 }
20888 }
20889 }
20890
20891 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
20892 for (uint32_t n = 9; n < 16; n++) {
20893 for (size_t k = 1; k <= 5; k += 2) {
20894 GemmMicrokernelTester()
20895 .mr(6)
20896 .nr(8)
20897 .kr(1)
20898 .sr(1)
20899 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020900 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020901 .k(k)
20902 .cn_stride(11)
20903 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20904 }
20905 }
20906 }
20907
20908 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
20909 for (uint32_t n = 9; n < 16; n++) {
20910 for (size_t k = 1; k <= 5; k += 2) {
20911 GemmMicrokernelTester()
20912 .mr(6)
20913 .nr(8)
20914 .kr(1)
20915 .sr(1)
20916 .m(6)
20917 .n(n)
20918 .k(k)
20919 .a_stride(7)
20920 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20921 }
20922 }
20923 }
20924
20925 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
20926 for (uint32_t n = 9; n < 16; n++) {
20927 for (size_t k = 1; k <= 5; k += 2) {
20928 for (uint32_t m = 1; m <= 6; m++) {
20929 GemmMicrokernelTester()
20930 .mr(6)
20931 .nr(8)
20932 .kr(1)
20933 .sr(1)
20934 .m(m)
20935 .n(n)
20936 .k(k)
20937 .iterations(1)
20938 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20939 }
20940 }
20941 }
20942 }
20943
20944 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
20945 for (uint32_t n = 16; n <= 24; n += 8) {
20946 for (size_t k = 1; k <= 5; k += 2) {
20947 GemmMicrokernelTester()
20948 .mr(6)
20949 .nr(8)
20950 .kr(1)
20951 .sr(1)
20952 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020953 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020954 .k(k)
20955 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20956 }
20957 }
20958 }
20959
20960 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
20961 for (uint32_t n = 16; n <= 24; n += 8) {
20962 for (size_t k = 1; k <= 5; k += 2) {
20963 GemmMicrokernelTester()
20964 .mr(6)
20965 .nr(8)
20966 .kr(1)
20967 .sr(1)
20968 .m(6)
20969 .n(n)
20970 .k(k)
20971 .cn_stride(11)
20972 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20973 }
20974 }
20975 }
20976
20977 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
20978 for (uint32_t n = 16; n <= 24; n += 8) {
20979 for (size_t k = 1; k <= 5; k += 2) {
20980 GemmMicrokernelTester()
20981 .mr(6)
20982 .nr(8)
20983 .kr(1)
20984 .sr(1)
20985 .m(6)
20986 .n(n)
20987 .k(k)
20988 .a_stride(7)
20989 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
20990 }
20991 }
20992 }
20993
20994 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
20995 for (uint32_t n = 16; n <= 24; n += 8) {
20996 for (size_t k = 1; k <= 5; k += 2) {
20997 for (uint32_t m = 1; m <= 6; m++) {
20998 GemmMicrokernelTester()
20999 .mr(6)
21000 .nr(8)
21001 .kr(1)
21002 .sr(1)
21003 .m(m)
21004 .n(n)
21005 .k(k)
21006 .iterations(1)
21007 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21008 }
21009 }
21010 }
21011 }
21012
21013 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
21014 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021015 for (uint32_t n = 1; n <= 8; n++) {
21016 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021017 GemmMicrokernelTester()
21018 .mr(6)
21019 .nr(8)
21020 .kr(1)
21021 .sr(1)
21022 .m(m)
21023 .n(n)
21024 .k(k)
21025 .cm_stride(11)
21026 .iterations(1)
21027 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21028 }
21029 }
21030 }
21031 }
21032
21033 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
21034 GemmMicrokernelTester()
21035 .mr(6)
21036 .nr(8)
21037 .kr(1)
21038 .sr(1)
21039 .m(6)
21040 .n(8)
21041 .k(1)
21042 .qmin(128)
21043 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21044 }
21045
21046 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
21047 GemmMicrokernelTester()
21048 .mr(6)
21049 .nr(8)
21050 .kr(1)
21051 .sr(1)
21052 .m(6)
21053 .n(8)
21054 .k(1)
21055 .qmax(128)
21056 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21057 }
21058
21059 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
21060 GemmMicrokernelTester()
21061 .mr(6)
21062 .nr(8)
21063 .kr(1)
21064 .sr(1)
21065 .m(6)
21066 .n(8)
21067 .k(1)
21068 .cm_stride(11)
21069 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21070 }
21071#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21072
21073
21074#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21075 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
21076 GemmMicrokernelTester()
21077 .mr(5)
21078 .nr(8)
21079 .kr(1)
21080 .sr(1)
21081 .m(5)
21082 .n(8)
21083 .k(1)
21084 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21085 }
21086
21087 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
21088 GemmMicrokernelTester()
21089 .mr(5)
21090 .nr(8)
21091 .kr(1)
21092 .sr(1)
21093 .m(5)
21094 .n(8)
21095 .k(1)
21096 .cn_stride(11)
21097 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21098 }
21099
21100 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
21101 GemmMicrokernelTester()
21102 .mr(5)
21103 .nr(8)
21104 .kr(1)
21105 .sr(1)
21106 .m(5)
21107 .n(8)
21108 .k(1)
21109 .a_stride(3)
21110 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21111 }
21112
21113 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021114 for (uint32_t n = 1; n <= 8; n++) {
21115 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021116 GemmMicrokernelTester()
21117 .mr(5)
21118 .nr(8)
21119 .kr(1)
21120 .sr(1)
21121 .m(m)
21122 .n(n)
21123 .k(1)
21124 .iterations(1)
21125 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21126 }
21127 }
21128 }
21129
21130 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
21131 for (uint32_t m = 1; m <= 5; m++) {
21132 GemmMicrokernelTester()
21133 .mr(5)
21134 .nr(8)
21135 .kr(1)
21136 .sr(1)
21137 .m(m)
21138 .n(8)
21139 .k(1)
21140 .iterations(1)
21141 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21142 }
21143 }
21144
21145 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
21146 for (uint32_t n = 1; n <= 8; n++) {
21147 GemmMicrokernelTester()
21148 .mr(5)
21149 .nr(8)
21150 .kr(1)
21151 .sr(1)
21152 .m(5)
21153 .n(n)
21154 .k(1)
21155 .iterations(1)
21156 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21157 }
21158 }
21159
21160 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
21161 for (size_t k = 2; k < 10; k++) {
21162 GemmMicrokernelTester()
21163 .mr(5)
21164 .nr(8)
21165 .kr(1)
21166 .sr(1)
21167 .m(5)
21168 .n(8)
21169 .k(k)
21170 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21171 }
21172 }
21173
21174 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
21175 for (size_t k = 2; k < 10; k++) {
21176 GemmMicrokernelTester()
21177 .mr(5)
21178 .nr(8)
21179 .kr(1)
21180 .sr(1)
21181 .m(5)
21182 .n(8)
21183 .k(k)
21184 .a_stride(11)
21185 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21186 }
21187 }
21188
21189 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
21190 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021191 for (uint32_t n = 1; n <= 8; n++) {
21192 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021193 GemmMicrokernelTester()
21194 .mr(5)
21195 .nr(8)
21196 .kr(1)
21197 .sr(1)
21198 .m(m)
21199 .n(n)
21200 .k(k)
21201 .iterations(1)
21202 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21203 }
21204 }
21205 }
21206 }
21207
21208 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
21209 for (uint32_t n = 9; n < 16; n++) {
21210 for (size_t k = 1; k <= 5; k += 2) {
21211 GemmMicrokernelTester()
21212 .mr(5)
21213 .nr(8)
21214 .kr(1)
21215 .sr(1)
21216 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021217 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021218 .k(k)
21219 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21220 }
21221 }
21222 }
21223
21224 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
21225 for (uint32_t n = 9; n < 16; n++) {
21226 for (size_t k = 1; k <= 5; k += 2) {
21227 GemmMicrokernelTester()
21228 .mr(5)
21229 .nr(8)
21230 .kr(1)
21231 .sr(1)
21232 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021233 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021234 .k(k)
21235 .cn_stride(11)
21236 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21237 }
21238 }
21239 }
21240
21241 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
21242 for (uint32_t n = 9; n < 16; n++) {
21243 for (size_t k = 1; k <= 5; k += 2) {
21244 GemmMicrokernelTester()
21245 .mr(5)
21246 .nr(8)
21247 .kr(1)
21248 .sr(1)
21249 .m(5)
21250 .n(n)
21251 .k(k)
21252 .a_stride(7)
21253 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21254 }
21255 }
21256 }
21257
21258 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
21259 for (uint32_t n = 9; n < 16; n++) {
21260 for (size_t k = 1; k <= 5; k += 2) {
21261 for (uint32_t m = 1; m <= 5; m++) {
21262 GemmMicrokernelTester()
21263 .mr(5)
21264 .nr(8)
21265 .kr(1)
21266 .sr(1)
21267 .m(m)
21268 .n(n)
21269 .k(k)
21270 .iterations(1)
21271 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21272 }
21273 }
21274 }
21275 }
21276
21277 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
21278 for (uint32_t n = 16; n <= 24; n += 8) {
21279 for (size_t k = 1; k <= 5; k += 2) {
21280 GemmMicrokernelTester()
21281 .mr(5)
21282 .nr(8)
21283 .kr(1)
21284 .sr(1)
21285 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021286 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021287 .k(k)
21288 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21289 }
21290 }
21291 }
21292
21293 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
21294 for (uint32_t n = 16; n <= 24; n += 8) {
21295 for (size_t k = 1; k <= 5; k += 2) {
21296 GemmMicrokernelTester()
21297 .mr(5)
21298 .nr(8)
21299 .kr(1)
21300 .sr(1)
21301 .m(5)
21302 .n(n)
21303 .k(k)
21304 .cn_stride(11)
21305 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21306 }
21307 }
21308 }
21309
21310 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
21311 for (uint32_t n = 16; n <= 24; n += 8) {
21312 for (size_t k = 1; k <= 5; k += 2) {
21313 GemmMicrokernelTester()
21314 .mr(5)
21315 .nr(8)
21316 .kr(1)
21317 .sr(1)
21318 .m(5)
21319 .n(n)
21320 .k(k)
21321 .a_stride(7)
21322 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21323 }
21324 }
21325 }
21326
21327 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
21328 for (uint32_t n = 16; n <= 24; n += 8) {
21329 for (size_t k = 1; k <= 5; k += 2) {
21330 for (uint32_t m = 1; m <= 5; m++) {
21331 GemmMicrokernelTester()
21332 .mr(5)
21333 .nr(8)
21334 .kr(1)
21335 .sr(1)
21336 .m(m)
21337 .n(n)
21338 .k(k)
21339 .iterations(1)
21340 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21341 }
21342 }
21343 }
21344 }
21345
21346 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
21347 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021348 for (uint32_t n = 1; n <= 8; n++) {
21349 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021350 GemmMicrokernelTester()
21351 .mr(5)
21352 .nr(8)
21353 .kr(1)
21354 .sr(1)
21355 .m(m)
21356 .n(n)
21357 .k(k)
21358 .cm_stride(11)
21359 .iterations(1)
21360 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21361 }
21362 }
21363 }
21364 }
21365
21366 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmin) {
21367 GemmMicrokernelTester()
21368 .mr(5)
21369 .nr(8)
21370 .kr(1)
21371 .sr(1)
21372 .m(5)
21373 .n(8)
21374 .k(1)
21375 .qmin(128)
21376 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21377 }
21378
21379 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmax) {
21380 GemmMicrokernelTester()
21381 .mr(5)
21382 .nr(8)
21383 .kr(1)
21384 .sr(1)
21385 .m(5)
21386 .n(8)
21387 .k(1)
21388 .qmax(128)
21389 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21390 }
21391
21392 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
21393 GemmMicrokernelTester()
21394 .mr(5)
21395 .nr(8)
21396 .kr(1)
21397 .sr(1)
21398 .m(5)
21399 .n(8)
21400 .k(1)
21401 .cm_stride(11)
21402 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
21403 }
21404#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21405
21406
21407#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21408 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
21409 GemmMicrokernelTester()
21410 .mr(1)
21411 .nr(8)
21412 .kr(1)
21413 .sr(1)
21414 .m(1)
21415 .n(8)
21416 .k(4)
21417 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21418 }
21419
21420 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cn) {
21421 GemmMicrokernelTester()
21422 .mr(1)
21423 .nr(8)
21424 .kr(1)
21425 .sr(1)
21426 .m(1)
21427 .n(8)
21428 .k(4)
21429 .cn_stride(11)
21430 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21431 }
21432
21433 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
21434 GemmMicrokernelTester()
21435 .mr(1)
21436 .nr(8)
21437 .kr(1)
21438 .sr(1)
21439 .m(1)
21440 .n(8)
21441 .k(4)
21442 .a_stride(7)
21443 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21444 }
21445
21446 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021447 for (uint32_t n = 1; n <= 8; n++) {
21448 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021449 GemmMicrokernelTester()
21450 .mr(1)
21451 .nr(8)
21452 .kr(1)
21453 .sr(1)
21454 .m(m)
21455 .n(n)
21456 .k(4)
21457 .iterations(1)
21458 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21459 }
21460 }
21461 }
21462
21463 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
21464 for (uint32_t m = 1; m <= 1; m++) {
21465 GemmMicrokernelTester()
21466 .mr(1)
21467 .nr(8)
21468 .kr(1)
21469 .sr(1)
21470 .m(m)
21471 .n(8)
21472 .k(4)
21473 .iterations(1)
21474 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21475 }
21476 }
21477
21478 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
21479 for (uint32_t n = 1; n <= 8; n++) {
21480 GemmMicrokernelTester()
21481 .mr(1)
21482 .nr(8)
21483 .kr(1)
21484 .sr(1)
21485 .m(1)
21486 .n(n)
21487 .k(4)
21488 .iterations(1)
21489 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21490 }
21491 }
21492
21493 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
21494 for (size_t k = 1; k < 4; k++) {
21495 GemmMicrokernelTester()
21496 .mr(1)
21497 .nr(8)
21498 .kr(1)
21499 .sr(1)
21500 .m(1)
21501 .n(8)
21502 .k(k)
21503 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21504 }
21505 }
21506
21507 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
21508 for (size_t k = 1; k < 4; k++) {
21509 GemmMicrokernelTester()
21510 .mr(1)
21511 .nr(8)
21512 .kr(1)
21513 .sr(1)
21514 .m(1)
21515 .n(8)
21516 .k(k)
21517 .a_stride(7)
21518 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21519 }
21520 }
21521
21522 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
21523 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021524 for (uint32_t n = 1; n <= 8; n++) {
21525 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021526 GemmMicrokernelTester()
21527 .mr(1)
21528 .nr(8)
21529 .kr(1)
21530 .sr(1)
21531 .m(m)
21532 .n(n)
21533 .k(k)
21534 .iterations(1)
21535 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21536 }
21537 }
21538 }
21539 }
21540
21541 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
21542 for (size_t k = 5; k < 8; k++) {
21543 GemmMicrokernelTester()
21544 .mr(1)
21545 .nr(8)
21546 .kr(1)
21547 .sr(1)
21548 .m(1)
21549 .n(8)
21550 .k(k)
21551 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21552 }
21553 }
21554
21555 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
21556 for (size_t k = 5; k < 8; k++) {
21557 GemmMicrokernelTester()
21558 .mr(1)
21559 .nr(8)
21560 .kr(1)
21561 .sr(1)
21562 .m(1)
21563 .n(8)
21564 .k(k)
21565 .a_stride(11)
21566 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21567 }
21568 }
21569
21570 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
21571 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021572 for (uint32_t n = 1; n <= 8; n++) {
21573 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021574 GemmMicrokernelTester()
21575 .mr(1)
21576 .nr(8)
21577 .kr(1)
21578 .sr(1)
21579 .m(m)
21580 .n(n)
21581 .k(k)
21582 .iterations(1)
21583 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21584 }
21585 }
21586 }
21587 }
21588
21589 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4) {
21590 for (size_t k = 8; k <= 40; k += 4) {
21591 GemmMicrokernelTester()
21592 .mr(1)
21593 .nr(8)
21594 .kr(1)
21595 .sr(1)
21596 .m(1)
21597 .n(8)
21598 .k(k)
21599 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21600 }
21601 }
21602
21603 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
21604 for (size_t k = 8; k <= 40; k += 4) {
21605 GemmMicrokernelTester()
21606 .mr(1)
21607 .nr(8)
21608 .kr(1)
21609 .sr(1)
21610 .m(1)
21611 .n(8)
21612 .k(k)
21613 .a_stride(43)
21614 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21615 }
21616 }
21617
21618 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
21619 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021620 for (uint32_t n = 1; n <= 8; n++) {
21621 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021622 GemmMicrokernelTester()
21623 .mr(1)
21624 .nr(8)
21625 .kr(1)
21626 .sr(1)
21627 .m(m)
21628 .n(n)
21629 .k(k)
21630 .iterations(1)
21631 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21632 }
21633 }
21634 }
21635 }
21636
21637 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
21638 for (uint32_t n = 9; n < 16; n++) {
21639 for (size_t k = 1; k <= 20; k += 5) {
21640 GemmMicrokernelTester()
21641 .mr(1)
21642 .nr(8)
21643 .kr(1)
21644 .sr(1)
21645 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021646 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021647 .k(k)
21648 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21649 }
21650 }
21651 }
21652
21653 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
21654 for (uint32_t n = 9; n < 16; n++) {
21655 for (size_t k = 1; k <= 20; k += 5) {
21656 GemmMicrokernelTester()
21657 .mr(1)
21658 .nr(8)
21659 .kr(1)
21660 .sr(1)
21661 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021662 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021663 .k(k)
21664 .cn_stride(11)
21665 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21666 }
21667 }
21668 }
21669
21670 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
21671 for (uint32_t n = 9; n < 16; n++) {
21672 for (size_t k = 1; k <= 20; k += 5) {
21673 GemmMicrokernelTester()
21674 .mr(1)
21675 .nr(8)
21676 .kr(1)
21677 .sr(1)
21678 .m(1)
21679 .n(n)
21680 .k(k)
21681 .a_stride(23)
21682 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21683 }
21684 }
21685 }
21686
21687 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
21688 for (uint32_t n = 9; n < 16; n++) {
21689 for (size_t k = 1; k <= 20; k += 5) {
21690 for (uint32_t m = 1; m <= 1; m++) {
21691 GemmMicrokernelTester()
21692 .mr(1)
21693 .nr(8)
21694 .kr(1)
21695 .sr(1)
21696 .m(m)
21697 .n(n)
21698 .k(k)
21699 .iterations(1)
21700 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21701 }
21702 }
21703 }
21704 }
21705
21706 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8) {
21707 for (uint32_t n = 16; n <= 24; n += 8) {
21708 for (size_t k = 1; k <= 20; k += 5) {
21709 GemmMicrokernelTester()
21710 .mr(1)
21711 .nr(8)
21712 .kr(1)
21713 .sr(1)
21714 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021715 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021716 .k(k)
21717 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21718 }
21719 }
21720 }
21721
21722 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
21723 for (uint32_t n = 16; n <= 24; n += 8) {
21724 for (size_t k = 1; k <= 20; k += 5) {
21725 GemmMicrokernelTester()
21726 .mr(1)
21727 .nr(8)
21728 .kr(1)
21729 .sr(1)
21730 .m(1)
21731 .n(n)
21732 .k(k)
21733 .cn_stride(11)
21734 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21735 }
21736 }
21737 }
21738
21739 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
21740 for (uint32_t n = 16; n <= 24; n += 8) {
21741 for (size_t k = 1; k <= 20; k += 5) {
21742 GemmMicrokernelTester()
21743 .mr(1)
21744 .nr(8)
21745 .kr(1)
21746 .sr(1)
21747 .m(1)
21748 .n(n)
21749 .k(k)
21750 .a_stride(23)
21751 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21752 }
21753 }
21754 }
21755
21756 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
21757 for (uint32_t n = 16; n <= 24; n += 8) {
21758 for (size_t k = 1; k <= 20; k += 5) {
21759 for (uint32_t m = 1; m <= 1; m++) {
21760 GemmMicrokernelTester()
21761 .mr(1)
21762 .nr(8)
21763 .kr(1)
21764 .sr(1)
21765 .m(m)
21766 .n(n)
21767 .k(k)
21768 .iterations(1)
21769 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21770 }
21771 }
21772 }
21773 }
21774
21775 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
21776 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021777 for (uint32_t n = 1; n <= 8; n++) {
21778 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021779 GemmMicrokernelTester()
21780 .mr(1)
21781 .nr(8)
21782 .kr(1)
21783 .sr(1)
21784 .m(m)
21785 .n(n)
21786 .k(k)
21787 .cm_stride(11)
21788 .iterations(1)
21789 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21790 }
21791 }
21792 }
21793 }
21794
21795 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmin) {
21796 GemmMicrokernelTester()
21797 .mr(1)
21798 .nr(8)
21799 .kr(1)
21800 .sr(1)
21801 .m(1)
21802 .n(8)
21803 .k(4)
21804 .qmin(128)
21805 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21806 }
21807
21808 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmax) {
21809 GemmMicrokernelTester()
21810 .mr(1)
21811 .nr(8)
21812 .kr(1)
21813 .sr(1)
21814 .m(1)
21815 .n(8)
21816 .k(4)
21817 .qmax(128)
21818 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21819 }
21820
21821 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm) {
21822 GemmMicrokernelTester()
21823 .mr(1)
21824 .nr(8)
21825 .kr(1)
21826 .sr(1)
21827 .m(1)
21828 .n(8)
21829 .k(4)
21830 .cm_stride(11)
21831 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21832 }
21833#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21834
21835
21836#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21837 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
21838 GemmMicrokernelTester()
21839 .mr(6)
21840 .nr(8)
21841 .kr(1)
21842 .sr(1)
21843 .m(6)
21844 .n(8)
21845 .k(4)
21846 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21847 }
21848
21849 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cn) {
21850 GemmMicrokernelTester()
21851 .mr(6)
21852 .nr(8)
21853 .kr(1)
21854 .sr(1)
21855 .m(6)
21856 .n(8)
21857 .k(4)
21858 .cn_stride(11)
21859 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21860 }
21861
21862 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
21863 GemmMicrokernelTester()
21864 .mr(6)
21865 .nr(8)
21866 .kr(1)
21867 .sr(1)
21868 .m(6)
21869 .n(8)
21870 .k(4)
21871 .a_stride(7)
21872 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21873 }
21874
21875 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021876 for (uint32_t n = 1; n <= 8; n++) {
21877 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021878 GemmMicrokernelTester()
21879 .mr(6)
21880 .nr(8)
21881 .kr(1)
21882 .sr(1)
21883 .m(m)
21884 .n(n)
21885 .k(4)
21886 .iterations(1)
21887 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21888 }
21889 }
21890 }
21891
21892 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
21893 for (uint32_t m = 1; m <= 6; m++) {
21894 GemmMicrokernelTester()
21895 .mr(6)
21896 .nr(8)
21897 .kr(1)
21898 .sr(1)
21899 .m(m)
21900 .n(8)
21901 .k(4)
21902 .iterations(1)
21903 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21904 }
21905 }
21906
21907 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
21908 for (uint32_t n = 1; n <= 8; n++) {
21909 GemmMicrokernelTester()
21910 .mr(6)
21911 .nr(8)
21912 .kr(1)
21913 .sr(1)
21914 .m(6)
21915 .n(n)
21916 .k(4)
21917 .iterations(1)
21918 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21919 }
21920 }
21921
21922 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
21923 for (size_t k = 1; k < 4; k++) {
21924 GemmMicrokernelTester()
21925 .mr(6)
21926 .nr(8)
21927 .kr(1)
21928 .sr(1)
21929 .m(6)
21930 .n(8)
21931 .k(k)
21932 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21933 }
21934 }
21935
21936 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
21937 for (size_t k = 1; k < 4; k++) {
21938 GemmMicrokernelTester()
21939 .mr(6)
21940 .nr(8)
21941 .kr(1)
21942 .sr(1)
21943 .m(6)
21944 .n(8)
21945 .k(k)
21946 .a_stride(7)
21947 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21948 }
21949 }
21950
21951 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
21952 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021953 for (uint32_t n = 1; n <= 8; n++) {
21954 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021955 GemmMicrokernelTester()
21956 .mr(6)
21957 .nr(8)
21958 .kr(1)
21959 .sr(1)
21960 .m(m)
21961 .n(n)
21962 .k(k)
21963 .iterations(1)
21964 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21965 }
21966 }
21967 }
21968 }
21969
21970 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
21971 for (size_t k = 5; k < 8; k++) {
21972 GemmMicrokernelTester()
21973 .mr(6)
21974 .nr(8)
21975 .kr(1)
21976 .sr(1)
21977 .m(6)
21978 .n(8)
21979 .k(k)
21980 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21981 }
21982 }
21983
21984 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
21985 for (size_t k = 5; k < 8; k++) {
21986 GemmMicrokernelTester()
21987 .mr(6)
21988 .nr(8)
21989 .kr(1)
21990 .sr(1)
21991 .m(6)
21992 .n(8)
21993 .k(k)
21994 .a_stride(11)
21995 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
21996 }
21997 }
21998
21999 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
22000 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022001 for (uint32_t n = 1; n <= 8; n++) {
22002 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022003 GemmMicrokernelTester()
22004 .mr(6)
22005 .nr(8)
22006 .kr(1)
22007 .sr(1)
22008 .m(m)
22009 .n(n)
22010 .k(k)
22011 .iterations(1)
22012 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22013 }
22014 }
22015 }
22016 }
22017
22018 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4) {
22019 for (size_t k = 8; k <= 40; k += 4) {
22020 GemmMicrokernelTester()
22021 .mr(6)
22022 .nr(8)
22023 .kr(1)
22024 .sr(1)
22025 .m(6)
22026 .n(8)
22027 .k(k)
22028 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22029 }
22030 }
22031
22032 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
22033 for (size_t k = 8; k <= 40; k += 4) {
22034 GemmMicrokernelTester()
22035 .mr(6)
22036 .nr(8)
22037 .kr(1)
22038 .sr(1)
22039 .m(6)
22040 .n(8)
22041 .k(k)
22042 .a_stride(43)
22043 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22044 }
22045 }
22046
22047 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
22048 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022049 for (uint32_t n = 1; n <= 8; n++) {
22050 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022051 GemmMicrokernelTester()
22052 .mr(6)
22053 .nr(8)
22054 .kr(1)
22055 .sr(1)
22056 .m(m)
22057 .n(n)
22058 .k(k)
22059 .iterations(1)
22060 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22061 }
22062 }
22063 }
22064 }
22065
22066 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
22067 for (uint32_t n = 9; n < 16; n++) {
22068 for (size_t k = 1; k <= 20; k += 5) {
22069 GemmMicrokernelTester()
22070 .mr(6)
22071 .nr(8)
22072 .kr(1)
22073 .sr(1)
22074 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022075 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022076 .k(k)
22077 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22078 }
22079 }
22080 }
22081
22082 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
22083 for (uint32_t n = 9; n < 16; n++) {
22084 for (size_t k = 1; k <= 20; k += 5) {
22085 GemmMicrokernelTester()
22086 .mr(6)
22087 .nr(8)
22088 .kr(1)
22089 .sr(1)
22090 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022091 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022092 .k(k)
22093 .cn_stride(11)
22094 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22095 }
22096 }
22097 }
22098
22099 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
22100 for (uint32_t n = 9; n < 16; n++) {
22101 for (size_t k = 1; k <= 20; k += 5) {
22102 GemmMicrokernelTester()
22103 .mr(6)
22104 .nr(8)
22105 .kr(1)
22106 .sr(1)
22107 .m(6)
22108 .n(n)
22109 .k(k)
22110 .a_stride(23)
22111 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22112 }
22113 }
22114 }
22115
22116 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
22117 for (uint32_t n = 9; n < 16; n++) {
22118 for (size_t k = 1; k <= 20; k += 5) {
22119 for (uint32_t m = 1; m <= 6; m++) {
22120 GemmMicrokernelTester()
22121 .mr(6)
22122 .nr(8)
22123 .kr(1)
22124 .sr(1)
22125 .m(m)
22126 .n(n)
22127 .k(k)
22128 .iterations(1)
22129 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22130 }
22131 }
22132 }
22133 }
22134
22135 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8) {
22136 for (uint32_t n = 16; n <= 24; n += 8) {
22137 for (size_t k = 1; k <= 20; k += 5) {
22138 GemmMicrokernelTester()
22139 .mr(6)
22140 .nr(8)
22141 .kr(1)
22142 .sr(1)
22143 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022144 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022145 .k(k)
22146 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22147 }
22148 }
22149 }
22150
22151 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
22152 for (uint32_t n = 16; n <= 24; n += 8) {
22153 for (size_t k = 1; k <= 20; k += 5) {
22154 GemmMicrokernelTester()
22155 .mr(6)
22156 .nr(8)
22157 .kr(1)
22158 .sr(1)
22159 .m(6)
22160 .n(n)
22161 .k(k)
22162 .cn_stride(11)
22163 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22164 }
22165 }
22166 }
22167
22168 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
22169 for (uint32_t n = 16; n <= 24; n += 8) {
22170 for (size_t k = 1; k <= 20; k += 5) {
22171 GemmMicrokernelTester()
22172 .mr(6)
22173 .nr(8)
22174 .kr(1)
22175 .sr(1)
22176 .m(6)
22177 .n(n)
22178 .k(k)
22179 .a_stride(23)
22180 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22181 }
22182 }
22183 }
22184
22185 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
22186 for (uint32_t n = 16; n <= 24; n += 8) {
22187 for (size_t k = 1; k <= 20; k += 5) {
22188 for (uint32_t m = 1; m <= 6; m++) {
22189 GemmMicrokernelTester()
22190 .mr(6)
22191 .nr(8)
22192 .kr(1)
22193 .sr(1)
22194 .m(m)
22195 .n(n)
22196 .k(k)
22197 .iterations(1)
22198 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22199 }
22200 }
22201 }
22202 }
22203
22204 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
22205 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022206 for (uint32_t n = 1; n <= 8; n++) {
22207 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022208 GemmMicrokernelTester()
22209 .mr(6)
22210 .nr(8)
22211 .kr(1)
22212 .sr(1)
22213 .m(m)
22214 .n(n)
22215 .k(k)
22216 .cm_stride(11)
22217 .iterations(1)
22218 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22219 }
22220 }
22221 }
22222 }
22223
22224 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmin) {
22225 GemmMicrokernelTester()
22226 .mr(6)
22227 .nr(8)
22228 .kr(1)
22229 .sr(1)
22230 .m(6)
22231 .n(8)
22232 .k(4)
22233 .qmin(128)
22234 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22235 }
22236
22237 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmax) {
22238 GemmMicrokernelTester()
22239 .mr(6)
22240 .nr(8)
22241 .kr(1)
22242 .sr(1)
22243 .m(6)
22244 .n(8)
22245 .k(4)
22246 .qmax(128)
22247 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22248 }
22249
22250 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm) {
22251 GemmMicrokernelTester()
22252 .mr(6)
22253 .nr(8)
22254 .kr(1)
22255 .sr(1)
22256 .m(6)
22257 .n(8)
22258 .k(4)
22259 .cm_stride(11)
22260 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
22261 }
22262#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22263
22264
22265#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22266 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4) {
22267 GemmMicrokernelTester()
22268 .mr(1)
22269 .nr(8)
22270 .kr(1)
22271 .sr(1)
22272 .m(1)
22273 .n(8)
22274 .k(4)
22275 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22276 }
22277
22278 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cn) {
22279 GemmMicrokernelTester()
22280 .mr(1)
22281 .nr(8)
22282 .kr(1)
22283 .sr(1)
22284 .m(1)
22285 .n(8)
22286 .k(4)
22287 .cn_stride(11)
22288 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22289 }
22290
22291 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
22292 GemmMicrokernelTester()
22293 .mr(1)
22294 .nr(8)
22295 .kr(1)
22296 .sr(1)
22297 .m(1)
22298 .n(8)
22299 .k(4)
22300 .a_stride(7)
22301 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22302 }
22303
22304 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022305 for (uint32_t n = 1; n <= 8; n++) {
22306 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022307 GemmMicrokernelTester()
22308 .mr(1)
22309 .nr(8)
22310 .kr(1)
22311 .sr(1)
22312 .m(m)
22313 .n(n)
22314 .k(4)
22315 .iterations(1)
22316 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22317 }
22318 }
22319 }
22320
22321 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
22322 for (uint32_t m = 1; m <= 1; m++) {
22323 GemmMicrokernelTester()
22324 .mr(1)
22325 .nr(8)
22326 .kr(1)
22327 .sr(1)
22328 .m(m)
22329 .n(8)
22330 .k(4)
22331 .iterations(1)
22332 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22333 }
22334 }
22335
22336 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
22337 for (uint32_t n = 1; n <= 8; n++) {
22338 GemmMicrokernelTester()
22339 .mr(1)
22340 .nr(8)
22341 .kr(1)
22342 .sr(1)
22343 .m(1)
22344 .n(n)
22345 .k(4)
22346 .iterations(1)
22347 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22348 }
22349 }
22350
22351 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4) {
22352 for (size_t k = 1; k < 4; k++) {
22353 GemmMicrokernelTester()
22354 .mr(1)
22355 .nr(8)
22356 .kr(1)
22357 .sr(1)
22358 .m(1)
22359 .n(8)
22360 .k(k)
22361 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22362 }
22363 }
22364
22365 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
22366 for (size_t k = 1; k < 4; k++) {
22367 GemmMicrokernelTester()
22368 .mr(1)
22369 .nr(8)
22370 .kr(1)
22371 .sr(1)
22372 .m(1)
22373 .n(8)
22374 .k(k)
22375 .a_stride(7)
22376 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22377 }
22378 }
22379
22380 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
22381 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022382 for (uint32_t n = 1; n <= 8; n++) {
22383 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022384 GemmMicrokernelTester()
22385 .mr(1)
22386 .nr(8)
22387 .kr(1)
22388 .sr(1)
22389 .m(m)
22390 .n(n)
22391 .k(k)
22392 .iterations(1)
22393 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22394 }
22395 }
22396 }
22397 }
22398
22399 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4) {
22400 for (size_t k = 5; k < 8; k++) {
22401 GemmMicrokernelTester()
22402 .mr(1)
22403 .nr(8)
22404 .kr(1)
22405 .sr(1)
22406 .m(1)
22407 .n(8)
22408 .k(k)
22409 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22410 }
22411 }
22412
22413 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
22414 for (size_t k = 5; k < 8; k++) {
22415 GemmMicrokernelTester()
22416 .mr(1)
22417 .nr(8)
22418 .kr(1)
22419 .sr(1)
22420 .m(1)
22421 .n(8)
22422 .k(k)
22423 .a_stride(11)
22424 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22425 }
22426 }
22427
22428 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
22429 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022430 for (uint32_t n = 1; n <= 8; n++) {
22431 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022432 GemmMicrokernelTester()
22433 .mr(1)
22434 .nr(8)
22435 .kr(1)
22436 .sr(1)
22437 .m(m)
22438 .n(n)
22439 .k(k)
22440 .iterations(1)
22441 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22442 }
22443 }
22444 }
22445 }
22446
22447 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4) {
22448 for (size_t k = 8; k <= 40; k += 4) {
22449 GemmMicrokernelTester()
22450 .mr(1)
22451 .nr(8)
22452 .kr(1)
22453 .sr(1)
22454 .m(1)
22455 .n(8)
22456 .k(k)
22457 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22458 }
22459 }
22460
22461 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
22462 for (size_t k = 8; k <= 40; k += 4) {
22463 GemmMicrokernelTester()
22464 .mr(1)
22465 .nr(8)
22466 .kr(1)
22467 .sr(1)
22468 .m(1)
22469 .n(8)
22470 .k(k)
22471 .a_stride(43)
22472 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22473 }
22474 }
22475
22476 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
22477 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022478 for (uint32_t n = 1; n <= 8; n++) {
22479 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022480 GemmMicrokernelTester()
22481 .mr(1)
22482 .nr(8)
22483 .kr(1)
22484 .sr(1)
22485 .m(m)
22486 .n(n)
22487 .k(k)
22488 .iterations(1)
22489 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22490 }
22491 }
22492 }
22493 }
22494
22495 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8) {
22496 for (uint32_t n = 9; n < 16; n++) {
22497 for (size_t k = 1; k <= 20; k += 5) {
22498 GemmMicrokernelTester()
22499 .mr(1)
22500 .nr(8)
22501 .kr(1)
22502 .sr(1)
22503 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022504 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022505 .k(k)
22506 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22507 }
22508 }
22509 }
22510
22511 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
22512 for (uint32_t n = 9; n < 16; n++) {
22513 for (size_t k = 1; k <= 20; k += 5) {
22514 GemmMicrokernelTester()
22515 .mr(1)
22516 .nr(8)
22517 .kr(1)
22518 .sr(1)
22519 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022520 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022521 .k(k)
22522 .cn_stride(11)
22523 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22524 }
22525 }
22526 }
22527
22528 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
22529 for (uint32_t n = 9; n < 16; n++) {
22530 for (size_t k = 1; k <= 20; k += 5) {
22531 GemmMicrokernelTester()
22532 .mr(1)
22533 .nr(8)
22534 .kr(1)
22535 .sr(1)
22536 .m(1)
22537 .n(n)
22538 .k(k)
22539 .a_stride(23)
22540 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22541 }
22542 }
22543 }
22544
22545 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
22546 for (uint32_t n = 9; n < 16; n++) {
22547 for (size_t k = 1; k <= 20; k += 5) {
22548 for (uint32_t m = 1; m <= 1; m++) {
22549 GemmMicrokernelTester()
22550 .mr(1)
22551 .nr(8)
22552 .kr(1)
22553 .sr(1)
22554 .m(m)
22555 .n(n)
22556 .k(k)
22557 .iterations(1)
22558 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22559 }
22560 }
22561 }
22562 }
22563
22564 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8) {
22565 for (uint32_t n = 16; n <= 24; n += 8) {
22566 for (size_t k = 1; k <= 20; k += 5) {
22567 GemmMicrokernelTester()
22568 .mr(1)
22569 .nr(8)
22570 .kr(1)
22571 .sr(1)
22572 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022573 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022574 .k(k)
22575 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22576 }
22577 }
22578 }
22579
22580 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
22581 for (uint32_t n = 16; n <= 24; n += 8) {
22582 for (size_t k = 1; k <= 20; k += 5) {
22583 GemmMicrokernelTester()
22584 .mr(1)
22585 .nr(8)
22586 .kr(1)
22587 .sr(1)
22588 .m(1)
22589 .n(n)
22590 .k(k)
22591 .cn_stride(11)
22592 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22593 }
22594 }
22595 }
22596
22597 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
22598 for (uint32_t n = 16; n <= 24; n += 8) {
22599 for (size_t k = 1; k <= 20; k += 5) {
22600 GemmMicrokernelTester()
22601 .mr(1)
22602 .nr(8)
22603 .kr(1)
22604 .sr(1)
22605 .m(1)
22606 .n(n)
22607 .k(k)
22608 .a_stride(23)
22609 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22610 }
22611 }
22612 }
22613
22614 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
22615 for (uint32_t n = 16; n <= 24; n += 8) {
22616 for (size_t k = 1; k <= 20; k += 5) {
22617 for (uint32_t m = 1; m <= 1; m++) {
22618 GemmMicrokernelTester()
22619 .mr(1)
22620 .nr(8)
22621 .kr(1)
22622 .sr(1)
22623 .m(m)
22624 .n(n)
22625 .k(k)
22626 .iterations(1)
22627 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22628 }
22629 }
22630 }
22631 }
22632
22633 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
22634 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022635 for (uint32_t n = 1; n <= 8; n++) {
22636 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022637 GemmMicrokernelTester()
22638 .mr(1)
22639 .nr(8)
22640 .kr(1)
22641 .sr(1)
22642 .m(m)
22643 .n(n)
22644 .k(k)
22645 .cm_stride(11)
22646 .iterations(1)
22647 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22648 }
22649 }
22650 }
22651 }
22652
22653 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmin) {
22654 GemmMicrokernelTester()
22655 .mr(1)
22656 .nr(8)
22657 .kr(1)
22658 .sr(1)
22659 .m(1)
22660 .n(8)
22661 .k(4)
22662 .qmin(128)
22663 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22664 }
22665
22666 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmax) {
22667 GemmMicrokernelTester()
22668 .mr(1)
22669 .nr(8)
22670 .kr(1)
22671 .sr(1)
22672 .m(1)
22673 .n(8)
22674 .k(4)
22675 .qmax(128)
22676 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22677 }
22678
22679 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm) {
22680 GemmMicrokernelTester()
22681 .mr(1)
22682 .nr(8)
22683 .kr(1)
22684 .sr(1)
22685 .m(1)
22686 .n(8)
22687 .k(4)
22688 .cm_stride(11)
22689 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22690 }
22691#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22692
22693
22694#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22695 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4) {
22696 GemmMicrokernelTester()
22697 .mr(6)
22698 .nr(8)
22699 .kr(1)
22700 .sr(1)
22701 .m(6)
22702 .n(8)
22703 .k(4)
22704 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22705 }
22706
22707 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cn) {
22708 GemmMicrokernelTester()
22709 .mr(6)
22710 .nr(8)
22711 .kr(1)
22712 .sr(1)
22713 .m(6)
22714 .n(8)
22715 .k(4)
22716 .cn_stride(11)
22717 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22718 }
22719
22720 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
22721 GemmMicrokernelTester()
22722 .mr(6)
22723 .nr(8)
22724 .kr(1)
22725 .sr(1)
22726 .m(6)
22727 .n(8)
22728 .k(4)
22729 .a_stride(7)
22730 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22731 }
22732
22733 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022734 for (uint32_t n = 1; n <= 8; n++) {
22735 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022736 GemmMicrokernelTester()
22737 .mr(6)
22738 .nr(8)
22739 .kr(1)
22740 .sr(1)
22741 .m(m)
22742 .n(n)
22743 .k(4)
22744 .iterations(1)
22745 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22746 }
22747 }
22748 }
22749
22750 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
22751 for (uint32_t m = 1; m <= 6; m++) {
22752 GemmMicrokernelTester()
22753 .mr(6)
22754 .nr(8)
22755 .kr(1)
22756 .sr(1)
22757 .m(m)
22758 .n(8)
22759 .k(4)
22760 .iterations(1)
22761 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22762 }
22763 }
22764
22765 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
22766 for (uint32_t n = 1; n <= 8; n++) {
22767 GemmMicrokernelTester()
22768 .mr(6)
22769 .nr(8)
22770 .kr(1)
22771 .sr(1)
22772 .m(6)
22773 .n(n)
22774 .k(4)
22775 .iterations(1)
22776 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22777 }
22778 }
22779
22780 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4) {
22781 for (size_t k = 1; k < 4; k++) {
22782 GemmMicrokernelTester()
22783 .mr(6)
22784 .nr(8)
22785 .kr(1)
22786 .sr(1)
22787 .m(6)
22788 .n(8)
22789 .k(k)
22790 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22791 }
22792 }
22793
22794 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
22795 for (size_t k = 1; k < 4; k++) {
22796 GemmMicrokernelTester()
22797 .mr(6)
22798 .nr(8)
22799 .kr(1)
22800 .sr(1)
22801 .m(6)
22802 .n(8)
22803 .k(k)
22804 .a_stride(7)
22805 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22806 }
22807 }
22808
22809 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
22810 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022811 for (uint32_t n = 1; n <= 8; n++) {
22812 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022813 GemmMicrokernelTester()
22814 .mr(6)
22815 .nr(8)
22816 .kr(1)
22817 .sr(1)
22818 .m(m)
22819 .n(n)
22820 .k(k)
22821 .iterations(1)
22822 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22823 }
22824 }
22825 }
22826 }
22827
22828 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4) {
22829 for (size_t k = 5; k < 8; k++) {
22830 GemmMicrokernelTester()
22831 .mr(6)
22832 .nr(8)
22833 .kr(1)
22834 .sr(1)
22835 .m(6)
22836 .n(8)
22837 .k(k)
22838 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22839 }
22840 }
22841
22842 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
22843 for (size_t k = 5; k < 8; k++) {
22844 GemmMicrokernelTester()
22845 .mr(6)
22846 .nr(8)
22847 .kr(1)
22848 .sr(1)
22849 .m(6)
22850 .n(8)
22851 .k(k)
22852 .a_stride(11)
22853 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22854 }
22855 }
22856
22857 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
22858 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022859 for (uint32_t n = 1; n <= 8; n++) {
22860 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022861 GemmMicrokernelTester()
22862 .mr(6)
22863 .nr(8)
22864 .kr(1)
22865 .sr(1)
22866 .m(m)
22867 .n(n)
22868 .k(k)
22869 .iterations(1)
22870 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22871 }
22872 }
22873 }
22874 }
22875
22876 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4) {
22877 for (size_t k = 8; k <= 40; k += 4) {
22878 GemmMicrokernelTester()
22879 .mr(6)
22880 .nr(8)
22881 .kr(1)
22882 .sr(1)
22883 .m(6)
22884 .n(8)
22885 .k(k)
22886 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22887 }
22888 }
22889
22890 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
22891 for (size_t k = 8; k <= 40; k += 4) {
22892 GemmMicrokernelTester()
22893 .mr(6)
22894 .nr(8)
22895 .kr(1)
22896 .sr(1)
22897 .m(6)
22898 .n(8)
22899 .k(k)
22900 .a_stride(43)
22901 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22902 }
22903 }
22904
22905 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
22906 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022907 for (uint32_t n = 1; n <= 8; n++) {
22908 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022909 GemmMicrokernelTester()
22910 .mr(6)
22911 .nr(8)
22912 .kr(1)
22913 .sr(1)
22914 .m(m)
22915 .n(n)
22916 .k(k)
22917 .iterations(1)
22918 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22919 }
22920 }
22921 }
22922 }
22923
22924 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8) {
22925 for (uint32_t n = 9; n < 16; n++) {
22926 for (size_t k = 1; k <= 20; k += 5) {
22927 GemmMicrokernelTester()
22928 .mr(6)
22929 .nr(8)
22930 .kr(1)
22931 .sr(1)
22932 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022933 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022934 .k(k)
22935 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22936 }
22937 }
22938 }
22939
22940 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
22941 for (uint32_t n = 9; n < 16; n++) {
22942 for (size_t k = 1; k <= 20; k += 5) {
22943 GemmMicrokernelTester()
22944 .mr(6)
22945 .nr(8)
22946 .kr(1)
22947 .sr(1)
22948 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022949 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022950 .k(k)
22951 .cn_stride(11)
22952 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22953 }
22954 }
22955 }
22956
22957 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
22958 for (uint32_t n = 9; n < 16; n++) {
22959 for (size_t k = 1; k <= 20; k += 5) {
22960 GemmMicrokernelTester()
22961 .mr(6)
22962 .nr(8)
22963 .kr(1)
22964 .sr(1)
22965 .m(6)
22966 .n(n)
22967 .k(k)
22968 .a_stride(23)
22969 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22970 }
22971 }
22972 }
22973
22974 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
22975 for (uint32_t n = 9; n < 16; n++) {
22976 for (size_t k = 1; k <= 20; k += 5) {
22977 for (uint32_t m = 1; m <= 6; m++) {
22978 GemmMicrokernelTester()
22979 .mr(6)
22980 .nr(8)
22981 .kr(1)
22982 .sr(1)
22983 .m(m)
22984 .n(n)
22985 .k(k)
22986 .iterations(1)
22987 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
22988 }
22989 }
22990 }
22991 }
22992
22993 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8) {
22994 for (uint32_t n = 16; n <= 24; n += 8) {
22995 for (size_t k = 1; k <= 20; k += 5) {
22996 GemmMicrokernelTester()
22997 .mr(6)
22998 .nr(8)
22999 .kr(1)
23000 .sr(1)
23001 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023002 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023003 .k(k)
23004 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23005 }
23006 }
23007 }
23008
23009 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
23010 for (uint32_t n = 16; n <= 24; n += 8) {
23011 for (size_t k = 1; k <= 20; k += 5) {
23012 GemmMicrokernelTester()
23013 .mr(6)
23014 .nr(8)
23015 .kr(1)
23016 .sr(1)
23017 .m(6)
23018 .n(n)
23019 .k(k)
23020 .cn_stride(11)
23021 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23022 }
23023 }
23024 }
23025
23026 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
23027 for (uint32_t n = 16; n <= 24; n += 8) {
23028 for (size_t k = 1; k <= 20; k += 5) {
23029 GemmMicrokernelTester()
23030 .mr(6)
23031 .nr(8)
23032 .kr(1)
23033 .sr(1)
23034 .m(6)
23035 .n(n)
23036 .k(k)
23037 .a_stride(23)
23038 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23039 }
23040 }
23041 }
23042
23043 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
23044 for (uint32_t n = 16; n <= 24; n += 8) {
23045 for (size_t k = 1; k <= 20; k += 5) {
23046 for (uint32_t m = 1; m <= 6; m++) {
23047 GemmMicrokernelTester()
23048 .mr(6)
23049 .nr(8)
23050 .kr(1)
23051 .sr(1)
23052 .m(m)
23053 .n(n)
23054 .k(k)
23055 .iterations(1)
23056 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23057 }
23058 }
23059 }
23060 }
23061
23062 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
23063 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023064 for (uint32_t n = 1; n <= 8; n++) {
23065 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023066 GemmMicrokernelTester()
23067 .mr(6)
23068 .nr(8)
23069 .kr(1)
23070 .sr(1)
23071 .m(m)
23072 .n(n)
23073 .k(k)
23074 .cm_stride(11)
23075 .iterations(1)
23076 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23077 }
23078 }
23079 }
23080 }
23081
23082 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmin) {
23083 GemmMicrokernelTester()
23084 .mr(6)
23085 .nr(8)
23086 .kr(1)
23087 .sr(1)
23088 .m(6)
23089 .n(8)
23090 .k(4)
23091 .qmin(128)
23092 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23093 }
23094
23095 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmax) {
23096 GemmMicrokernelTester()
23097 .mr(6)
23098 .nr(8)
23099 .kr(1)
23100 .sr(1)
23101 .m(6)
23102 .n(8)
23103 .k(4)
23104 .qmax(128)
23105 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23106 }
23107
23108 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm) {
23109 GemmMicrokernelTester()
23110 .mr(6)
23111 .nr(8)
23112 .kr(1)
23113 .sr(1)
23114 .m(6)
23115 .n(8)
23116 .k(4)
23117 .cm_stride(11)
23118 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
23119 }
23120#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23121
23122
23123#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23124 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4) {
23125 GemmMicrokernelTester()
23126 .mr(3)
23127 .nr(8)
23128 .kr(1)
23129 .sr(4)
23130 .m(3)
23131 .n(8)
23132 .k(4)
23133 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23134 }
23135
23136 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cn) {
23137 GemmMicrokernelTester()
23138 .mr(3)
23139 .nr(8)
23140 .kr(1)
23141 .sr(4)
23142 .m(3)
23143 .n(8)
23144 .k(4)
23145 .cn_stride(11)
23146 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23147 }
23148
23149 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
23150 GemmMicrokernelTester()
23151 .mr(3)
23152 .nr(8)
23153 .kr(1)
23154 .sr(4)
23155 .m(3)
23156 .n(8)
23157 .k(4)
23158 .a_stride(7)
23159 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23160 }
23161
23162 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023163 for (uint32_t n = 1; n <= 8; n++) {
23164 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023165 GemmMicrokernelTester()
23166 .mr(3)
23167 .nr(8)
23168 .kr(1)
23169 .sr(4)
23170 .m(m)
23171 .n(n)
23172 .k(4)
23173 .iterations(1)
23174 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23175 }
23176 }
23177 }
23178
23179 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
23180 for (uint32_t m = 1; m <= 3; m++) {
23181 GemmMicrokernelTester()
23182 .mr(3)
23183 .nr(8)
23184 .kr(1)
23185 .sr(4)
23186 .m(m)
23187 .n(8)
23188 .k(4)
23189 .iterations(1)
23190 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23191 }
23192 }
23193
23194 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
23195 for (uint32_t n = 1; n <= 8; n++) {
23196 GemmMicrokernelTester()
23197 .mr(3)
23198 .nr(8)
23199 .kr(1)
23200 .sr(4)
23201 .m(3)
23202 .n(n)
23203 .k(4)
23204 .iterations(1)
23205 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23206 }
23207 }
23208
23209 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4) {
23210 for (size_t k = 1; k < 4; k++) {
23211 GemmMicrokernelTester()
23212 .mr(3)
23213 .nr(8)
23214 .kr(1)
23215 .sr(4)
23216 .m(3)
23217 .n(8)
23218 .k(k)
23219 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23220 }
23221 }
23222
23223 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
23224 for (size_t k = 1; k < 4; k++) {
23225 GemmMicrokernelTester()
23226 .mr(3)
23227 .nr(8)
23228 .kr(1)
23229 .sr(4)
23230 .m(3)
23231 .n(8)
23232 .k(k)
23233 .a_stride(7)
23234 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23235 }
23236 }
23237
23238 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
23239 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023240 for (uint32_t n = 1; n <= 8; n++) {
23241 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023242 GemmMicrokernelTester()
23243 .mr(3)
23244 .nr(8)
23245 .kr(1)
23246 .sr(4)
23247 .m(m)
23248 .n(n)
23249 .k(k)
23250 .iterations(1)
23251 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23252 }
23253 }
23254 }
23255 }
23256
23257 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4) {
23258 for (size_t k = 5; k < 8; k++) {
23259 GemmMicrokernelTester()
23260 .mr(3)
23261 .nr(8)
23262 .kr(1)
23263 .sr(4)
23264 .m(3)
23265 .n(8)
23266 .k(k)
23267 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23268 }
23269 }
23270
23271 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
23272 for (size_t k = 5; k < 8; k++) {
23273 GemmMicrokernelTester()
23274 .mr(3)
23275 .nr(8)
23276 .kr(1)
23277 .sr(4)
23278 .m(3)
23279 .n(8)
23280 .k(k)
23281 .a_stride(11)
23282 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23283 }
23284 }
23285
23286 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
23287 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023288 for (uint32_t n = 1; n <= 8; n++) {
23289 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023290 GemmMicrokernelTester()
23291 .mr(3)
23292 .nr(8)
23293 .kr(1)
23294 .sr(4)
23295 .m(m)
23296 .n(n)
23297 .k(k)
23298 .iterations(1)
23299 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23300 }
23301 }
23302 }
23303 }
23304
23305 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4) {
23306 for (size_t k = 8; k <= 40; k += 4) {
23307 GemmMicrokernelTester()
23308 .mr(3)
23309 .nr(8)
23310 .kr(1)
23311 .sr(4)
23312 .m(3)
23313 .n(8)
23314 .k(k)
23315 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23316 }
23317 }
23318
23319 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
23320 for (size_t k = 8; k <= 40; k += 4) {
23321 GemmMicrokernelTester()
23322 .mr(3)
23323 .nr(8)
23324 .kr(1)
23325 .sr(4)
23326 .m(3)
23327 .n(8)
23328 .k(k)
23329 .a_stride(43)
23330 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23331 }
23332 }
23333
23334 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_subtile) {
23335 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023336 for (uint32_t n = 1; n <= 8; n++) {
23337 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023338 GemmMicrokernelTester()
23339 .mr(3)
23340 .nr(8)
23341 .kr(1)
23342 .sr(4)
23343 .m(m)
23344 .n(n)
23345 .k(k)
23346 .iterations(1)
23347 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23348 }
23349 }
23350 }
23351 }
23352
23353 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8) {
23354 for (uint32_t n = 9; n < 16; n++) {
23355 for (size_t k = 1; k <= 20; k += 5) {
23356 GemmMicrokernelTester()
23357 .mr(3)
23358 .nr(8)
23359 .kr(1)
23360 .sr(4)
23361 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023362 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023363 .k(k)
23364 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23365 }
23366 }
23367 }
23368
23369 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
23370 for (uint32_t n = 9; n < 16; n++) {
23371 for (size_t k = 1; k <= 20; k += 5) {
23372 GemmMicrokernelTester()
23373 .mr(3)
23374 .nr(8)
23375 .kr(1)
23376 .sr(4)
23377 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023378 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023379 .k(k)
23380 .cn_stride(11)
23381 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23382 }
23383 }
23384 }
23385
23386 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
23387 for (uint32_t n = 9; n < 16; n++) {
23388 for (size_t k = 1; k <= 20; k += 5) {
23389 GemmMicrokernelTester()
23390 .mr(3)
23391 .nr(8)
23392 .kr(1)
23393 .sr(4)
23394 .m(3)
23395 .n(n)
23396 .k(k)
23397 .a_stride(23)
23398 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23399 }
23400 }
23401 }
23402
23403 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
23404 for (uint32_t n = 9; n < 16; n++) {
23405 for (size_t k = 1; k <= 20; k += 5) {
23406 for (uint32_t m = 1; m <= 3; m++) {
23407 GemmMicrokernelTester()
23408 .mr(3)
23409 .nr(8)
23410 .kr(1)
23411 .sr(4)
23412 .m(m)
23413 .n(n)
23414 .k(k)
23415 .iterations(1)
23416 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23417 }
23418 }
23419 }
23420 }
23421
23422 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8) {
23423 for (uint32_t n = 16; n <= 24; n += 8) {
23424 for (size_t k = 1; k <= 20; k += 5) {
23425 GemmMicrokernelTester()
23426 .mr(3)
23427 .nr(8)
23428 .kr(1)
23429 .sr(4)
23430 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023431 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023432 .k(k)
23433 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23434 }
23435 }
23436 }
23437
23438 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
23439 for (uint32_t n = 16; n <= 24; n += 8) {
23440 for (size_t k = 1; k <= 20; k += 5) {
23441 GemmMicrokernelTester()
23442 .mr(3)
23443 .nr(8)
23444 .kr(1)
23445 .sr(4)
23446 .m(3)
23447 .n(n)
23448 .k(k)
23449 .cn_stride(11)
23450 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23451 }
23452 }
23453 }
23454
23455 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
23456 for (uint32_t n = 16; n <= 24; n += 8) {
23457 for (size_t k = 1; k <= 20; k += 5) {
23458 GemmMicrokernelTester()
23459 .mr(3)
23460 .nr(8)
23461 .kr(1)
23462 .sr(4)
23463 .m(3)
23464 .n(n)
23465 .k(k)
23466 .a_stride(23)
23467 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23468 }
23469 }
23470 }
23471
23472 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_subtile) {
23473 for (uint32_t n = 16; n <= 24; n += 8) {
23474 for (size_t k = 1; k <= 20; k += 5) {
23475 for (uint32_t m = 1; m <= 3; m++) {
23476 GemmMicrokernelTester()
23477 .mr(3)
23478 .nr(8)
23479 .kr(1)
23480 .sr(4)
23481 .m(m)
23482 .n(n)
23483 .k(k)
23484 .iterations(1)
23485 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23486 }
23487 }
23488 }
23489 }
23490
23491 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm_subtile) {
23492 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023493 for (uint32_t n = 1; n <= 8; n++) {
23494 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023495 GemmMicrokernelTester()
23496 .mr(3)
23497 .nr(8)
23498 .kr(1)
23499 .sr(4)
23500 .m(m)
23501 .n(n)
23502 .k(k)
23503 .cm_stride(11)
23504 .iterations(1)
23505 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23506 }
23507 }
23508 }
23509 }
23510
23511 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, qmin) {
23512 GemmMicrokernelTester()
23513 .mr(3)
23514 .nr(8)
23515 .kr(1)
23516 .sr(4)
23517 .m(3)
23518 .n(8)
23519 .k(4)
23520 .qmin(128)
23521 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23522 }
23523
23524 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, qmax) {
23525 GemmMicrokernelTester()
23526 .mr(3)
23527 .nr(8)
23528 .kr(1)
23529 .sr(4)
23530 .m(3)
23531 .n(8)
23532 .k(4)
23533 .qmax(128)
23534 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23535 }
23536
23537 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm) {
23538 GemmMicrokernelTester()
23539 .mr(3)
23540 .nr(8)
23541 .kr(1)
23542 .sr(4)
23543 .m(3)
23544 .n(8)
23545 .k(4)
23546 .cm_stride(11)
23547 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23548 }
23549#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23550
23551
23552#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23553 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4) {
23554 GemmMicrokernelTester()
23555 .mr(6)
23556 .nr(8)
23557 .kr(1)
23558 .sr(4)
23559 .m(6)
23560 .n(8)
23561 .k(4)
23562 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23563 }
23564
23565 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cn) {
23566 GemmMicrokernelTester()
23567 .mr(6)
23568 .nr(8)
23569 .kr(1)
23570 .sr(4)
23571 .m(6)
23572 .n(8)
23573 .k(4)
23574 .cn_stride(11)
23575 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23576 }
23577
23578 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
23579 GemmMicrokernelTester()
23580 .mr(6)
23581 .nr(8)
23582 .kr(1)
23583 .sr(4)
23584 .m(6)
23585 .n(8)
23586 .k(4)
23587 .a_stride(7)
23588 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23589 }
23590
23591 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023592 for (uint32_t n = 1; n <= 8; n++) {
23593 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023594 GemmMicrokernelTester()
23595 .mr(6)
23596 .nr(8)
23597 .kr(1)
23598 .sr(4)
23599 .m(m)
23600 .n(n)
23601 .k(4)
23602 .iterations(1)
23603 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23604 }
23605 }
23606 }
23607
23608 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
23609 for (uint32_t m = 1; m <= 6; m++) {
23610 GemmMicrokernelTester()
23611 .mr(6)
23612 .nr(8)
23613 .kr(1)
23614 .sr(4)
23615 .m(m)
23616 .n(8)
23617 .k(4)
23618 .iterations(1)
23619 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23620 }
23621 }
23622
23623 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
23624 for (uint32_t n = 1; n <= 8; n++) {
23625 GemmMicrokernelTester()
23626 .mr(6)
23627 .nr(8)
23628 .kr(1)
23629 .sr(4)
23630 .m(6)
23631 .n(n)
23632 .k(4)
23633 .iterations(1)
23634 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23635 }
23636 }
23637
23638 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4) {
23639 for (size_t k = 1; k < 4; k++) {
23640 GemmMicrokernelTester()
23641 .mr(6)
23642 .nr(8)
23643 .kr(1)
23644 .sr(4)
23645 .m(6)
23646 .n(8)
23647 .k(k)
23648 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23649 }
23650 }
23651
23652 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
23653 for (size_t k = 1; k < 4; k++) {
23654 GemmMicrokernelTester()
23655 .mr(6)
23656 .nr(8)
23657 .kr(1)
23658 .sr(4)
23659 .m(6)
23660 .n(8)
23661 .k(k)
23662 .a_stride(7)
23663 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23664 }
23665 }
23666
23667 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
23668 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023669 for (uint32_t n = 1; n <= 8; n++) {
23670 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023671 GemmMicrokernelTester()
23672 .mr(6)
23673 .nr(8)
23674 .kr(1)
23675 .sr(4)
23676 .m(m)
23677 .n(n)
23678 .k(k)
23679 .iterations(1)
23680 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23681 }
23682 }
23683 }
23684 }
23685
23686 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4) {
23687 for (size_t k = 5; k < 8; k++) {
23688 GemmMicrokernelTester()
23689 .mr(6)
23690 .nr(8)
23691 .kr(1)
23692 .sr(4)
23693 .m(6)
23694 .n(8)
23695 .k(k)
23696 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23697 }
23698 }
23699
23700 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
23701 for (size_t k = 5; k < 8; k++) {
23702 GemmMicrokernelTester()
23703 .mr(6)
23704 .nr(8)
23705 .kr(1)
23706 .sr(4)
23707 .m(6)
23708 .n(8)
23709 .k(k)
23710 .a_stride(11)
23711 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23712 }
23713 }
23714
23715 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
23716 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023717 for (uint32_t n = 1; n <= 8; n++) {
23718 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023719 GemmMicrokernelTester()
23720 .mr(6)
23721 .nr(8)
23722 .kr(1)
23723 .sr(4)
23724 .m(m)
23725 .n(n)
23726 .k(k)
23727 .iterations(1)
23728 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23729 }
23730 }
23731 }
23732 }
23733
23734 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4) {
23735 for (size_t k = 8; k <= 40; k += 4) {
23736 GemmMicrokernelTester()
23737 .mr(6)
23738 .nr(8)
23739 .kr(1)
23740 .sr(4)
23741 .m(6)
23742 .n(8)
23743 .k(k)
23744 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23745 }
23746 }
23747
23748 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
23749 for (size_t k = 8; k <= 40; k += 4) {
23750 GemmMicrokernelTester()
23751 .mr(6)
23752 .nr(8)
23753 .kr(1)
23754 .sr(4)
23755 .m(6)
23756 .n(8)
23757 .k(k)
23758 .a_stride(43)
23759 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23760 }
23761 }
23762
23763 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_subtile) {
23764 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023765 for (uint32_t n = 1; n <= 8; n++) {
23766 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023767 GemmMicrokernelTester()
23768 .mr(6)
23769 .nr(8)
23770 .kr(1)
23771 .sr(4)
23772 .m(m)
23773 .n(n)
23774 .k(k)
23775 .iterations(1)
23776 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23777 }
23778 }
23779 }
23780 }
23781
23782 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8) {
23783 for (uint32_t n = 9; n < 16; n++) {
23784 for (size_t k = 1; k <= 20; k += 5) {
23785 GemmMicrokernelTester()
23786 .mr(6)
23787 .nr(8)
23788 .kr(1)
23789 .sr(4)
23790 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023791 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023792 .k(k)
23793 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23794 }
23795 }
23796 }
23797
23798 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
23799 for (uint32_t n = 9; n < 16; n++) {
23800 for (size_t k = 1; k <= 20; k += 5) {
23801 GemmMicrokernelTester()
23802 .mr(6)
23803 .nr(8)
23804 .kr(1)
23805 .sr(4)
23806 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023807 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023808 .k(k)
23809 .cn_stride(11)
23810 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23811 }
23812 }
23813 }
23814
23815 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
23816 for (uint32_t n = 9; n < 16; n++) {
23817 for (size_t k = 1; k <= 20; k += 5) {
23818 GemmMicrokernelTester()
23819 .mr(6)
23820 .nr(8)
23821 .kr(1)
23822 .sr(4)
23823 .m(6)
23824 .n(n)
23825 .k(k)
23826 .a_stride(23)
23827 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23828 }
23829 }
23830 }
23831
23832 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
23833 for (uint32_t n = 9; n < 16; n++) {
23834 for (size_t k = 1; k <= 20; k += 5) {
23835 for (uint32_t m = 1; m <= 6; m++) {
23836 GemmMicrokernelTester()
23837 .mr(6)
23838 .nr(8)
23839 .kr(1)
23840 .sr(4)
23841 .m(m)
23842 .n(n)
23843 .k(k)
23844 .iterations(1)
23845 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23846 }
23847 }
23848 }
23849 }
23850
23851 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8) {
23852 for (uint32_t n = 16; n <= 24; n += 8) {
23853 for (size_t k = 1; k <= 20; k += 5) {
23854 GemmMicrokernelTester()
23855 .mr(6)
23856 .nr(8)
23857 .kr(1)
23858 .sr(4)
23859 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023860 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023861 .k(k)
23862 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23863 }
23864 }
23865 }
23866
23867 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
23868 for (uint32_t n = 16; n <= 24; n += 8) {
23869 for (size_t k = 1; k <= 20; k += 5) {
23870 GemmMicrokernelTester()
23871 .mr(6)
23872 .nr(8)
23873 .kr(1)
23874 .sr(4)
23875 .m(6)
23876 .n(n)
23877 .k(k)
23878 .cn_stride(11)
23879 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23880 }
23881 }
23882 }
23883
23884 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
23885 for (uint32_t n = 16; n <= 24; n += 8) {
23886 for (size_t k = 1; k <= 20; k += 5) {
23887 GemmMicrokernelTester()
23888 .mr(6)
23889 .nr(8)
23890 .kr(1)
23891 .sr(4)
23892 .m(6)
23893 .n(n)
23894 .k(k)
23895 .a_stride(23)
23896 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23897 }
23898 }
23899 }
23900
23901 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_subtile) {
23902 for (uint32_t n = 16; n <= 24; n += 8) {
23903 for (size_t k = 1; k <= 20; k += 5) {
23904 for (uint32_t m = 1; m <= 6; m++) {
23905 GemmMicrokernelTester()
23906 .mr(6)
23907 .nr(8)
23908 .kr(1)
23909 .sr(4)
23910 .m(m)
23911 .n(n)
23912 .k(k)
23913 .iterations(1)
23914 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23915 }
23916 }
23917 }
23918 }
23919
23920 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm_subtile) {
23921 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023922 for (uint32_t n = 1; n <= 8; n++) {
23923 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023924 GemmMicrokernelTester()
23925 .mr(6)
23926 .nr(8)
23927 .kr(1)
23928 .sr(4)
23929 .m(m)
23930 .n(n)
23931 .k(k)
23932 .cm_stride(11)
23933 .iterations(1)
23934 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23935 }
23936 }
23937 }
23938 }
23939
23940 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, qmin) {
23941 GemmMicrokernelTester()
23942 .mr(6)
23943 .nr(8)
23944 .kr(1)
23945 .sr(4)
23946 .m(6)
23947 .n(8)
23948 .k(4)
23949 .qmin(128)
23950 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23951 }
23952
23953 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, qmax) {
23954 GemmMicrokernelTester()
23955 .mr(6)
23956 .nr(8)
23957 .kr(1)
23958 .sr(4)
23959 .m(6)
23960 .n(8)
23961 .k(4)
23962 .qmax(128)
23963 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23964 }
23965
23966 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm) {
23967 GemmMicrokernelTester()
23968 .mr(6)
23969 .nr(8)
23970 .kr(1)
23971 .sr(4)
23972 .m(6)
23973 .n(8)
23974 .k(4)
23975 .cm_stride(11)
23976 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
23977 }
23978#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23979
23980
23981#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23982 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4) {
23983 GemmMicrokernelTester()
23984 .mr(3)
23985 .nr(8)
23986 .kr(1)
23987 .sr(4)
23988 .m(3)
23989 .n(8)
23990 .k(4)
23991 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
23992 }
23993
23994 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cn) {
23995 GemmMicrokernelTester()
23996 .mr(3)
23997 .nr(8)
23998 .kr(1)
23999 .sr(4)
24000 .m(3)
24001 .n(8)
24002 .k(4)
24003 .cn_stride(11)
24004 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24005 }
24006
24007 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
24008 GemmMicrokernelTester()
24009 .mr(3)
24010 .nr(8)
24011 .kr(1)
24012 .sr(4)
24013 .m(3)
24014 .n(8)
24015 .k(4)
24016 .a_stride(7)
24017 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24018 }
24019
24020 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024021 for (uint32_t n = 1; n <= 8; n++) {
24022 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024023 GemmMicrokernelTester()
24024 .mr(3)
24025 .nr(8)
24026 .kr(1)
24027 .sr(4)
24028 .m(m)
24029 .n(n)
24030 .k(4)
24031 .iterations(1)
24032 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24033 }
24034 }
24035 }
24036
24037 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
24038 for (uint32_t m = 1; m <= 3; m++) {
24039 GemmMicrokernelTester()
24040 .mr(3)
24041 .nr(8)
24042 .kr(1)
24043 .sr(4)
24044 .m(m)
24045 .n(8)
24046 .k(4)
24047 .iterations(1)
24048 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24049 }
24050 }
24051
24052 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
24053 for (uint32_t n = 1; n <= 8; n++) {
24054 GemmMicrokernelTester()
24055 .mr(3)
24056 .nr(8)
24057 .kr(1)
24058 .sr(4)
24059 .m(3)
24060 .n(n)
24061 .k(4)
24062 .iterations(1)
24063 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24064 }
24065 }
24066
24067 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4) {
24068 for (size_t k = 1; k < 4; k++) {
24069 GemmMicrokernelTester()
24070 .mr(3)
24071 .nr(8)
24072 .kr(1)
24073 .sr(4)
24074 .m(3)
24075 .n(8)
24076 .k(k)
24077 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24078 }
24079 }
24080
24081 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
24082 for (size_t k = 1; k < 4; k++) {
24083 GemmMicrokernelTester()
24084 .mr(3)
24085 .nr(8)
24086 .kr(1)
24087 .sr(4)
24088 .m(3)
24089 .n(8)
24090 .k(k)
24091 .a_stride(7)
24092 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24093 }
24094 }
24095
24096 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_subtile) {
24097 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024098 for (uint32_t n = 1; n <= 8; n++) {
24099 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024100 GemmMicrokernelTester()
24101 .mr(3)
24102 .nr(8)
24103 .kr(1)
24104 .sr(4)
24105 .m(m)
24106 .n(n)
24107 .k(k)
24108 .iterations(1)
24109 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24110 }
24111 }
24112 }
24113 }
24114
24115 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4) {
24116 for (size_t k = 5; k < 8; k++) {
24117 GemmMicrokernelTester()
24118 .mr(3)
24119 .nr(8)
24120 .kr(1)
24121 .sr(4)
24122 .m(3)
24123 .n(8)
24124 .k(k)
24125 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24126 }
24127 }
24128
24129 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
24130 for (size_t k = 5; k < 8; k++) {
24131 GemmMicrokernelTester()
24132 .mr(3)
24133 .nr(8)
24134 .kr(1)
24135 .sr(4)
24136 .m(3)
24137 .n(8)
24138 .k(k)
24139 .a_stride(11)
24140 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24141 }
24142 }
24143
24144 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_subtile) {
24145 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024146 for (uint32_t n = 1; n <= 8; n++) {
24147 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024148 GemmMicrokernelTester()
24149 .mr(3)
24150 .nr(8)
24151 .kr(1)
24152 .sr(4)
24153 .m(m)
24154 .n(n)
24155 .k(k)
24156 .iterations(1)
24157 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24158 }
24159 }
24160 }
24161 }
24162
24163 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4) {
24164 for (size_t k = 8; k <= 40; k += 4) {
24165 GemmMicrokernelTester()
24166 .mr(3)
24167 .nr(8)
24168 .kr(1)
24169 .sr(4)
24170 .m(3)
24171 .n(8)
24172 .k(k)
24173 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24174 }
24175 }
24176
24177 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_strided_a) {
24178 for (size_t k = 8; k <= 40; k += 4) {
24179 GemmMicrokernelTester()
24180 .mr(3)
24181 .nr(8)
24182 .kr(1)
24183 .sr(4)
24184 .m(3)
24185 .n(8)
24186 .k(k)
24187 .a_stride(43)
24188 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24189 }
24190 }
24191
24192 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_subtile) {
24193 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024194 for (uint32_t n = 1; n <= 8; n++) {
24195 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024196 GemmMicrokernelTester()
24197 .mr(3)
24198 .nr(8)
24199 .kr(1)
24200 .sr(4)
24201 .m(m)
24202 .n(n)
24203 .k(k)
24204 .iterations(1)
24205 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24206 }
24207 }
24208 }
24209 }
24210
24211 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8) {
24212 for (uint32_t n = 9; n < 16; n++) {
24213 for (size_t k = 1; k <= 20; k += 5) {
24214 GemmMicrokernelTester()
24215 .mr(3)
24216 .nr(8)
24217 .kr(1)
24218 .sr(4)
24219 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024220 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024221 .k(k)
24222 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24223 }
24224 }
24225 }
24226
24227 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
24228 for (uint32_t n = 9; n < 16; n++) {
24229 for (size_t k = 1; k <= 20; k += 5) {
24230 GemmMicrokernelTester()
24231 .mr(3)
24232 .nr(8)
24233 .kr(1)
24234 .sr(4)
24235 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024236 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024237 .k(k)
24238 .cn_stride(11)
24239 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24240 }
24241 }
24242 }
24243
24244 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
24245 for (uint32_t n = 9; n < 16; n++) {
24246 for (size_t k = 1; k <= 20; k += 5) {
24247 GemmMicrokernelTester()
24248 .mr(3)
24249 .nr(8)
24250 .kr(1)
24251 .sr(4)
24252 .m(3)
24253 .n(n)
24254 .k(k)
24255 .a_stride(23)
24256 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24257 }
24258 }
24259 }
24260
24261 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_subtile) {
24262 for (uint32_t n = 9; n < 16; n++) {
24263 for (size_t k = 1; k <= 20; k += 5) {
24264 for (uint32_t m = 1; m <= 3; m++) {
24265 GemmMicrokernelTester()
24266 .mr(3)
24267 .nr(8)
24268 .kr(1)
24269 .sr(4)
24270 .m(m)
24271 .n(n)
24272 .k(k)
24273 .iterations(1)
24274 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24275 }
24276 }
24277 }
24278 }
24279
24280 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8) {
24281 for (uint32_t n = 16; n <= 24; n += 8) {
24282 for (size_t k = 1; k <= 20; k += 5) {
24283 GemmMicrokernelTester()
24284 .mr(3)
24285 .nr(8)
24286 .kr(1)
24287 .sr(4)
24288 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024289 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024290 .k(k)
24291 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24292 }
24293 }
24294 }
24295
24296 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
24297 for (uint32_t n = 16; n <= 24; n += 8) {
24298 for (size_t k = 1; k <= 20; k += 5) {
24299 GemmMicrokernelTester()
24300 .mr(3)
24301 .nr(8)
24302 .kr(1)
24303 .sr(4)
24304 .m(3)
24305 .n(n)
24306 .k(k)
24307 .cn_stride(11)
24308 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24309 }
24310 }
24311 }
24312
24313 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_a) {
24314 for (uint32_t n = 16; n <= 24; n += 8) {
24315 for (size_t k = 1; k <= 20; k += 5) {
24316 GemmMicrokernelTester()
24317 .mr(3)
24318 .nr(8)
24319 .kr(1)
24320 .sr(4)
24321 .m(3)
24322 .n(n)
24323 .k(k)
24324 .a_stride(23)
24325 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24326 }
24327 }
24328 }
24329
24330 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_subtile) {
24331 for (uint32_t n = 16; n <= 24; n += 8) {
24332 for (size_t k = 1; k <= 20; k += 5) {
24333 for (uint32_t m = 1; m <= 3; m++) {
24334 GemmMicrokernelTester()
24335 .mr(3)
24336 .nr(8)
24337 .kr(1)
24338 .sr(4)
24339 .m(m)
24340 .n(n)
24341 .k(k)
24342 .iterations(1)
24343 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24344 }
24345 }
24346 }
24347 }
24348
24349 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cm_subtile) {
24350 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024351 for (uint32_t n = 1; n <= 8; n++) {
24352 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024353 GemmMicrokernelTester()
24354 .mr(3)
24355 .nr(8)
24356 .kr(1)
24357 .sr(4)
24358 .m(m)
24359 .n(n)
24360 .k(k)
24361 .cm_stride(11)
24362 .iterations(1)
24363 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24364 }
24365 }
24366 }
24367 }
24368
24369 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, qmin) {
24370 GemmMicrokernelTester()
24371 .mr(3)
24372 .nr(8)
24373 .kr(1)
24374 .sr(4)
24375 .m(3)
24376 .n(8)
24377 .k(4)
24378 .qmin(128)
24379 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24380 }
24381
24382 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, qmax) {
24383 GemmMicrokernelTester()
24384 .mr(3)
24385 .nr(8)
24386 .kr(1)
24387 .sr(4)
24388 .m(3)
24389 .n(8)
24390 .k(4)
24391 .qmax(128)
24392 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24393 }
24394
24395 TEST(F32_GEMMINC_MINMAX_3X8S4__WASMSIMD_X86, strided_cm) {
24396 GemmMicrokernelTester()
24397 .mr(3)
24398 .nr(8)
24399 .kr(1)
24400 .sr(4)
24401 .m(3)
24402 .n(8)
24403 .k(4)
24404 .cm_stride(11)
24405 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24406 }
24407#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24408
24409
24410#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24411 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4) {
24412 GemmMicrokernelTester()
24413 .mr(6)
24414 .nr(8)
24415 .kr(1)
24416 .sr(4)
24417 .m(6)
24418 .n(8)
24419 .k(4)
24420 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24421 }
24422
24423 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cn) {
24424 GemmMicrokernelTester()
24425 .mr(6)
24426 .nr(8)
24427 .kr(1)
24428 .sr(4)
24429 .m(6)
24430 .n(8)
24431 .k(4)
24432 .cn_stride(11)
24433 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24434 }
24435
24436 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
24437 GemmMicrokernelTester()
24438 .mr(6)
24439 .nr(8)
24440 .kr(1)
24441 .sr(4)
24442 .m(6)
24443 .n(8)
24444 .k(4)
24445 .a_stride(7)
24446 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24447 }
24448
24449 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024450 for (uint32_t n = 1; n <= 8; n++) {
24451 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024452 GemmMicrokernelTester()
24453 .mr(6)
24454 .nr(8)
24455 .kr(1)
24456 .sr(4)
24457 .m(m)
24458 .n(n)
24459 .k(4)
24460 .iterations(1)
24461 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24462 }
24463 }
24464 }
24465
24466 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
24467 for (uint32_t m = 1; m <= 6; m++) {
24468 GemmMicrokernelTester()
24469 .mr(6)
24470 .nr(8)
24471 .kr(1)
24472 .sr(4)
24473 .m(m)
24474 .n(8)
24475 .k(4)
24476 .iterations(1)
24477 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24478 }
24479 }
24480
24481 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
24482 for (uint32_t n = 1; n <= 8; n++) {
24483 GemmMicrokernelTester()
24484 .mr(6)
24485 .nr(8)
24486 .kr(1)
24487 .sr(4)
24488 .m(6)
24489 .n(n)
24490 .k(4)
24491 .iterations(1)
24492 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24493 }
24494 }
24495
24496 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4) {
24497 for (size_t k = 1; k < 4; k++) {
24498 GemmMicrokernelTester()
24499 .mr(6)
24500 .nr(8)
24501 .kr(1)
24502 .sr(4)
24503 .m(6)
24504 .n(8)
24505 .k(k)
24506 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24507 }
24508 }
24509
24510 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
24511 for (size_t k = 1; k < 4; k++) {
24512 GemmMicrokernelTester()
24513 .mr(6)
24514 .nr(8)
24515 .kr(1)
24516 .sr(4)
24517 .m(6)
24518 .n(8)
24519 .k(k)
24520 .a_stride(7)
24521 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24522 }
24523 }
24524
24525 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_subtile) {
24526 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024527 for (uint32_t n = 1; n <= 8; n++) {
24528 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024529 GemmMicrokernelTester()
24530 .mr(6)
24531 .nr(8)
24532 .kr(1)
24533 .sr(4)
24534 .m(m)
24535 .n(n)
24536 .k(k)
24537 .iterations(1)
24538 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24539 }
24540 }
24541 }
24542 }
24543
24544 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4) {
24545 for (size_t k = 5; k < 8; k++) {
24546 GemmMicrokernelTester()
24547 .mr(6)
24548 .nr(8)
24549 .kr(1)
24550 .sr(4)
24551 .m(6)
24552 .n(8)
24553 .k(k)
24554 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24555 }
24556 }
24557
24558 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
24559 for (size_t k = 5; k < 8; k++) {
24560 GemmMicrokernelTester()
24561 .mr(6)
24562 .nr(8)
24563 .kr(1)
24564 .sr(4)
24565 .m(6)
24566 .n(8)
24567 .k(k)
24568 .a_stride(11)
24569 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24570 }
24571 }
24572
24573 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_subtile) {
24574 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024575 for (uint32_t n = 1; n <= 8; n++) {
24576 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024577 GemmMicrokernelTester()
24578 .mr(6)
24579 .nr(8)
24580 .kr(1)
24581 .sr(4)
24582 .m(m)
24583 .n(n)
24584 .k(k)
24585 .iterations(1)
24586 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24587 }
24588 }
24589 }
24590 }
24591
24592 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4) {
24593 for (size_t k = 8; k <= 40; k += 4) {
24594 GemmMicrokernelTester()
24595 .mr(6)
24596 .nr(8)
24597 .kr(1)
24598 .sr(4)
24599 .m(6)
24600 .n(8)
24601 .k(k)
24602 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24603 }
24604 }
24605
24606 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_strided_a) {
24607 for (size_t k = 8; k <= 40; k += 4) {
24608 GemmMicrokernelTester()
24609 .mr(6)
24610 .nr(8)
24611 .kr(1)
24612 .sr(4)
24613 .m(6)
24614 .n(8)
24615 .k(k)
24616 .a_stride(43)
24617 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24618 }
24619 }
24620
24621 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_subtile) {
24622 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024623 for (uint32_t n = 1; n <= 8; n++) {
24624 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024625 GemmMicrokernelTester()
24626 .mr(6)
24627 .nr(8)
24628 .kr(1)
24629 .sr(4)
24630 .m(m)
24631 .n(n)
24632 .k(k)
24633 .iterations(1)
24634 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24635 }
24636 }
24637 }
24638 }
24639
24640 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8) {
24641 for (uint32_t n = 9; n < 16; n++) {
24642 for (size_t k = 1; k <= 20; k += 5) {
24643 GemmMicrokernelTester()
24644 .mr(6)
24645 .nr(8)
24646 .kr(1)
24647 .sr(4)
24648 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024649 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024650 .k(k)
24651 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24652 }
24653 }
24654 }
24655
24656 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
24657 for (uint32_t n = 9; n < 16; n++) {
24658 for (size_t k = 1; k <= 20; k += 5) {
24659 GemmMicrokernelTester()
24660 .mr(6)
24661 .nr(8)
24662 .kr(1)
24663 .sr(4)
24664 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024665 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024666 .k(k)
24667 .cn_stride(11)
24668 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24669 }
24670 }
24671 }
24672
24673 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
24674 for (uint32_t n = 9; n < 16; n++) {
24675 for (size_t k = 1; k <= 20; k += 5) {
24676 GemmMicrokernelTester()
24677 .mr(6)
24678 .nr(8)
24679 .kr(1)
24680 .sr(4)
24681 .m(6)
24682 .n(n)
24683 .k(k)
24684 .a_stride(23)
24685 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24686 }
24687 }
24688 }
24689
24690 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_subtile) {
24691 for (uint32_t n = 9; n < 16; n++) {
24692 for (size_t k = 1; k <= 20; k += 5) {
24693 for (uint32_t m = 1; m <= 6; m++) {
24694 GemmMicrokernelTester()
24695 .mr(6)
24696 .nr(8)
24697 .kr(1)
24698 .sr(4)
24699 .m(m)
24700 .n(n)
24701 .k(k)
24702 .iterations(1)
24703 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24704 }
24705 }
24706 }
24707 }
24708
24709 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8) {
24710 for (uint32_t n = 16; n <= 24; n += 8) {
24711 for (size_t k = 1; k <= 20; k += 5) {
24712 GemmMicrokernelTester()
24713 .mr(6)
24714 .nr(8)
24715 .kr(1)
24716 .sr(4)
24717 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024718 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024719 .k(k)
24720 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24721 }
24722 }
24723 }
24724
24725 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
24726 for (uint32_t n = 16; n <= 24; n += 8) {
24727 for (size_t k = 1; k <= 20; k += 5) {
24728 GemmMicrokernelTester()
24729 .mr(6)
24730 .nr(8)
24731 .kr(1)
24732 .sr(4)
24733 .m(6)
24734 .n(n)
24735 .k(k)
24736 .cn_stride(11)
24737 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24738 }
24739 }
24740 }
24741
24742 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_a) {
24743 for (uint32_t n = 16; n <= 24; n += 8) {
24744 for (size_t k = 1; k <= 20; k += 5) {
24745 GemmMicrokernelTester()
24746 .mr(6)
24747 .nr(8)
24748 .kr(1)
24749 .sr(4)
24750 .m(6)
24751 .n(n)
24752 .k(k)
24753 .a_stride(23)
24754 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24755 }
24756 }
24757 }
24758
24759 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_subtile) {
24760 for (uint32_t n = 16; n <= 24; n += 8) {
24761 for (size_t k = 1; k <= 20; k += 5) {
24762 for (uint32_t m = 1; m <= 6; m++) {
24763 GemmMicrokernelTester()
24764 .mr(6)
24765 .nr(8)
24766 .kr(1)
24767 .sr(4)
24768 .m(m)
24769 .n(n)
24770 .k(k)
24771 .iterations(1)
24772 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24773 }
24774 }
24775 }
24776 }
24777
24778 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cm_subtile) {
24779 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024780 for (uint32_t n = 1; n <= 8; n++) {
24781 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024782 GemmMicrokernelTester()
24783 .mr(6)
24784 .nr(8)
24785 .kr(1)
24786 .sr(4)
24787 .m(m)
24788 .n(n)
24789 .k(k)
24790 .cm_stride(11)
24791 .iterations(1)
24792 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24793 }
24794 }
24795 }
24796 }
24797
24798 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, qmin) {
24799 GemmMicrokernelTester()
24800 .mr(6)
24801 .nr(8)
24802 .kr(1)
24803 .sr(4)
24804 .m(6)
24805 .n(8)
24806 .k(4)
24807 .qmin(128)
24808 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24809 }
24810
24811 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, qmax) {
24812 GemmMicrokernelTester()
24813 .mr(6)
24814 .nr(8)
24815 .kr(1)
24816 .sr(4)
24817 .m(6)
24818 .n(8)
24819 .k(4)
24820 .qmax(128)
24821 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24822 }
24823
24824 TEST(F32_GEMMINC_MINMAX_6X8S4__WASMSIMD_X86, strided_cm) {
24825 GemmMicrokernelTester()
24826 .mr(6)
24827 .nr(8)
24828 .kr(1)
24829 .sr(4)
24830 .m(6)
24831 .n(8)
24832 .k(4)
24833 .cm_stride(11)
24834 .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
24835 }
24836#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24837
24838
24839#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24840 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1) {
24841 GemmMicrokernelTester()
24842 .mr(2)
24843 .nr(4)
24844 .kr(1)
24845 .sr(1)
24846 .m(2)
24847 .n(4)
24848 .k(1)
24849 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24850 }
24851
24852 TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cn) {
24853 GemmMicrokernelTester()
24854 .mr(2)
24855 .nr(4)
24856 .kr(1)
24857 .sr(1)
24858 .m(2)
24859 .n(4)
24860 .k(1)
24861 .cn_stride(7)
24862 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24863 }
24864
24865 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_strided_a) {
24866 GemmMicrokernelTester()
24867 .mr(2)
24868 .nr(4)
24869 .kr(1)
24870 .sr(1)
24871 .m(2)
24872 .n(4)
24873 .k(1)
24874 .a_stride(3)
24875 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24876 }
24877
24878 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024879 for (uint32_t n = 1; n <= 4; n++) {
24880 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024881 GemmMicrokernelTester()
24882 .mr(2)
24883 .nr(4)
24884 .kr(1)
24885 .sr(1)
24886 .m(m)
24887 .n(n)
24888 .k(1)
24889 .iterations(1)
24890 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24891 }
24892 }
24893 }
24894
24895 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile_m) {
24896 for (uint32_t m = 1; m <= 2; m++) {
24897 GemmMicrokernelTester()
24898 .mr(2)
24899 .nr(4)
24900 .kr(1)
24901 .sr(1)
24902 .m(m)
24903 .n(4)
24904 .k(1)
24905 .iterations(1)
24906 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24907 }
24908 }
24909
24910 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_eq_1_subtile_n) {
24911 for (uint32_t n = 1; n <= 4; n++) {
24912 GemmMicrokernelTester()
24913 .mr(2)
24914 .nr(4)
24915 .kr(1)
24916 .sr(1)
24917 .m(2)
24918 .n(n)
24919 .k(1)
24920 .iterations(1)
24921 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24922 }
24923 }
24924
24925 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1) {
24926 for (size_t k = 2; k < 10; k++) {
24927 GemmMicrokernelTester()
24928 .mr(2)
24929 .nr(4)
24930 .kr(1)
24931 .sr(1)
24932 .m(2)
24933 .n(4)
24934 .k(k)
24935 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24936 }
24937 }
24938
24939 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1_strided_a) {
24940 for (size_t k = 2; k < 10; k++) {
24941 GemmMicrokernelTester()
24942 .mr(2)
24943 .nr(4)
24944 .kr(1)
24945 .sr(1)
24946 .m(2)
24947 .n(4)
24948 .k(k)
24949 .a_stride(11)
24950 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24951 }
24952 }
24953
24954 TEST(F32_GEMMINC_MINMAX_2X4__WASM, k_gt_1_subtile) {
24955 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024956 for (uint32_t n = 1; n <= 4; n++) {
24957 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024958 GemmMicrokernelTester()
24959 .mr(2)
24960 .nr(4)
24961 .kr(1)
24962 .sr(1)
24963 .m(m)
24964 .n(n)
24965 .k(k)
24966 .iterations(1)
24967 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24968 }
24969 }
24970 }
24971 }
24972
24973 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4) {
24974 for (uint32_t n = 5; n < 8; n++) {
24975 for (size_t k = 1; k <= 5; k += 2) {
24976 GemmMicrokernelTester()
24977 .mr(2)
24978 .nr(4)
24979 .kr(1)
24980 .sr(1)
24981 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024982 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024983 .k(k)
24984 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
24985 }
24986 }
24987 }
24988
24989 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_strided_cn) {
24990 for (uint32_t n = 5; n < 8; n++) {
24991 for (size_t k = 1; k <= 5; k += 2) {
24992 GemmMicrokernelTester()
24993 .mr(2)
24994 .nr(4)
24995 .kr(1)
24996 .sr(1)
24997 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024998 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024999 .k(k)
25000 .cn_stride(7)
25001 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25002 }
25003 }
25004 }
25005
25006 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_strided_a) {
25007 for (uint32_t n = 5; n < 8; n++) {
25008 for (size_t k = 1; k <= 5; k += 2) {
25009 GemmMicrokernelTester()
25010 .mr(2)
25011 .nr(4)
25012 .kr(1)
25013 .sr(1)
25014 .m(2)
25015 .n(n)
25016 .k(k)
25017 .a_stride(7)
25018 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25019 }
25020 }
25021 }
25022
25023 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_gt_4_subtile) {
25024 for (uint32_t n = 5; n < 8; n++) {
25025 for (size_t k = 1; k <= 5; k += 2) {
25026 for (uint32_t m = 1; m <= 2; m++) {
25027 GemmMicrokernelTester()
25028 .mr(2)
25029 .nr(4)
25030 .kr(1)
25031 .sr(1)
25032 .m(m)
25033 .n(n)
25034 .k(k)
25035 .iterations(1)
25036 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25037 }
25038 }
25039 }
25040 }
25041
25042 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4) {
25043 for (uint32_t n = 8; n <= 12; n += 4) {
25044 for (size_t k = 1; k <= 5; k += 2) {
25045 GemmMicrokernelTester()
25046 .mr(2)
25047 .nr(4)
25048 .kr(1)
25049 .sr(1)
25050 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025051 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025052 .k(k)
25053 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25054 }
25055 }
25056 }
25057
25058 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_strided_cn) {
25059 for (uint32_t n = 8; n <= 12; n += 4) {
25060 for (size_t k = 1; k <= 5; k += 2) {
25061 GemmMicrokernelTester()
25062 .mr(2)
25063 .nr(4)
25064 .kr(1)
25065 .sr(1)
25066 .m(2)
25067 .n(n)
25068 .k(k)
25069 .cn_stride(7)
25070 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25071 }
25072 }
25073 }
25074
25075 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_strided_a) {
25076 for (uint32_t n = 8; n <= 12; n += 4) {
25077 for (size_t k = 1; k <= 5; k += 2) {
25078 GemmMicrokernelTester()
25079 .mr(2)
25080 .nr(4)
25081 .kr(1)
25082 .sr(1)
25083 .m(2)
25084 .n(n)
25085 .k(k)
25086 .a_stride(7)
25087 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25088 }
25089 }
25090 }
25091
25092 TEST(F32_GEMMINC_MINMAX_2X4__WASM, n_div_4_subtile) {
25093 for (uint32_t n = 8; n <= 12; n += 4) {
25094 for (size_t k = 1; k <= 5; k += 2) {
25095 for (uint32_t m = 1; m <= 2; m++) {
25096 GemmMicrokernelTester()
25097 .mr(2)
25098 .nr(4)
25099 .kr(1)
25100 .sr(1)
25101 .m(m)
25102 .n(n)
25103 .k(k)
25104 .iterations(1)
25105 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25106 }
25107 }
25108 }
25109 }
25110
25111 TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cm_subtile) {
25112 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025113 for (uint32_t n = 1; n <= 4; n++) {
25114 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025115 GemmMicrokernelTester()
25116 .mr(2)
25117 .nr(4)
25118 .kr(1)
25119 .sr(1)
25120 .m(m)
25121 .n(n)
25122 .k(k)
25123 .cm_stride(7)
25124 .iterations(1)
25125 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25126 }
25127 }
25128 }
25129 }
25130
25131 TEST(F32_GEMMINC_MINMAX_2X4__WASM, qmin) {
25132 GemmMicrokernelTester()
25133 .mr(2)
25134 .nr(4)
25135 .kr(1)
25136 .sr(1)
25137 .m(2)
25138 .n(4)
25139 .k(1)
25140 .qmin(128)
25141 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25142 }
25143
25144 TEST(F32_GEMMINC_MINMAX_2X4__WASM, qmax) {
25145 GemmMicrokernelTester()
25146 .mr(2)
25147 .nr(4)
25148 .kr(1)
25149 .sr(1)
25150 .m(2)
25151 .n(4)
25152 .k(1)
25153 .qmax(128)
25154 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25155 }
25156
25157 TEST(F32_GEMMINC_MINMAX_2X4__WASM, strided_cm) {
25158 GemmMicrokernelTester()
25159 .mr(2)
25160 .nr(4)
25161 .kr(1)
25162 .sr(1)
25163 .m(2)
25164 .n(4)
25165 .k(1)
25166 .cm_stride(7)
25167 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
25168 }
25169#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25170
25171
25172#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25173 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1) {
25174 GemmMicrokernelTester()
25175 .mr(4)
25176 .nr(4)
25177 .kr(1)
25178 .sr(1)
25179 .m(4)
25180 .n(4)
25181 .k(1)
25182 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25183 }
25184
25185 TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cn) {
25186 GemmMicrokernelTester()
25187 .mr(4)
25188 .nr(4)
25189 .kr(1)
25190 .sr(1)
25191 .m(4)
25192 .n(4)
25193 .k(1)
25194 .cn_stride(7)
25195 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25196 }
25197
25198 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_strided_a) {
25199 GemmMicrokernelTester()
25200 .mr(4)
25201 .nr(4)
25202 .kr(1)
25203 .sr(1)
25204 .m(4)
25205 .n(4)
25206 .k(1)
25207 .a_stride(3)
25208 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25209 }
25210
25211 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025212 for (uint32_t n = 1; n <= 4; n++) {
25213 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025214 GemmMicrokernelTester()
25215 .mr(4)
25216 .nr(4)
25217 .kr(1)
25218 .sr(1)
25219 .m(m)
25220 .n(n)
25221 .k(1)
25222 .iterations(1)
25223 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25224 }
25225 }
25226 }
25227
25228 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile_m) {
25229 for (uint32_t m = 1; m <= 4; m++) {
25230 GemmMicrokernelTester()
25231 .mr(4)
25232 .nr(4)
25233 .kr(1)
25234 .sr(1)
25235 .m(m)
25236 .n(4)
25237 .k(1)
25238 .iterations(1)
25239 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25240 }
25241 }
25242
25243 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_eq_1_subtile_n) {
25244 for (uint32_t n = 1; n <= 4; n++) {
25245 GemmMicrokernelTester()
25246 .mr(4)
25247 .nr(4)
25248 .kr(1)
25249 .sr(1)
25250 .m(4)
25251 .n(n)
25252 .k(1)
25253 .iterations(1)
25254 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25255 }
25256 }
25257
25258 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1) {
25259 for (size_t k = 2; k < 10; k++) {
25260 GemmMicrokernelTester()
25261 .mr(4)
25262 .nr(4)
25263 .kr(1)
25264 .sr(1)
25265 .m(4)
25266 .n(4)
25267 .k(k)
25268 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25269 }
25270 }
25271
25272 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1_strided_a) {
25273 for (size_t k = 2; k < 10; k++) {
25274 GemmMicrokernelTester()
25275 .mr(4)
25276 .nr(4)
25277 .kr(1)
25278 .sr(1)
25279 .m(4)
25280 .n(4)
25281 .k(k)
25282 .a_stride(11)
25283 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25284 }
25285 }
25286
25287 TEST(F32_GEMMINC_MINMAX_4X4__WASM, k_gt_1_subtile) {
25288 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025289 for (uint32_t n = 1; n <= 4; n++) {
25290 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025291 GemmMicrokernelTester()
25292 .mr(4)
25293 .nr(4)
25294 .kr(1)
25295 .sr(1)
25296 .m(m)
25297 .n(n)
25298 .k(k)
25299 .iterations(1)
25300 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25301 }
25302 }
25303 }
25304 }
25305
25306 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4) {
25307 for (uint32_t n = 5; n < 8; n++) {
25308 for (size_t k = 1; k <= 5; k += 2) {
25309 GemmMicrokernelTester()
25310 .mr(4)
25311 .nr(4)
25312 .kr(1)
25313 .sr(1)
25314 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025315 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025316 .k(k)
25317 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25318 }
25319 }
25320 }
25321
25322 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_strided_cn) {
25323 for (uint32_t n = 5; n < 8; n++) {
25324 for (size_t k = 1; k <= 5; k += 2) {
25325 GemmMicrokernelTester()
25326 .mr(4)
25327 .nr(4)
25328 .kr(1)
25329 .sr(1)
25330 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025331 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025332 .k(k)
25333 .cn_stride(7)
25334 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25335 }
25336 }
25337 }
25338
25339 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_strided_a) {
25340 for (uint32_t n = 5; n < 8; n++) {
25341 for (size_t k = 1; k <= 5; k += 2) {
25342 GemmMicrokernelTester()
25343 .mr(4)
25344 .nr(4)
25345 .kr(1)
25346 .sr(1)
25347 .m(4)
25348 .n(n)
25349 .k(k)
25350 .a_stride(7)
25351 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25352 }
25353 }
25354 }
25355
25356 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_gt_4_subtile) {
25357 for (uint32_t n = 5; n < 8; n++) {
25358 for (size_t k = 1; k <= 5; k += 2) {
25359 for (uint32_t m = 1; m <= 4; m++) {
25360 GemmMicrokernelTester()
25361 .mr(4)
25362 .nr(4)
25363 .kr(1)
25364 .sr(1)
25365 .m(m)
25366 .n(n)
25367 .k(k)
25368 .iterations(1)
25369 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25370 }
25371 }
25372 }
25373 }
25374
25375 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4) {
25376 for (uint32_t n = 8; n <= 12; n += 4) {
25377 for (size_t k = 1; k <= 5; k += 2) {
25378 GemmMicrokernelTester()
25379 .mr(4)
25380 .nr(4)
25381 .kr(1)
25382 .sr(1)
25383 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025384 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025385 .k(k)
25386 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25387 }
25388 }
25389 }
25390
25391 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_strided_cn) {
25392 for (uint32_t n = 8; n <= 12; n += 4) {
25393 for (size_t k = 1; k <= 5; k += 2) {
25394 GemmMicrokernelTester()
25395 .mr(4)
25396 .nr(4)
25397 .kr(1)
25398 .sr(1)
25399 .m(4)
25400 .n(n)
25401 .k(k)
25402 .cn_stride(7)
25403 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25404 }
25405 }
25406 }
25407
25408 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_strided_a) {
25409 for (uint32_t n = 8; n <= 12; n += 4) {
25410 for (size_t k = 1; k <= 5; k += 2) {
25411 GemmMicrokernelTester()
25412 .mr(4)
25413 .nr(4)
25414 .kr(1)
25415 .sr(1)
25416 .m(4)
25417 .n(n)
25418 .k(k)
25419 .a_stride(7)
25420 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25421 }
25422 }
25423 }
25424
25425 TEST(F32_GEMMINC_MINMAX_4X4__WASM, n_div_4_subtile) {
25426 for (uint32_t n = 8; n <= 12; n += 4) {
25427 for (size_t k = 1; k <= 5; k += 2) {
25428 for (uint32_t m = 1; m <= 4; m++) {
25429 GemmMicrokernelTester()
25430 .mr(4)
25431 .nr(4)
25432 .kr(1)
25433 .sr(1)
25434 .m(m)
25435 .n(n)
25436 .k(k)
25437 .iterations(1)
25438 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25439 }
25440 }
25441 }
25442 }
25443
25444 TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cm_subtile) {
25445 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025446 for (uint32_t n = 1; n <= 4; n++) {
25447 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025448 GemmMicrokernelTester()
25449 .mr(4)
25450 .nr(4)
25451 .kr(1)
25452 .sr(1)
25453 .m(m)
25454 .n(n)
25455 .k(k)
25456 .cm_stride(7)
25457 .iterations(1)
25458 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25459 }
25460 }
25461 }
25462 }
25463
25464 TEST(F32_GEMMINC_MINMAX_4X4__WASM, qmin) {
25465 GemmMicrokernelTester()
25466 .mr(4)
25467 .nr(4)
25468 .kr(1)
25469 .sr(1)
25470 .m(4)
25471 .n(4)
25472 .k(1)
25473 .qmin(128)
25474 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25475 }
25476
25477 TEST(F32_GEMMINC_MINMAX_4X4__WASM, qmax) {
25478 GemmMicrokernelTester()
25479 .mr(4)
25480 .nr(4)
25481 .kr(1)
25482 .sr(1)
25483 .m(4)
25484 .n(4)
25485 .k(1)
25486 .qmax(128)
25487 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25488 }
25489
25490 TEST(F32_GEMMINC_MINMAX_4X4__WASM, strided_cm) {
25491 GemmMicrokernelTester()
25492 .mr(4)
25493 .nr(4)
25494 .kr(1)
25495 .sr(1)
25496 .m(4)
25497 .n(4)
25498 .k(1)
25499 .cm_stride(7)
25500 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
25501 }
25502#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25503
25504
25505TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1) {
25506 GemmMicrokernelTester()
25507 .mr(2)
25508 .nr(4)
25509 .kr(1)
25510 .sr(1)
25511 .m(2)
25512 .n(4)
25513 .k(1)
25514 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25515}
25516
25517TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cn) {
25518 GemmMicrokernelTester()
25519 .mr(2)
25520 .nr(4)
25521 .kr(1)
25522 .sr(1)
25523 .m(2)
25524 .n(4)
25525 .k(1)
25526 .cn_stride(7)
25527 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25528}
25529
25530TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
25531 GemmMicrokernelTester()
25532 .mr(2)
25533 .nr(4)
25534 .kr(1)
25535 .sr(1)
25536 .m(2)
25537 .n(4)
25538 .k(1)
25539 .a_stride(3)
25540 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25541}
25542
25543TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025544 for (uint32_t n = 1; n <= 4; n++) {
25545 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025546 GemmMicrokernelTester()
25547 .mr(2)
25548 .nr(4)
25549 .kr(1)
25550 .sr(1)
25551 .m(m)
25552 .n(n)
25553 .k(1)
25554 .iterations(1)
25555 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25556 }
25557 }
25558}
25559
25560TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
25561 for (uint32_t m = 1; m <= 2; m++) {
25562 GemmMicrokernelTester()
25563 .mr(2)
25564 .nr(4)
25565 .kr(1)
25566 .sr(1)
25567 .m(m)
25568 .n(4)
25569 .k(1)
25570 .iterations(1)
25571 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25572 }
25573}
25574
25575TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
25576 for (uint32_t n = 1; n <= 4; n++) {
25577 GemmMicrokernelTester()
25578 .mr(2)
25579 .nr(4)
25580 .kr(1)
25581 .sr(1)
25582 .m(2)
25583 .n(n)
25584 .k(1)
25585 .iterations(1)
25586 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25587 }
25588}
25589
25590TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1) {
25591 for (size_t k = 2; k < 10; k++) {
25592 GemmMicrokernelTester()
25593 .mr(2)
25594 .nr(4)
25595 .kr(1)
25596 .sr(1)
25597 .m(2)
25598 .n(4)
25599 .k(k)
25600 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25601 }
25602}
25603
25604TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1_strided_a) {
25605 for (size_t k = 2; k < 10; k++) {
25606 GemmMicrokernelTester()
25607 .mr(2)
25608 .nr(4)
25609 .kr(1)
25610 .sr(1)
25611 .m(2)
25612 .n(4)
25613 .k(k)
25614 .a_stride(11)
25615 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25616 }
25617}
25618
25619TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
25620 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025621 for (uint32_t n = 1; n <= 4; n++) {
25622 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025623 GemmMicrokernelTester()
25624 .mr(2)
25625 .nr(4)
25626 .kr(1)
25627 .sr(1)
25628 .m(m)
25629 .n(n)
25630 .k(k)
25631 .iterations(1)
25632 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25633 }
25634 }
25635 }
25636}
25637
25638TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4) {
25639 for (uint32_t n = 5; n < 8; n++) {
25640 for (size_t k = 1; k <= 5; k += 2) {
25641 GemmMicrokernelTester()
25642 .mr(2)
25643 .nr(4)
25644 .kr(1)
25645 .sr(1)
25646 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025647 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025648 .k(k)
25649 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25650 }
25651 }
25652}
25653
25654TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
25655 for (uint32_t n = 5; n < 8; n++) {
25656 for (size_t k = 1; k <= 5; k += 2) {
25657 GemmMicrokernelTester()
25658 .mr(2)
25659 .nr(4)
25660 .kr(1)
25661 .sr(1)
25662 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025663 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025664 .k(k)
25665 .cn_stride(7)
25666 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25667 }
25668 }
25669}
25670
25671TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
25672 for (uint32_t n = 5; n < 8; n++) {
25673 for (size_t k = 1; k <= 5; k += 2) {
25674 GemmMicrokernelTester()
25675 .mr(2)
25676 .nr(4)
25677 .kr(1)
25678 .sr(1)
25679 .m(2)
25680 .n(n)
25681 .k(k)
25682 .a_stride(7)
25683 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25684 }
25685 }
25686}
25687
25688TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
25689 for (uint32_t n = 5; n < 8; n++) {
25690 for (size_t k = 1; k <= 5; k += 2) {
25691 for (uint32_t m = 1; m <= 2; m++) {
25692 GemmMicrokernelTester()
25693 .mr(2)
25694 .nr(4)
25695 .kr(1)
25696 .sr(1)
25697 .m(m)
25698 .n(n)
25699 .k(k)
25700 .iterations(1)
25701 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25702 }
25703 }
25704 }
25705}
25706
25707TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4) {
25708 for (uint32_t n = 8; n <= 12; n += 4) {
25709 for (size_t k = 1; k <= 5; k += 2) {
25710 GemmMicrokernelTester()
25711 .mr(2)
25712 .nr(4)
25713 .kr(1)
25714 .sr(1)
25715 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025716 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025717 .k(k)
25718 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25719 }
25720 }
25721}
25722
25723TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
25724 for (uint32_t n = 8; n <= 12; n += 4) {
25725 for (size_t k = 1; k <= 5; k += 2) {
25726 GemmMicrokernelTester()
25727 .mr(2)
25728 .nr(4)
25729 .kr(1)
25730 .sr(1)
25731 .m(2)
25732 .n(n)
25733 .k(k)
25734 .cn_stride(7)
25735 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25736 }
25737 }
25738}
25739
25740TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
25741 for (uint32_t n = 8; n <= 12; n += 4) {
25742 for (size_t k = 1; k <= 5; k += 2) {
25743 GemmMicrokernelTester()
25744 .mr(2)
25745 .nr(4)
25746 .kr(1)
25747 .sr(1)
25748 .m(2)
25749 .n(n)
25750 .k(k)
25751 .a_stride(7)
25752 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25753 }
25754 }
25755}
25756
25757TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, n_div_4_subtile) {
25758 for (uint32_t n = 8; n <= 12; n += 4) {
25759 for (size_t k = 1; k <= 5; k += 2) {
25760 for (uint32_t m = 1; m <= 2; m++) {
25761 GemmMicrokernelTester()
25762 .mr(2)
25763 .nr(4)
25764 .kr(1)
25765 .sr(1)
25766 .m(m)
25767 .n(n)
25768 .k(k)
25769 .iterations(1)
25770 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25771 }
25772 }
25773 }
25774}
25775
25776TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cm_subtile) {
25777 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025778 for (uint32_t n = 1; n <= 4; n++) {
25779 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025780 GemmMicrokernelTester()
25781 .mr(2)
25782 .nr(4)
25783 .kr(1)
25784 .sr(1)
25785 .m(m)
25786 .n(n)
25787 .k(k)
25788 .cm_stride(7)
25789 .iterations(1)
25790 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25791 }
25792 }
25793 }
25794}
25795
25796TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, qmin) {
25797 GemmMicrokernelTester()
25798 .mr(2)
25799 .nr(4)
25800 .kr(1)
25801 .sr(1)
25802 .m(2)
25803 .n(4)
25804 .k(1)
25805 .qmin(128)
25806 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25807}
25808
25809TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, qmax) {
25810 GemmMicrokernelTester()
25811 .mr(2)
25812 .nr(4)
25813 .kr(1)
25814 .sr(1)
25815 .m(2)
25816 .n(4)
25817 .k(1)
25818 .qmax(128)
25819 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25820}
25821
25822TEST(F32_GEMMINC_MINMAX_2X4__SCALAR, strided_cm) {
25823 GemmMicrokernelTester()
25824 .mr(2)
25825 .nr(4)
25826 .kr(1)
25827 .sr(1)
25828 .m(2)
25829 .n(4)
25830 .k(1)
25831 .cm_stride(7)
25832 .Test(xnn_f32_gemminc_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
25833}