blob: 451b6c35cdbb1cb44f68e06d905ce7a9b8dfd3e3 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-spmm-minmax.yaml
//   Generator: tools/generate-spmm-test.py
9
10
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/spmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "spmm-microkernel-tester.h"
18
19
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -080020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5b86c432020-12-06 19:15:03 -080021 TEST(F32_SPMM_MINMAX_4X1__NEON, k_eq_1) {
22 TEST_REQUIRES_ARM_NEON;
23 SpMMMicrokernelTester()
24 .mr(4)
25 .nr(1)
26 .m(4)
27 .n(1)
28 .k(1)
29 .sparsity(0.0f)
30 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
31 }
32
33 TEST(F32_SPMM_MINMAX_4X1__NEON, k_gt_1) {
34 TEST_REQUIRES_ARM_NEON;
35 for (size_t k = 2; k < 10; k++) {
36 SpMMMicrokernelTester()
37 .mr(4)
38 .nr(1)
39 .m(4)
40 .n(1)
41 .k(k)
42 .sparsity(0.0f)
43 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
44 }
45 }
46
47 TEST(F32_SPMM_MINMAX_4X1__NEON, n_gt_1) {
48 TEST_REQUIRES_ARM_NEON;
49 for (uint32_t n = 2; n < 10; n++) {
50 for (size_t k = 1; k <= 5; k += 2) {
51 SpMMMicrokernelTester()
52 .mr(4)
53 .nr(1)
54 .m(4)
55 .n(n)
56 .k(k)
57 .sparsity(0.0f)
58 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
59 }
60 }
61 }
62
63 TEST(F32_SPMM_MINMAX_4X1__NEON, m_lt_4) {
64 TEST_REQUIRES_ARM_NEON;
65 for (uint32_t m = 1; m < 4; m++) {
66 for (uint32_t n = 1; n < 10; n += 2) {
67 for (size_t k = 1; k <= 5; k += 2) {
68 SpMMMicrokernelTester()
69 .mr(4)
70 .nr(1)
71 .m(m)
72 .n(n)
73 .k(k)
74 .sparsity(0.0f)
75 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
76 }
77 }
78 }
79 }
80
81 TEST(F32_SPMM_MINMAX_4X1__NEON, m_div_4) {
82 TEST_REQUIRES_ARM_NEON;
83 for (uint32_t m = 8; m <= 12; m += 4) {
84 for (uint32_t n = 1; n < 10; n += 2) {
85 for (size_t k = 1; k <= 5; k += 2) {
86 SpMMMicrokernelTester()
87 .mr(4)
88 .nr(1)
89 .m(m)
90 .n(n)
91 .k(k)
92 .sparsity(0.0f)
93 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
94 }
95 }
96 }
97 }
98
99 TEST(F32_SPMM_MINMAX_4X1__NEON, m_gt_4) {
100 TEST_REQUIRES_ARM_NEON;
101 for (uint32_t m = 5; m < 8; m++) {
102 for (uint32_t n = 1; n < 10; n += 2) {
103 for (size_t k = 1; k <= 5; k += 2) {
104 SpMMMicrokernelTester()
105 .mr(4)
106 .nr(1)
107 .m(m)
108 .n(n)
109 .k(k)
110 .sparsity(0.0f)
111 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
112 }
113 }
114 }
115 }
116
117 TEST(F32_SPMM_MINMAX_4X1__NEON, output_stride) {
118 TEST_REQUIRES_ARM_NEON;
119 for (uint32_t n = 1; n < 10; n += 2) {
120 for (size_t k = 1; k <= 5; k += 2) {
121 SpMMMicrokernelTester()
122 .mr(4)
123 .nr(1)
124 .m(8)
125 .n(n)
126 .k(k)
127 .output_stride(11)
128 .sparsity(0.0f)
129 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
130 }
131 }
132 }
133
134 TEST(F32_SPMM_MINMAX_4X1__NEON, qmin) {
135 TEST_REQUIRES_ARM_NEON;
136 for (uint32_t n = 1; n < 10; n += 2) {
137 for (size_t k = 1; k <= 5; k += 2) {
138 SpMMMicrokernelTester()
139 .mr(4)
140 .nr(1)
141 .m(8)
142 .n(n)
143 .k(k)
144 .sparsity(0.0f)
145 .qmin(128)
146 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
147 }
148 }
149 }
150
151 TEST(F32_SPMM_MINMAX_4X1__NEON, qmax) {
152 TEST_REQUIRES_ARM_NEON;
153 for (uint32_t n = 1; n < 10; n += 2) {
154 for (size_t k = 1; k <= 5; k += 2) {
155 SpMMMicrokernelTester()
156 .mr(4)
157 .nr(1)
158 .m(8)
159 .n(n)
160 .k(k)
161 .sparsity(0.0f)
162 .qmax(128)
163 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
164 }
165 }
166 }
167
168 TEST(F32_SPMM_MINMAX_4X1__NEON, half_sparse) {
169 TEST_REQUIRES_ARM_NEON;
170 for (uint32_t n = 1; n < 10; n += 2) {
171 for (size_t k = 1; k <= 5; k += 2) {
172 SpMMMicrokernelTester()
173 .mr(4)
174 .nr(1)
175 .m(8)
176 .n(n)
177 .k(k)
178 .sparsity(0.5f)
179 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
180 }
181 }
182 }
183
184 TEST(F32_SPMM_MINMAX_4X1__NEON, zero_weights) {
185 TEST_REQUIRES_ARM_NEON;
186 for (uint32_t n = 1; n < 10; n += 2) {
187 for (size_t k = 1; k <= 5; k += 2) {
188 SpMMMicrokernelTester()
189 .mr(4)
190 .nr(1)
191 .m(8)
192 .n(n)
193 .k(k)
194 .sparsity(1.0f)
195 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon);
196 }
197 }
198 }
199#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
200
201
202#if XNN_ARCH_ARM || XNN_ARCH_ARM64
203 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, k_eq_1) {
204 TEST_REQUIRES_ARM_NEON;
205 SpMMMicrokernelTester()
206 .mr(4)
207 .nr(1)
208 .m(4)
209 .n(1)
210 .k(1)
211 .sparsity(0.0f)
212 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
213 }
214
215 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, k_gt_1) {
216 TEST_REQUIRES_ARM_NEON;
217 for (size_t k = 2; k < 10; k++) {
218 SpMMMicrokernelTester()
219 .mr(4)
220 .nr(1)
221 .m(4)
222 .n(1)
223 .k(k)
224 .sparsity(0.0f)
225 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
226 }
227 }
228
229 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, n_gt_1) {
230 TEST_REQUIRES_ARM_NEON;
231 for (uint32_t n = 2; n < 10; n++) {
232 for (size_t k = 1; k <= 5; k += 2) {
233 SpMMMicrokernelTester()
234 .mr(4)
235 .nr(1)
236 .m(4)
237 .n(n)
238 .k(k)
239 .sparsity(0.0f)
240 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
241 }
242 }
243 }
244
245 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_lt_4) {
246 TEST_REQUIRES_ARM_NEON;
247 for (uint32_t m = 1; m < 4; m++) {
248 for (uint32_t n = 1; n < 10; n += 2) {
249 for (size_t k = 1; k <= 5; k += 2) {
250 SpMMMicrokernelTester()
251 .mr(4)
252 .nr(1)
253 .m(m)
254 .n(n)
255 .k(k)
256 .sparsity(0.0f)
257 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
258 }
259 }
260 }
261 }
262
263 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_div_4) {
264 TEST_REQUIRES_ARM_NEON;
265 for (uint32_t m = 8; m <= 12; m += 4) {
266 for (uint32_t n = 1; n < 10; n += 2) {
267 for (size_t k = 1; k <= 5; k += 2) {
268 SpMMMicrokernelTester()
269 .mr(4)
270 .nr(1)
271 .m(m)
272 .n(n)
273 .k(k)
274 .sparsity(0.0f)
275 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
276 }
277 }
278 }
279 }
280
281 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, m_gt_4) {
282 TEST_REQUIRES_ARM_NEON;
283 for (uint32_t m = 5; m < 8; m++) {
284 for (uint32_t n = 1; n < 10; n += 2) {
285 for (size_t k = 1; k <= 5; k += 2) {
286 SpMMMicrokernelTester()
287 .mr(4)
288 .nr(1)
289 .m(m)
290 .n(n)
291 .k(k)
292 .sparsity(0.0f)
293 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
294 }
295 }
296 }
297 }
298
299 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, output_stride) {
300 TEST_REQUIRES_ARM_NEON;
301 for (uint32_t n = 1; n < 10; n += 2) {
302 for (size_t k = 1; k <= 5; k += 2) {
303 SpMMMicrokernelTester()
304 .mr(4)
305 .nr(1)
306 .m(8)
307 .n(n)
308 .k(k)
309 .output_stride(11)
310 .sparsity(0.0f)
311 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
312 }
313 }
314 }
315
316 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, qmin) {
317 TEST_REQUIRES_ARM_NEON;
318 for (uint32_t n = 1; n < 10; n += 2) {
319 for (size_t k = 1; k <= 5; k += 2) {
320 SpMMMicrokernelTester()
321 .mr(4)
322 .nr(1)
323 .m(8)
324 .n(n)
325 .k(k)
326 .sparsity(0.0f)
327 .qmin(128)
328 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
329 }
330 }
331 }
332
333 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, qmax) {
334 TEST_REQUIRES_ARM_NEON;
335 for (uint32_t n = 1; n < 10; n += 2) {
336 for (size_t k = 1; k <= 5; k += 2) {
337 SpMMMicrokernelTester()
338 .mr(4)
339 .nr(1)
340 .m(8)
341 .n(n)
342 .k(k)
343 .sparsity(0.0f)
344 .qmax(128)
345 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
346 }
347 }
348 }
349
350 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, half_sparse) {
351 TEST_REQUIRES_ARM_NEON;
352 for (uint32_t n = 1; n < 10; n += 2) {
353 for (size_t k = 1; k <= 5; k += 2) {
354 SpMMMicrokernelTester()
355 .mr(4)
356 .nr(1)
357 .m(8)
358 .n(n)
359 .k(k)
360 .sparsity(0.5f)
361 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
362 }
363 }
364 }
365
366 TEST(F32_SPMM_MINMAX_4X1__NEON_PIPELINED, zero_weights) {
367 TEST_REQUIRES_ARM_NEON;
368 for (uint32_t n = 1; n < 10; n += 2) {
369 for (size_t k = 1; k <= 5; k += 2) {
370 SpMMMicrokernelTester()
371 .mr(4)
372 .nr(1)
373 .m(8)
374 .n(n)
375 .k(k)
376 .sparsity(1.0f)
377 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined);
378 }
379 }
380 }
381#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
382
383
384#if XNN_ARCH_ARM || XNN_ARCH_ARM64
385 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_eq_2) {
386 TEST_REQUIRES_ARM_NEON;
387 SpMMMicrokernelTester()
388 .mr(4)
389 .nr(1)
390 .m(4)
391 .n(1)
392 .k(2)
393 .sparsity(0.0f)
394 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
395 }
396
397 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_lt_2) {
398 TEST_REQUIRES_ARM_NEON;
399 for (size_t k = 1; k < 2; k++) {
400 SpMMMicrokernelTester()
401 .mr(4)
402 .nr(1)
403 .m(4)
404 .n(1)
405 .k(k)
406 .sparsity(0.0f)
407 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
408 }
409 }
410
411 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_gt_2) {
412 TEST_REQUIRES_ARM_NEON;
413 for (size_t k = 3; k < 4; k++) {
414 SpMMMicrokernelTester()
415 .mr(4)
416 .nr(1)
417 .m(4)
418 .n(1)
419 .k(k)
420 .sparsity(0.0f)
421 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
422 }
423 }
424
425 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, k_div_2) {
426 TEST_REQUIRES_ARM_NEON;
427 for (size_t k = 4; k <= 20; k += 2) {
428 SpMMMicrokernelTester()
429 .mr(4)
430 .nr(1)
431 .m(4)
432 .n(1)
433 .k(k)
434 .sparsity(0.0f)
435 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
436 }
437 }
438
439 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, n_gt_1) {
440 TEST_REQUIRES_ARM_NEON;
441 for (uint32_t n = 2; n < 10; n++) {
442 for (size_t k = 1; k <= 10; k += 3) {
443 SpMMMicrokernelTester()
444 .mr(4)
445 .nr(1)
446 .m(4)
447 .n(n)
448 .k(k)
449 .sparsity(0.0f)
450 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
451 }
452 }
453 }
454
455 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_lt_4) {
456 TEST_REQUIRES_ARM_NEON;
457 for (uint32_t m = 1; m < 4; m++) {
458 for (uint32_t n = 1; n < 10; n += 2) {
459 for (size_t k = 1; k <= 10; k += 3) {
460 SpMMMicrokernelTester()
461 .mr(4)
462 .nr(1)
463 .m(m)
464 .n(n)
465 .k(k)
466 .sparsity(0.0f)
467 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
468 }
469 }
470 }
471 }
472
473 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_div_4) {
474 TEST_REQUIRES_ARM_NEON;
475 for (uint32_t m = 8; m <= 12; m += 4) {
476 for (uint32_t n = 1; n < 10; n += 2) {
477 for (size_t k = 1; k <= 10; k += 3) {
478 SpMMMicrokernelTester()
479 .mr(4)
480 .nr(1)
481 .m(m)
482 .n(n)
483 .k(k)
484 .sparsity(0.0f)
485 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
486 }
487 }
488 }
489 }
490
491 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, m_gt_4) {
492 TEST_REQUIRES_ARM_NEON;
493 for (uint32_t m = 5; m < 8; m++) {
494 for (uint32_t n = 1; n < 10; n += 2) {
495 for (size_t k = 1; k <= 10; k += 3) {
496 SpMMMicrokernelTester()
497 .mr(4)
498 .nr(1)
499 .m(m)
500 .n(n)
501 .k(k)
502 .sparsity(0.0f)
503 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
504 }
505 }
506 }
507 }
508
509 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, output_stride) {
510 TEST_REQUIRES_ARM_NEON;
511 for (uint32_t n = 1; n < 10; n += 2) {
512 for (size_t k = 1; k <= 10; k += 3) {
513 SpMMMicrokernelTester()
514 .mr(4)
515 .nr(1)
516 .m(8)
517 .n(n)
518 .k(k)
519 .output_stride(11)
520 .sparsity(0.0f)
521 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
522 }
523 }
524 }
525
526 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, qmin) {
527 TEST_REQUIRES_ARM_NEON;
528 for (uint32_t n = 1; n < 10; n += 2) {
529 for (size_t k = 1; k <= 10; k += 3) {
530 SpMMMicrokernelTester()
531 .mr(4)
532 .nr(1)
533 .m(8)
534 .n(n)
535 .k(k)
536 .sparsity(0.0f)
537 .qmin(128)
538 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
539 }
540 }
541 }
542
543 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, qmax) {
544 TEST_REQUIRES_ARM_NEON;
545 for (uint32_t n = 1; n < 10; n += 2) {
546 for (size_t k = 1; k <= 10; k += 3) {
547 SpMMMicrokernelTester()
548 .mr(4)
549 .nr(1)
550 .m(8)
551 .n(n)
552 .k(k)
553 .sparsity(0.0f)
554 .qmax(128)
555 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
556 }
557 }
558 }
559
560 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, half_sparse) {
561 TEST_REQUIRES_ARM_NEON;
562 for (uint32_t n = 1; n < 10; n += 2) {
563 for (size_t k = 1; k <= 10; k += 3) {
564 SpMMMicrokernelTester()
565 .mr(4)
566 .nr(1)
567 .m(8)
568 .n(n)
569 .k(k)
570 .sparsity(0.5f)
571 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
572 }
573 }
574 }
575
576 TEST(F32_SPMM_MINMAX_4X1__NEON_X2, zero_weights) {
577 TEST_REQUIRES_ARM_NEON;
578 for (uint32_t n = 1; n < 10; n += 2) {
579 for (size_t k = 1; k <= 10; k += 3) {
580 SpMMMicrokernelTester()
581 .mr(4)
582 .nr(1)
583 .m(8)
584 .n(n)
585 .k(k)
586 .sparsity(1.0f)
587 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neon_x2);
588 }
589 }
590 }
591#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
592
593
594#if XNN_ARCH_ARM || XNN_ARCH_ARM64
595 TEST(F32_SPMM_MINMAX_8X1__NEON, k_eq_1) {
596 TEST_REQUIRES_ARM_NEON;
597 SpMMMicrokernelTester()
598 .mr(8)
599 .nr(1)
600 .m(8)
601 .n(1)
602 .k(1)
603 .sparsity(0.0f)
604 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
605 }
606
607 TEST(F32_SPMM_MINMAX_8X1__NEON, k_gt_1) {
608 TEST_REQUIRES_ARM_NEON;
609 for (size_t k = 2; k < 10; k++) {
610 SpMMMicrokernelTester()
611 .mr(8)
612 .nr(1)
613 .m(8)
614 .n(1)
615 .k(k)
616 .sparsity(0.0f)
617 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
618 }
619 }
620
621 TEST(F32_SPMM_MINMAX_8X1__NEON, n_gt_1) {
622 TEST_REQUIRES_ARM_NEON;
623 for (uint32_t n = 2; n < 10; n++) {
624 for (size_t k = 1; k <= 5; k += 2) {
625 SpMMMicrokernelTester()
626 .mr(8)
627 .nr(1)
628 .m(8)
629 .n(n)
630 .k(k)
631 .sparsity(0.0f)
632 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
633 }
634 }
635 }
636
637 TEST(F32_SPMM_MINMAX_8X1__NEON, m_lt_8) {
638 TEST_REQUIRES_ARM_NEON;
639 for (uint32_t m = 1; m < 8; m++) {
640 for (uint32_t n = 1; n < 10; n += 2) {
641 for (size_t k = 1; k <= 5; k += 2) {
642 SpMMMicrokernelTester()
643 .mr(8)
644 .nr(1)
645 .m(m)
646 .n(n)
647 .k(k)
648 .sparsity(0.0f)
649 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
650 }
651 }
652 }
653 }
654
655 TEST(F32_SPMM_MINMAX_8X1__NEON, m_div_8) {
656 TEST_REQUIRES_ARM_NEON;
657 for (uint32_t m = 16; m <= 24; m += 8) {
658 for (uint32_t n = 1; n < 10; n += 2) {
659 for (size_t k = 1; k <= 5; k += 2) {
660 SpMMMicrokernelTester()
661 .mr(8)
662 .nr(1)
663 .m(m)
664 .n(n)
665 .k(k)
666 .sparsity(0.0f)
667 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
668 }
669 }
670 }
671 }
672
673 TEST(F32_SPMM_MINMAX_8X1__NEON, m_gt_8) {
674 TEST_REQUIRES_ARM_NEON;
675 for (uint32_t m = 9; m < 16; m++) {
676 for (uint32_t n = 1; n < 10; n += 2) {
677 for (size_t k = 1; k <= 5; k += 2) {
678 SpMMMicrokernelTester()
679 .mr(8)
680 .nr(1)
681 .m(m)
682 .n(n)
683 .k(k)
684 .sparsity(0.0f)
685 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
686 }
687 }
688 }
689 }
690
691 TEST(F32_SPMM_MINMAX_8X1__NEON, output_stride) {
692 TEST_REQUIRES_ARM_NEON;
693 for (uint32_t n = 1; n < 10; n += 2) {
694 for (size_t k = 1; k <= 5; k += 2) {
695 SpMMMicrokernelTester()
696 .mr(8)
697 .nr(1)
698 .m(16)
699 .n(n)
700 .k(k)
701 .output_stride(19)
702 .sparsity(0.0f)
703 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
704 }
705 }
706 }
707
708 TEST(F32_SPMM_MINMAX_8X1__NEON, qmin) {
709 TEST_REQUIRES_ARM_NEON;
710 for (uint32_t n = 1; n < 10; n += 2) {
711 for (size_t k = 1; k <= 5; k += 2) {
712 SpMMMicrokernelTester()
713 .mr(8)
714 .nr(1)
715 .m(16)
716 .n(n)
717 .k(k)
718 .sparsity(0.0f)
719 .qmin(128)
720 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
721 }
722 }
723 }
724
725 TEST(F32_SPMM_MINMAX_8X1__NEON, qmax) {
726 TEST_REQUIRES_ARM_NEON;
727 for (uint32_t n = 1; n < 10; n += 2) {
728 for (size_t k = 1; k <= 5; k += 2) {
729 SpMMMicrokernelTester()
730 .mr(8)
731 .nr(1)
732 .m(16)
733 .n(n)
734 .k(k)
735 .sparsity(0.0f)
736 .qmax(128)
737 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
738 }
739 }
740 }
741
742 TEST(F32_SPMM_MINMAX_8X1__NEON, half_sparse) {
743 TEST_REQUIRES_ARM_NEON;
744 for (uint32_t n = 1; n < 10; n += 2) {
745 for (size_t k = 1; k <= 5; k += 2) {
746 SpMMMicrokernelTester()
747 .mr(8)
748 .nr(1)
749 .m(16)
750 .n(n)
751 .k(k)
752 .sparsity(0.5f)
753 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
754 }
755 }
756 }
757
758 TEST(F32_SPMM_MINMAX_8X1__NEON, zero_weights) {
759 TEST_REQUIRES_ARM_NEON;
760 for (uint32_t n = 1; n < 10; n += 2) {
761 for (size_t k = 1; k <= 5; k += 2) {
762 SpMMMicrokernelTester()
763 .mr(8)
764 .nr(1)
765 .m(16)
766 .n(n)
767 .k(k)
768 .sparsity(1.0f)
769 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon);
770 }
771 }
772 }
773#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
774
775
776#if XNN_ARCH_ARM || XNN_ARCH_ARM64
777 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, k_eq_1) {
778 TEST_REQUIRES_ARM_NEON;
779 SpMMMicrokernelTester()
780 .mr(8)
781 .nr(1)
782 .m(8)
783 .n(1)
784 .k(1)
785 .sparsity(0.0f)
786 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
787 }
788
789 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, k_gt_1) {
790 TEST_REQUIRES_ARM_NEON;
791 for (size_t k = 2; k < 10; k++) {
792 SpMMMicrokernelTester()
793 .mr(8)
794 .nr(1)
795 .m(8)
796 .n(1)
797 .k(k)
798 .sparsity(0.0f)
799 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
800 }
801 }
802
803 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, n_gt_1) {
804 TEST_REQUIRES_ARM_NEON;
805 for (uint32_t n = 2; n < 10; n++) {
806 for (size_t k = 1; k <= 5; k += 2) {
807 SpMMMicrokernelTester()
808 .mr(8)
809 .nr(1)
810 .m(8)
811 .n(n)
812 .k(k)
813 .sparsity(0.0f)
814 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
815 }
816 }
817 }
818
819 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_lt_8) {
820 TEST_REQUIRES_ARM_NEON;
821 for (uint32_t m = 1; m < 8; m++) {
822 for (uint32_t n = 1; n < 10; n += 2) {
823 for (size_t k = 1; k <= 5; k += 2) {
824 SpMMMicrokernelTester()
825 .mr(8)
826 .nr(1)
827 .m(m)
828 .n(n)
829 .k(k)
830 .sparsity(0.0f)
831 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
832 }
833 }
834 }
835 }
836
837 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_div_8) {
838 TEST_REQUIRES_ARM_NEON;
839 for (uint32_t m = 16; m <= 24; m += 8) {
840 for (uint32_t n = 1; n < 10; n += 2) {
841 for (size_t k = 1; k <= 5; k += 2) {
842 SpMMMicrokernelTester()
843 .mr(8)
844 .nr(1)
845 .m(m)
846 .n(n)
847 .k(k)
848 .sparsity(0.0f)
849 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
850 }
851 }
852 }
853 }
854
855 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, m_gt_8) {
856 TEST_REQUIRES_ARM_NEON;
857 for (uint32_t m = 9; m < 16; m++) {
858 for (uint32_t n = 1; n < 10; n += 2) {
859 for (size_t k = 1; k <= 5; k += 2) {
860 SpMMMicrokernelTester()
861 .mr(8)
862 .nr(1)
863 .m(m)
864 .n(n)
865 .k(k)
866 .sparsity(0.0f)
867 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
868 }
869 }
870 }
871 }
872
873 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, output_stride) {
874 TEST_REQUIRES_ARM_NEON;
875 for (uint32_t n = 1; n < 10; n += 2) {
876 for (size_t k = 1; k <= 5; k += 2) {
877 SpMMMicrokernelTester()
878 .mr(8)
879 .nr(1)
880 .m(16)
881 .n(n)
882 .k(k)
883 .output_stride(19)
884 .sparsity(0.0f)
885 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
886 }
887 }
888 }
889
890 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, qmin) {
891 TEST_REQUIRES_ARM_NEON;
892 for (uint32_t n = 1; n < 10; n += 2) {
893 for (size_t k = 1; k <= 5; k += 2) {
894 SpMMMicrokernelTester()
895 .mr(8)
896 .nr(1)
897 .m(16)
898 .n(n)
899 .k(k)
900 .sparsity(0.0f)
901 .qmin(128)
902 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
903 }
904 }
905 }
906
907 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, qmax) {
908 TEST_REQUIRES_ARM_NEON;
909 for (uint32_t n = 1; n < 10; n += 2) {
910 for (size_t k = 1; k <= 5; k += 2) {
911 SpMMMicrokernelTester()
912 .mr(8)
913 .nr(1)
914 .m(16)
915 .n(n)
916 .k(k)
917 .sparsity(0.0f)
918 .qmax(128)
919 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
920 }
921 }
922 }
923
924 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, half_sparse) {
925 TEST_REQUIRES_ARM_NEON;
926 for (uint32_t n = 1; n < 10; n += 2) {
927 for (size_t k = 1; k <= 5; k += 2) {
928 SpMMMicrokernelTester()
929 .mr(8)
930 .nr(1)
931 .m(16)
932 .n(n)
933 .k(k)
934 .sparsity(0.5f)
935 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
936 }
937 }
938 }
939
940 TEST(F32_SPMM_MINMAX_8X1__NEON_PIPELINED, zero_weights) {
941 TEST_REQUIRES_ARM_NEON;
942 for (uint32_t n = 1; n < 10; n += 2) {
943 for (size_t k = 1; k <= 5; k += 2) {
944 SpMMMicrokernelTester()
945 .mr(8)
946 .nr(1)
947 .m(16)
948 .n(n)
949 .k(k)
950 .sparsity(1.0f)
951 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined);
952 }
953 }
954 }
955#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
956
957
958#if XNN_ARCH_ARM || XNN_ARCH_ARM64
959 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_eq_2) {
960 TEST_REQUIRES_ARM_NEON;
961 SpMMMicrokernelTester()
962 .mr(8)
963 .nr(1)
964 .m(8)
965 .n(1)
966 .k(2)
967 .sparsity(0.0f)
968 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
969 }
970
971 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_lt_2) {
972 TEST_REQUIRES_ARM_NEON;
973 for (size_t k = 1; k < 2; k++) {
974 SpMMMicrokernelTester()
975 .mr(8)
976 .nr(1)
977 .m(8)
978 .n(1)
979 .k(k)
980 .sparsity(0.0f)
981 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
982 }
983 }
984
985 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_gt_2) {
986 TEST_REQUIRES_ARM_NEON;
987 for (size_t k = 3; k < 4; k++) {
988 SpMMMicrokernelTester()
989 .mr(8)
990 .nr(1)
991 .m(8)
992 .n(1)
993 .k(k)
994 .sparsity(0.0f)
995 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
996 }
997 }
998
999 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, k_div_2) {
1000 TEST_REQUIRES_ARM_NEON;
1001 for (size_t k = 4; k <= 20; k += 2) {
1002 SpMMMicrokernelTester()
1003 .mr(8)
1004 .nr(1)
1005 .m(8)
1006 .n(1)
1007 .k(k)
1008 .sparsity(0.0f)
1009 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1010 }
1011 }
1012
1013 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, n_gt_1) {
1014 TEST_REQUIRES_ARM_NEON;
1015 for (uint32_t n = 2; n < 10; n++) {
1016 for (size_t k = 1; k <= 10; k += 3) {
1017 SpMMMicrokernelTester()
1018 .mr(8)
1019 .nr(1)
1020 .m(8)
1021 .n(n)
1022 .k(k)
1023 .sparsity(0.0f)
1024 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1025 }
1026 }
1027 }
1028
1029 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_lt_8) {
1030 TEST_REQUIRES_ARM_NEON;
1031 for (uint32_t m = 1; m < 8; m++) {
1032 for (uint32_t n = 1; n < 10; n += 2) {
1033 for (size_t k = 1; k <= 10; k += 3) {
1034 SpMMMicrokernelTester()
1035 .mr(8)
1036 .nr(1)
1037 .m(m)
1038 .n(n)
1039 .k(k)
1040 .sparsity(0.0f)
1041 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1042 }
1043 }
1044 }
1045 }
1046
1047 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_div_8) {
1048 TEST_REQUIRES_ARM_NEON;
1049 for (uint32_t m = 16; m <= 24; m += 8) {
1050 for (uint32_t n = 1; n < 10; n += 2) {
1051 for (size_t k = 1; k <= 10; k += 3) {
1052 SpMMMicrokernelTester()
1053 .mr(8)
1054 .nr(1)
1055 .m(m)
1056 .n(n)
1057 .k(k)
1058 .sparsity(0.0f)
1059 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1060 }
1061 }
1062 }
1063 }
1064
1065 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, m_gt_8) {
1066 TEST_REQUIRES_ARM_NEON;
1067 for (uint32_t m = 9; m < 16; m++) {
1068 for (uint32_t n = 1; n < 10; n += 2) {
1069 for (size_t k = 1; k <= 10; k += 3) {
1070 SpMMMicrokernelTester()
1071 .mr(8)
1072 .nr(1)
1073 .m(m)
1074 .n(n)
1075 .k(k)
1076 .sparsity(0.0f)
1077 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1078 }
1079 }
1080 }
1081 }
1082
1083 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, output_stride) {
1084 TEST_REQUIRES_ARM_NEON;
1085 for (uint32_t n = 1; n < 10; n += 2) {
1086 for (size_t k = 1; k <= 10; k += 3) {
1087 SpMMMicrokernelTester()
1088 .mr(8)
1089 .nr(1)
1090 .m(16)
1091 .n(n)
1092 .k(k)
1093 .output_stride(19)
1094 .sparsity(0.0f)
1095 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1096 }
1097 }
1098 }
1099
1100 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, qmin) {
1101 TEST_REQUIRES_ARM_NEON;
1102 for (uint32_t n = 1; n < 10; n += 2) {
1103 for (size_t k = 1; k <= 10; k += 3) {
1104 SpMMMicrokernelTester()
1105 .mr(8)
1106 .nr(1)
1107 .m(16)
1108 .n(n)
1109 .k(k)
1110 .sparsity(0.0f)
1111 .qmin(128)
1112 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1113 }
1114 }
1115 }
1116
1117 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, qmax) {
1118 TEST_REQUIRES_ARM_NEON;
1119 for (uint32_t n = 1; n < 10; n += 2) {
1120 for (size_t k = 1; k <= 10; k += 3) {
1121 SpMMMicrokernelTester()
1122 .mr(8)
1123 .nr(1)
1124 .m(16)
1125 .n(n)
1126 .k(k)
1127 .sparsity(0.0f)
1128 .qmax(128)
1129 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1130 }
1131 }
1132 }
1133
1134 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, half_sparse) {
1135 TEST_REQUIRES_ARM_NEON;
1136 for (uint32_t n = 1; n < 10; n += 2) {
1137 for (size_t k = 1; k <= 10; k += 3) {
1138 SpMMMicrokernelTester()
1139 .mr(8)
1140 .nr(1)
1141 .m(16)
1142 .n(n)
1143 .k(k)
1144 .sparsity(0.5f)
1145 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1146 }
1147 }
1148 }
1149
1150 TEST(F32_SPMM_MINMAX_8X1__NEON_X2, zero_weights) {
1151 TEST_REQUIRES_ARM_NEON;
1152 for (uint32_t n = 1; n < 10; n += 2) {
1153 for (size_t k = 1; k <= 10; k += 3) {
1154 SpMMMicrokernelTester()
1155 .mr(8)
1156 .nr(1)
1157 .m(16)
1158 .n(n)
1159 .k(k)
1160 .sparsity(1.0f)
1161 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neon_x2);
1162 }
1163 }
1164 }
1165#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1166
1167
1168#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1169 TEST(F32_SPMM_MINMAX_12X1__NEON, k_eq_1) {
1170 TEST_REQUIRES_ARM_NEON;
1171 SpMMMicrokernelTester()
1172 .mr(12)
1173 .nr(1)
1174 .m(12)
1175 .n(1)
1176 .k(1)
1177 .sparsity(0.0f)
1178 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1179 }
1180
1181 TEST(F32_SPMM_MINMAX_12X1__NEON, k_gt_1) {
1182 TEST_REQUIRES_ARM_NEON;
1183 for (size_t k = 2; k < 10; k++) {
1184 SpMMMicrokernelTester()
1185 .mr(12)
1186 .nr(1)
1187 .m(12)
1188 .n(1)
1189 .k(k)
1190 .sparsity(0.0f)
1191 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1192 }
1193 }
1194
1195 TEST(F32_SPMM_MINMAX_12X1__NEON, n_gt_1) {
1196 TEST_REQUIRES_ARM_NEON;
1197 for (uint32_t n = 2; n < 10; n++) {
1198 for (size_t k = 1; k <= 5; k += 2) {
1199 SpMMMicrokernelTester()
1200 .mr(12)
1201 .nr(1)
1202 .m(12)
1203 .n(n)
1204 .k(k)
1205 .sparsity(0.0f)
1206 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1207 }
1208 }
1209 }
1210
1211 TEST(F32_SPMM_MINMAX_12X1__NEON, m_lt_12) {
1212 TEST_REQUIRES_ARM_NEON;
1213 for (uint32_t m = 1; m < 12; m++) {
1214 for (uint32_t n = 1; n < 10; n += 2) {
1215 for (size_t k = 1; k <= 5; k += 2) {
1216 SpMMMicrokernelTester()
1217 .mr(12)
1218 .nr(1)
1219 .m(m)
1220 .n(n)
1221 .k(k)
1222 .sparsity(0.0f)
1223 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1224 }
1225 }
1226 }
1227 }
1228
1229 TEST(F32_SPMM_MINMAX_12X1__NEON, m_div_12) {
1230 TEST_REQUIRES_ARM_NEON;
1231 for (uint32_t m = 24; m <= 36; m += 12) {
1232 for (uint32_t n = 1; n < 10; n += 2) {
1233 for (size_t k = 1; k <= 5; k += 2) {
1234 SpMMMicrokernelTester()
1235 .mr(12)
1236 .nr(1)
1237 .m(m)
1238 .n(n)
1239 .k(k)
1240 .sparsity(0.0f)
1241 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1242 }
1243 }
1244 }
1245 }
1246
1247 TEST(F32_SPMM_MINMAX_12X1__NEON, m_gt_12) {
1248 TEST_REQUIRES_ARM_NEON;
1249 for (uint32_t m = 13; m < 24; m++) {
1250 for (uint32_t n = 1; n < 10; n += 2) {
1251 for (size_t k = 1; k <= 5; k += 2) {
1252 SpMMMicrokernelTester()
1253 .mr(12)
1254 .nr(1)
1255 .m(m)
1256 .n(n)
1257 .k(k)
1258 .sparsity(0.0f)
1259 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1260 }
1261 }
1262 }
1263 }
1264
1265 TEST(F32_SPMM_MINMAX_12X1__NEON, output_stride) {
1266 TEST_REQUIRES_ARM_NEON;
1267 for (uint32_t n = 1; n < 10; n += 2) {
1268 for (size_t k = 1; k <= 5; k += 2) {
1269 SpMMMicrokernelTester()
1270 .mr(12)
1271 .nr(1)
1272 .m(24)
1273 .n(n)
1274 .k(k)
1275 .output_stride(29)
1276 .sparsity(0.0f)
1277 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1278 }
1279 }
1280 }
1281
1282 TEST(F32_SPMM_MINMAX_12X1__NEON, qmin) {
1283 TEST_REQUIRES_ARM_NEON;
1284 for (uint32_t n = 1; n < 10; n += 2) {
1285 for (size_t k = 1; k <= 5; k += 2) {
1286 SpMMMicrokernelTester()
1287 .mr(12)
1288 .nr(1)
1289 .m(24)
1290 .n(n)
1291 .k(k)
1292 .sparsity(0.0f)
1293 .qmin(128)
1294 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1295 }
1296 }
1297 }
1298
1299 TEST(F32_SPMM_MINMAX_12X1__NEON, qmax) {
1300 TEST_REQUIRES_ARM_NEON;
1301 for (uint32_t n = 1; n < 10; n += 2) {
1302 for (size_t k = 1; k <= 5; k += 2) {
1303 SpMMMicrokernelTester()
1304 .mr(12)
1305 .nr(1)
1306 .m(24)
1307 .n(n)
1308 .k(k)
1309 .sparsity(0.0f)
1310 .qmax(128)
1311 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1312 }
1313 }
1314 }
1315
1316 TEST(F32_SPMM_MINMAX_12X1__NEON, half_sparse) {
1317 TEST_REQUIRES_ARM_NEON;
1318 for (uint32_t n = 1; n < 10; n += 2) {
1319 for (size_t k = 1; k <= 5; k += 2) {
1320 SpMMMicrokernelTester()
1321 .mr(12)
1322 .nr(1)
1323 .m(24)
1324 .n(n)
1325 .k(k)
1326 .sparsity(0.5f)
1327 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1328 }
1329 }
1330 }
1331
1332 TEST(F32_SPMM_MINMAX_12X1__NEON, zero_weights) {
1333 TEST_REQUIRES_ARM_NEON;
1334 for (uint32_t n = 1; n < 10; n += 2) {
1335 for (size_t k = 1; k <= 5; k += 2) {
1336 SpMMMicrokernelTester()
1337 .mr(12)
1338 .nr(1)
1339 .m(24)
1340 .n(n)
1341 .k(k)
1342 .sparsity(1.0f)
1343 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neon);
1344 }
1345 }
1346 }
1347#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1348
1349
1350#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1351 TEST(F32_SPMM_MINMAX_16X1__NEON, k_eq_1) {
1352 TEST_REQUIRES_ARM_NEON;
1353 SpMMMicrokernelTester()
1354 .mr(16)
1355 .nr(1)
1356 .m(16)
1357 .n(1)
1358 .k(1)
1359 .sparsity(0.0f)
1360 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1361 }
1362
1363 TEST(F32_SPMM_MINMAX_16X1__NEON, k_gt_1) {
1364 TEST_REQUIRES_ARM_NEON;
1365 for (size_t k = 2; k < 10; k++) {
1366 SpMMMicrokernelTester()
1367 .mr(16)
1368 .nr(1)
1369 .m(16)
1370 .n(1)
1371 .k(k)
1372 .sparsity(0.0f)
1373 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1374 }
1375 }
1376
1377 TEST(F32_SPMM_MINMAX_16X1__NEON, n_gt_1) {
1378 TEST_REQUIRES_ARM_NEON;
1379 for (uint32_t n = 2; n < 10; n++) {
1380 for (size_t k = 1; k <= 5; k += 2) {
1381 SpMMMicrokernelTester()
1382 .mr(16)
1383 .nr(1)
1384 .m(16)
1385 .n(n)
1386 .k(k)
1387 .sparsity(0.0f)
1388 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1389 }
1390 }
1391 }
1392
1393 TEST(F32_SPMM_MINMAX_16X1__NEON, m_lt_16) {
1394 TEST_REQUIRES_ARM_NEON;
1395 for (uint32_t m = 1; m < 16; m++) {
1396 for (uint32_t n = 1; n < 10; n += 2) {
1397 for (size_t k = 1; k <= 5; k += 2) {
1398 SpMMMicrokernelTester()
1399 .mr(16)
1400 .nr(1)
1401 .m(m)
1402 .n(n)
1403 .k(k)
1404 .sparsity(0.0f)
1405 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1406 }
1407 }
1408 }
1409 }
1410
1411 TEST(F32_SPMM_MINMAX_16X1__NEON, m_div_16) {
1412 TEST_REQUIRES_ARM_NEON;
1413 for (uint32_t m = 32; m <= 48; m += 16) {
1414 for (uint32_t n = 1; n < 10; n += 2) {
1415 for (size_t k = 1; k <= 5; k += 2) {
1416 SpMMMicrokernelTester()
1417 .mr(16)
1418 .nr(1)
1419 .m(m)
1420 .n(n)
1421 .k(k)
1422 .sparsity(0.0f)
1423 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1424 }
1425 }
1426 }
1427 }
1428
1429 TEST(F32_SPMM_MINMAX_16X1__NEON, m_gt_16) {
1430 TEST_REQUIRES_ARM_NEON;
1431 for (uint32_t m = 17; m < 32; m++) {
1432 for (uint32_t n = 1; n < 10; n += 2) {
1433 for (size_t k = 1; k <= 5; k += 2) {
1434 SpMMMicrokernelTester()
1435 .mr(16)
1436 .nr(1)
1437 .m(m)
1438 .n(n)
1439 .k(k)
1440 .sparsity(0.0f)
1441 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1442 }
1443 }
1444 }
1445 }
1446
1447 TEST(F32_SPMM_MINMAX_16X1__NEON, output_stride) {
1448 TEST_REQUIRES_ARM_NEON;
1449 for (uint32_t n = 1; n < 10; n += 2) {
1450 for (size_t k = 1; k <= 5; k += 2) {
1451 SpMMMicrokernelTester()
1452 .mr(16)
1453 .nr(1)
1454 .m(32)
1455 .n(n)
1456 .k(k)
1457 .output_stride(37)
1458 .sparsity(0.0f)
1459 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1460 }
1461 }
1462 }
1463
1464 TEST(F32_SPMM_MINMAX_16X1__NEON, qmin) {
1465 TEST_REQUIRES_ARM_NEON;
1466 for (uint32_t n = 1; n < 10; n += 2) {
1467 for (size_t k = 1; k <= 5; k += 2) {
1468 SpMMMicrokernelTester()
1469 .mr(16)
1470 .nr(1)
1471 .m(32)
1472 .n(n)
1473 .k(k)
1474 .sparsity(0.0f)
1475 .qmin(128)
1476 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1477 }
1478 }
1479 }
1480
1481 TEST(F32_SPMM_MINMAX_16X1__NEON, qmax) {
1482 TEST_REQUIRES_ARM_NEON;
1483 for (uint32_t n = 1; n < 10; n += 2) {
1484 for (size_t k = 1; k <= 5; k += 2) {
1485 SpMMMicrokernelTester()
1486 .mr(16)
1487 .nr(1)
1488 .m(32)
1489 .n(n)
1490 .k(k)
1491 .sparsity(0.0f)
1492 .qmax(128)
1493 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1494 }
1495 }
1496 }
1497
1498 TEST(F32_SPMM_MINMAX_16X1__NEON, half_sparse) {
1499 TEST_REQUIRES_ARM_NEON;
1500 for (uint32_t n = 1; n < 10; n += 2) {
1501 for (size_t k = 1; k <= 5; k += 2) {
1502 SpMMMicrokernelTester()
1503 .mr(16)
1504 .nr(1)
1505 .m(32)
1506 .n(n)
1507 .k(k)
1508 .sparsity(0.5f)
1509 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1510 }
1511 }
1512 }
1513
1514 TEST(F32_SPMM_MINMAX_16X1__NEON, zero_weights) {
1515 TEST_REQUIRES_ARM_NEON;
1516 for (uint32_t n = 1; n < 10; n += 2) {
1517 for (size_t k = 1; k <= 5; k += 2) {
1518 SpMMMicrokernelTester()
1519 .mr(16)
1520 .nr(1)
1521 .m(32)
1522 .n(n)
1523 .k(k)
1524 .sparsity(1.0f)
1525 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon);
1526 }
1527 }
1528 }
1529#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1530
1531
1532#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1533 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, k_eq_1) {
1534 TEST_REQUIRES_ARM_NEON;
1535 SpMMMicrokernelTester()
1536 .mr(16)
1537 .nr(1)
1538 .m(16)
1539 .n(1)
1540 .k(1)
1541 .sparsity(0.0f)
1542 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1543 }
1544
1545 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, k_gt_1) {
1546 TEST_REQUIRES_ARM_NEON;
1547 for (size_t k = 2; k < 10; k++) {
1548 SpMMMicrokernelTester()
1549 .mr(16)
1550 .nr(1)
1551 .m(16)
1552 .n(1)
1553 .k(k)
1554 .sparsity(0.0f)
1555 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1556 }
1557 }
1558
1559 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, n_gt_1) {
1560 TEST_REQUIRES_ARM_NEON;
1561 for (uint32_t n = 2; n < 10; n++) {
1562 for (size_t k = 1; k <= 5; k += 2) {
1563 SpMMMicrokernelTester()
1564 .mr(16)
1565 .nr(1)
1566 .m(16)
1567 .n(n)
1568 .k(k)
1569 .sparsity(0.0f)
1570 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1571 }
1572 }
1573 }
1574
1575 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_lt_16) {
1576 TEST_REQUIRES_ARM_NEON;
1577 for (uint32_t m = 1; m < 16; m++) {
1578 for (uint32_t n = 1; n < 10; n += 2) {
1579 for (size_t k = 1; k <= 5; k += 2) {
1580 SpMMMicrokernelTester()
1581 .mr(16)
1582 .nr(1)
1583 .m(m)
1584 .n(n)
1585 .k(k)
1586 .sparsity(0.0f)
1587 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1588 }
1589 }
1590 }
1591 }
1592
1593 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_div_16) {
1594 TEST_REQUIRES_ARM_NEON;
1595 for (uint32_t m = 32; m <= 48; m += 16) {
1596 for (uint32_t n = 1; n < 10; n += 2) {
1597 for (size_t k = 1; k <= 5; k += 2) {
1598 SpMMMicrokernelTester()
1599 .mr(16)
1600 .nr(1)
1601 .m(m)
1602 .n(n)
1603 .k(k)
1604 .sparsity(0.0f)
1605 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1606 }
1607 }
1608 }
1609 }
1610
1611 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, m_gt_16) {
1612 TEST_REQUIRES_ARM_NEON;
1613 for (uint32_t m = 17; m < 32; m++) {
1614 for (uint32_t n = 1; n < 10; n += 2) {
1615 for (size_t k = 1; k <= 5; k += 2) {
1616 SpMMMicrokernelTester()
1617 .mr(16)
1618 .nr(1)
1619 .m(m)
1620 .n(n)
1621 .k(k)
1622 .sparsity(0.0f)
1623 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1624 }
1625 }
1626 }
1627 }
1628
1629 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, output_stride) {
1630 TEST_REQUIRES_ARM_NEON;
1631 for (uint32_t n = 1; n < 10; n += 2) {
1632 for (size_t k = 1; k <= 5; k += 2) {
1633 SpMMMicrokernelTester()
1634 .mr(16)
1635 .nr(1)
1636 .m(32)
1637 .n(n)
1638 .k(k)
1639 .output_stride(37)
1640 .sparsity(0.0f)
1641 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1642 }
1643 }
1644 }
1645
1646 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, qmin) {
1647 TEST_REQUIRES_ARM_NEON;
1648 for (uint32_t n = 1; n < 10; n += 2) {
1649 for (size_t k = 1; k <= 5; k += 2) {
1650 SpMMMicrokernelTester()
1651 .mr(16)
1652 .nr(1)
1653 .m(32)
1654 .n(n)
1655 .k(k)
1656 .sparsity(0.0f)
1657 .qmin(128)
1658 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1659 }
1660 }
1661 }
1662
1663 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, qmax) {
1664 TEST_REQUIRES_ARM_NEON;
1665 for (uint32_t n = 1; n < 10; n += 2) {
1666 for (size_t k = 1; k <= 5; k += 2) {
1667 SpMMMicrokernelTester()
1668 .mr(16)
1669 .nr(1)
1670 .m(32)
1671 .n(n)
1672 .k(k)
1673 .sparsity(0.0f)
1674 .qmax(128)
1675 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1676 }
1677 }
1678 }
1679
1680 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, half_sparse) {
1681 TEST_REQUIRES_ARM_NEON;
1682 for (uint32_t n = 1; n < 10; n += 2) {
1683 for (size_t k = 1; k <= 5; k += 2) {
1684 SpMMMicrokernelTester()
1685 .mr(16)
1686 .nr(1)
1687 .m(32)
1688 .n(n)
1689 .k(k)
1690 .sparsity(0.5f)
1691 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1692 }
1693 }
1694 }
1695
1696 TEST(F32_SPMM_MINMAX_16X1__NEON_PIPELINED, zero_weights) {
1697 TEST_REQUIRES_ARM_NEON;
1698 for (uint32_t n = 1; n < 10; n += 2) {
1699 for (size_t k = 1; k <= 5; k += 2) {
1700 SpMMMicrokernelTester()
1701 .mr(16)
1702 .nr(1)
1703 .m(32)
1704 .n(n)
1705 .k(k)
1706 .sparsity(1.0f)
1707 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined);
1708 }
1709 }
1710 }
1711#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1712
1713
1714#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1715 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_eq_2) {
1716 TEST_REQUIRES_ARM_NEON;
1717 SpMMMicrokernelTester()
1718 .mr(16)
1719 .nr(1)
1720 .m(16)
1721 .n(1)
1722 .k(2)
1723 .sparsity(0.0f)
1724 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1725 }
1726
1727 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_lt_2) {
1728 TEST_REQUIRES_ARM_NEON;
1729 for (size_t k = 1; k < 2; k++) {
1730 SpMMMicrokernelTester()
1731 .mr(16)
1732 .nr(1)
1733 .m(16)
1734 .n(1)
1735 .k(k)
1736 .sparsity(0.0f)
1737 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1738 }
1739 }
1740
1741 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_gt_2) {
1742 TEST_REQUIRES_ARM_NEON;
1743 for (size_t k = 3; k < 4; k++) {
1744 SpMMMicrokernelTester()
1745 .mr(16)
1746 .nr(1)
1747 .m(16)
1748 .n(1)
1749 .k(k)
1750 .sparsity(0.0f)
1751 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1752 }
1753 }
1754
1755 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, k_div_2) {
1756 TEST_REQUIRES_ARM_NEON;
1757 for (size_t k = 4; k <= 20; k += 2) {
1758 SpMMMicrokernelTester()
1759 .mr(16)
1760 .nr(1)
1761 .m(16)
1762 .n(1)
1763 .k(k)
1764 .sparsity(0.0f)
1765 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1766 }
1767 }
1768
1769 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, n_gt_1) {
1770 TEST_REQUIRES_ARM_NEON;
1771 for (uint32_t n = 2; n < 10; n++) {
1772 for (size_t k = 1; k <= 10; k += 3) {
1773 SpMMMicrokernelTester()
1774 .mr(16)
1775 .nr(1)
1776 .m(16)
1777 .n(n)
1778 .k(k)
1779 .sparsity(0.0f)
1780 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1781 }
1782 }
1783 }
1784
1785 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_lt_16) {
1786 TEST_REQUIRES_ARM_NEON;
1787 for (uint32_t m = 1; m < 16; m++) {
1788 for (uint32_t n = 1; n < 10; n += 2) {
1789 for (size_t k = 1; k <= 10; k += 3) {
1790 SpMMMicrokernelTester()
1791 .mr(16)
1792 .nr(1)
1793 .m(m)
1794 .n(n)
1795 .k(k)
1796 .sparsity(0.0f)
1797 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1798 }
1799 }
1800 }
1801 }
1802
1803 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_div_16) {
1804 TEST_REQUIRES_ARM_NEON;
1805 for (uint32_t m = 32; m <= 48; m += 16) {
1806 for (uint32_t n = 1; n < 10; n += 2) {
1807 for (size_t k = 1; k <= 10; k += 3) {
1808 SpMMMicrokernelTester()
1809 .mr(16)
1810 .nr(1)
1811 .m(m)
1812 .n(n)
1813 .k(k)
1814 .sparsity(0.0f)
1815 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1816 }
1817 }
1818 }
1819 }
1820
1821 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, m_gt_16) {
1822 TEST_REQUIRES_ARM_NEON;
1823 for (uint32_t m = 17; m < 32; m++) {
1824 for (uint32_t n = 1; n < 10; n += 2) {
1825 for (size_t k = 1; k <= 10; k += 3) {
1826 SpMMMicrokernelTester()
1827 .mr(16)
1828 .nr(1)
1829 .m(m)
1830 .n(n)
1831 .k(k)
1832 .sparsity(0.0f)
1833 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1834 }
1835 }
1836 }
1837 }
1838
1839 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, output_stride) {
1840 TEST_REQUIRES_ARM_NEON;
1841 for (uint32_t n = 1; n < 10; n += 2) {
1842 for (size_t k = 1; k <= 10; k += 3) {
1843 SpMMMicrokernelTester()
1844 .mr(16)
1845 .nr(1)
1846 .m(32)
1847 .n(n)
1848 .k(k)
1849 .output_stride(37)
1850 .sparsity(0.0f)
1851 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1852 }
1853 }
1854 }
1855
1856 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, qmin) {
1857 TEST_REQUIRES_ARM_NEON;
1858 for (uint32_t n = 1; n < 10; n += 2) {
1859 for (size_t k = 1; k <= 10; k += 3) {
1860 SpMMMicrokernelTester()
1861 .mr(16)
1862 .nr(1)
1863 .m(32)
1864 .n(n)
1865 .k(k)
1866 .sparsity(0.0f)
1867 .qmin(128)
1868 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1869 }
1870 }
1871 }
1872
1873 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, qmax) {
1874 TEST_REQUIRES_ARM_NEON;
1875 for (uint32_t n = 1; n < 10; n += 2) {
1876 for (size_t k = 1; k <= 10; k += 3) {
1877 SpMMMicrokernelTester()
1878 .mr(16)
1879 .nr(1)
1880 .m(32)
1881 .n(n)
1882 .k(k)
1883 .sparsity(0.0f)
1884 .qmax(128)
1885 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1886 }
1887 }
1888 }
1889
1890 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, half_sparse) {
1891 TEST_REQUIRES_ARM_NEON;
1892 for (uint32_t n = 1; n < 10; n += 2) {
1893 for (size_t k = 1; k <= 10; k += 3) {
1894 SpMMMicrokernelTester()
1895 .mr(16)
1896 .nr(1)
1897 .m(32)
1898 .n(n)
1899 .k(k)
1900 .sparsity(0.5f)
1901 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1902 }
1903 }
1904 }
1905
1906 TEST(F32_SPMM_MINMAX_16X1__NEON_X2, zero_weights) {
1907 TEST_REQUIRES_ARM_NEON;
1908 for (uint32_t n = 1; n < 10; n += 2) {
1909 for (size_t k = 1; k <= 10; k += 3) {
1910 SpMMMicrokernelTester()
1911 .mr(16)
1912 .nr(1)
1913 .m(32)
1914 .n(n)
1915 .k(k)
1916 .sparsity(1.0f)
1917 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neon_x2);
1918 }
1919 }
1920 }
1921#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1922
1923
1924#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1925 TEST(F32_SPMM_MINMAX_32X1__NEON, k_eq_1) {
1926 TEST_REQUIRES_ARM_NEON;
1927 SpMMMicrokernelTester()
1928 .mr(32)
1929 .nr(1)
1930 .m(32)
1931 .n(1)
1932 .k(1)
1933 .sparsity(0.0f)
1934 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
1935 }
1936
1937 TEST(F32_SPMM_MINMAX_32X1__NEON, k_gt_1) {
1938 TEST_REQUIRES_ARM_NEON;
1939 for (size_t k = 2; k < 10; k++) {
1940 SpMMMicrokernelTester()
1941 .mr(32)
1942 .nr(1)
1943 .m(32)
1944 .n(1)
1945 .k(k)
1946 .sparsity(0.0f)
1947 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
1948 }
1949 }
1950
1951 TEST(F32_SPMM_MINMAX_32X1__NEON, n_gt_1) {
1952 TEST_REQUIRES_ARM_NEON;
1953 for (uint32_t n = 2; n < 10; n++) {
1954 for (size_t k = 1; k <= 5; k += 2) {
1955 SpMMMicrokernelTester()
1956 .mr(32)
1957 .nr(1)
1958 .m(32)
1959 .n(n)
1960 .k(k)
1961 .sparsity(0.0f)
1962 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
1963 }
1964 }
1965 }
1966
1967 TEST(F32_SPMM_MINMAX_32X1__NEON, m_lt_32) {
1968 TEST_REQUIRES_ARM_NEON;
1969 for (uint32_t m = 1; m < 32; m++) {
1970 for (uint32_t n = 1; n < 10; n += 2) {
1971 for (size_t k = 1; k <= 5; k += 2) {
1972 SpMMMicrokernelTester()
1973 .mr(32)
1974 .nr(1)
1975 .m(m)
1976 .n(n)
1977 .k(k)
1978 .sparsity(0.0f)
1979 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
1980 }
1981 }
1982 }
1983 }
1984
1985 TEST(F32_SPMM_MINMAX_32X1__NEON, m_div_32) {
1986 TEST_REQUIRES_ARM_NEON;
1987 for (uint32_t m = 64; m <= 96; m += 32) {
1988 for (uint32_t n = 1; n < 10; n += 2) {
1989 for (size_t k = 1; k <= 5; k += 2) {
1990 SpMMMicrokernelTester()
1991 .mr(32)
1992 .nr(1)
1993 .m(m)
1994 .n(n)
1995 .k(k)
1996 .sparsity(0.0f)
1997 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
1998 }
1999 }
2000 }
2001 }
2002
2003 TEST(F32_SPMM_MINMAX_32X1__NEON, m_gt_32) {
2004 TEST_REQUIRES_ARM_NEON;
2005 for (uint32_t m = 33; m < 64; m++) {
2006 for (uint32_t n = 1; n < 10; n += 2) {
2007 for (size_t k = 1; k <= 5; k += 2) {
2008 SpMMMicrokernelTester()
2009 .mr(32)
2010 .nr(1)
2011 .m(m)
2012 .n(n)
2013 .k(k)
2014 .sparsity(0.0f)
2015 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2016 }
2017 }
2018 }
2019 }
2020
2021 TEST(F32_SPMM_MINMAX_32X1__NEON, output_stride) {
2022 TEST_REQUIRES_ARM_NEON;
2023 for (uint32_t n = 1; n < 10; n += 2) {
2024 for (size_t k = 1; k <= 5; k += 2) {
2025 SpMMMicrokernelTester()
2026 .mr(32)
2027 .nr(1)
2028 .m(64)
2029 .n(n)
2030 .k(k)
2031 .output_stride(67)
2032 .sparsity(0.0f)
2033 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2034 }
2035 }
2036 }
2037
2038 TEST(F32_SPMM_MINMAX_32X1__NEON, qmin) {
2039 TEST_REQUIRES_ARM_NEON;
2040 for (uint32_t n = 1; n < 10; n += 2) {
2041 for (size_t k = 1; k <= 5; k += 2) {
2042 SpMMMicrokernelTester()
2043 .mr(32)
2044 .nr(1)
2045 .m(64)
2046 .n(n)
2047 .k(k)
2048 .sparsity(0.0f)
2049 .qmin(128)
2050 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2051 }
2052 }
2053 }
2054
2055 TEST(F32_SPMM_MINMAX_32X1__NEON, qmax) {
2056 TEST_REQUIRES_ARM_NEON;
2057 for (uint32_t n = 1; n < 10; n += 2) {
2058 for (size_t k = 1; k <= 5; k += 2) {
2059 SpMMMicrokernelTester()
2060 .mr(32)
2061 .nr(1)
2062 .m(64)
2063 .n(n)
2064 .k(k)
2065 .sparsity(0.0f)
2066 .qmax(128)
2067 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2068 }
2069 }
2070 }
2071
2072 TEST(F32_SPMM_MINMAX_32X1__NEON, half_sparse) {
2073 TEST_REQUIRES_ARM_NEON;
2074 for (uint32_t n = 1; n < 10; n += 2) {
2075 for (size_t k = 1; k <= 5; k += 2) {
2076 SpMMMicrokernelTester()
2077 .mr(32)
2078 .nr(1)
2079 .m(64)
2080 .n(n)
2081 .k(k)
2082 .sparsity(0.5f)
2083 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2084 }
2085 }
2086 }
2087
2088 TEST(F32_SPMM_MINMAX_32X1__NEON, zero_weights) {
2089 TEST_REQUIRES_ARM_NEON;
2090 for (uint32_t n = 1; n < 10; n += 2) {
2091 for (size_t k = 1; k <= 5; k += 2) {
2092 SpMMMicrokernelTester()
2093 .mr(32)
2094 .nr(1)
2095 .m(64)
2096 .n(n)
2097 .k(k)
2098 .sparsity(1.0f)
2099 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon);
2100 }
2101 }
2102 }
2103#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2104
2105
2106#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2107 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, k_eq_1) {
2108 TEST_REQUIRES_ARM_NEON;
2109 SpMMMicrokernelTester()
2110 .mr(32)
2111 .nr(1)
2112 .m(32)
2113 .n(1)
2114 .k(1)
2115 .sparsity(0.0f)
2116 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2117 }
2118
2119 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, k_gt_1) {
2120 TEST_REQUIRES_ARM_NEON;
2121 for (size_t k = 2; k < 10; k++) {
2122 SpMMMicrokernelTester()
2123 .mr(32)
2124 .nr(1)
2125 .m(32)
2126 .n(1)
2127 .k(k)
2128 .sparsity(0.0f)
2129 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2130 }
2131 }
2132
2133 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, n_gt_1) {
2134 TEST_REQUIRES_ARM_NEON;
2135 for (uint32_t n = 2; n < 10; n++) {
2136 for (size_t k = 1; k <= 5; k += 2) {
2137 SpMMMicrokernelTester()
2138 .mr(32)
2139 .nr(1)
2140 .m(32)
2141 .n(n)
2142 .k(k)
2143 .sparsity(0.0f)
2144 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2145 }
2146 }
2147 }
2148
2149 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_lt_32) {
2150 TEST_REQUIRES_ARM_NEON;
2151 for (uint32_t m = 1; m < 32; m++) {
2152 for (uint32_t n = 1; n < 10; n += 2) {
2153 for (size_t k = 1; k <= 5; k += 2) {
2154 SpMMMicrokernelTester()
2155 .mr(32)
2156 .nr(1)
2157 .m(m)
2158 .n(n)
2159 .k(k)
2160 .sparsity(0.0f)
2161 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2162 }
2163 }
2164 }
2165 }
2166
2167 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_div_32) {
2168 TEST_REQUIRES_ARM_NEON;
2169 for (uint32_t m = 64; m <= 96; m += 32) {
2170 for (uint32_t n = 1; n < 10; n += 2) {
2171 for (size_t k = 1; k <= 5; k += 2) {
2172 SpMMMicrokernelTester()
2173 .mr(32)
2174 .nr(1)
2175 .m(m)
2176 .n(n)
2177 .k(k)
2178 .sparsity(0.0f)
2179 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2180 }
2181 }
2182 }
2183 }
2184
2185 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, m_gt_32) {
2186 TEST_REQUIRES_ARM_NEON;
2187 for (uint32_t m = 33; m < 64; m++) {
2188 for (uint32_t n = 1; n < 10; n += 2) {
2189 for (size_t k = 1; k <= 5; k += 2) {
2190 SpMMMicrokernelTester()
2191 .mr(32)
2192 .nr(1)
2193 .m(m)
2194 .n(n)
2195 .k(k)
2196 .sparsity(0.0f)
2197 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2198 }
2199 }
2200 }
2201 }
2202
2203 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, output_stride) {
2204 TEST_REQUIRES_ARM_NEON;
2205 for (uint32_t n = 1; n < 10; n += 2) {
2206 for (size_t k = 1; k <= 5; k += 2) {
2207 SpMMMicrokernelTester()
2208 .mr(32)
2209 .nr(1)
2210 .m(64)
2211 .n(n)
2212 .k(k)
2213 .output_stride(67)
2214 .sparsity(0.0f)
2215 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2216 }
2217 }
2218 }
2219
2220 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, qmin) {
2221 TEST_REQUIRES_ARM_NEON;
2222 for (uint32_t n = 1; n < 10; n += 2) {
2223 for (size_t k = 1; k <= 5; k += 2) {
2224 SpMMMicrokernelTester()
2225 .mr(32)
2226 .nr(1)
2227 .m(64)
2228 .n(n)
2229 .k(k)
2230 .sparsity(0.0f)
2231 .qmin(128)
2232 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2233 }
2234 }
2235 }
2236
2237 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, qmax) {
2238 TEST_REQUIRES_ARM_NEON;
2239 for (uint32_t n = 1; n < 10; n += 2) {
2240 for (size_t k = 1; k <= 5; k += 2) {
2241 SpMMMicrokernelTester()
2242 .mr(32)
2243 .nr(1)
2244 .m(64)
2245 .n(n)
2246 .k(k)
2247 .sparsity(0.0f)
2248 .qmax(128)
2249 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2250 }
2251 }
2252 }
2253
2254 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, half_sparse) {
2255 TEST_REQUIRES_ARM_NEON;
2256 for (uint32_t n = 1; n < 10; n += 2) {
2257 for (size_t k = 1; k <= 5; k += 2) {
2258 SpMMMicrokernelTester()
2259 .mr(32)
2260 .nr(1)
2261 .m(64)
2262 .n(n)
2263 .k(k)
2264 .sparsity(0.5f)
2265 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2266 }
2267 }
2268 }
2269
2270 TEST(F32_SPMM_MINMAX_32X1__NEON_PIPELINED, zero_weights) {
2271 TEST_REQUIRES_ARM_NEON;
2272 for (uint32_t n = 1; n < 10; n += 2) {
2273 for (size_t k = 1; k <= 5; k += 2) {
2274 SpMMMicrokernelTester()
2275 .mr(32)
2276 .nr(1)
2277 .m(64)
2278 .n(n)
2279 .k(k)
2280 .sparsity(1.0f)
2281 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined);
2282 }
2283 }
2284 }
2285#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2286
2287
2288#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2289 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_eq_2) {
2290 TEST_REQUIRES_ARM_NEON;
2291 SpMMMicrokernelTester()
2292 .mr(32)
2293 .nr(1)
2294 .m(32)
2295 .n(1)
2296 .k(2)
2297 .sparsity(0.0f)
2298 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2299 }
2300
2301 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_lt_2) {
2302 TEST_REQUIRES_ARM_NEON;
2303 for (size_t k = 1; k < 2; k++) {
2304 SpMMMicrokernelTester()
2305 .mr(32)
2306 .nr(1)
2307 .m(32)
2308 .n(1)
2309 .k(k)
2310 .sparsity(0.0f)
2311 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2312 }
2313 }
2314
2315 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_gt_2) {
2316 TEST_REQUIRES_ARM_NEON;
2317 for (size_t k = 3; k < 4; k++) {
2318 SpMMMicrokernelTester()
2319 .mr(32)
2320 .nr(1)
2321 .m(32)
2322 .n(1)
2323 .k(k)
2324 .sparsity(0.0f)
2325 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2326 }
2327 }
2328
2329 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, k_div_2) {
2330 TEST_REQUIRES_ARM_NEON;
2331 for (size_t k = 4; k <= 20; k += 2) {
2332 SpMMMicrokernelTester()
2333 .mr(32)
2334 .nr(1)
2335 .m(32)
2336 .n(1)
2337 .k(k)
2338 .sparsity(0.0f)
2339 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2340 }
2341 }
2342
2343 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, n_gt_1) {
2344 TEST_REQUIRES_ARM_NEON;
2345 for (uint32_t n = 2; n < 10; n++) {
2346 for (size_t k = 1; k <= 10; k += 3) {
2347 SpMMMicrokernelTester()
2348 .mr(32)
2349 .nr(1)
2350 .m(32)
2351 .n(n)
2352 .k(k)
2353 .sparsity(0.0f)
2354 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2355 }
2356 }
2357 }
2358
2359 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_lt_32) {
2360 TEST_REQUIRES_ARM_NEON;
2361 for (uint32_t m = 1; m < 32; m++) {
2362 for (uint32_t n = 1; n < 10; n += 2) {
2363 for (size_t k = 1; k <= 10; k += 3) {
2364 SpMMMicrokernelTester()
2365 .mr(32)
2366 .nr(1)
2367 .m(m)
2368 .n(n)
2369 .k(k)
2370 .sparsity(0.0f)
2371 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2372 }
2373 }
2374 }
2375 }
2376
2377 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_div_32) {
2378 TEST_REQUIRES_ARM_NEON;
2379 for (uint32_t m = 64; m <= 96; m += 32) {
2380 for (uint32_t n = 1; n < 10; n += 2) {
2381 for (size_t k = 1; k <= 10; k += 3) {
2382 SpMMMicrokernelTester()
2383 .mr(32)
2384 .nr(1)
2385 .m(m)
2386 .n(n)
2387 .k(k)
2388 .sparsity(0.0f)
2389 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2390 }
2391 }
2392 }
2393 }
2394
2395 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, m_gt_32) {
2396 TEST_REQUIRES_ARM_NEON;
2397 for (uint32_t m = 33; m < 64; m++) {
2398 for (uint32_t n = 1; n < 10; n += 2) {
2399 for (size_t k = 1; k <= 10; k += 3) {
2400 SpMMMicrokernelTester()
2401 .mr(32)
2402 .nr(1)
2403 .m(m)
2404 .n(n)
2405 .k(k)
2406 .sparsity(0.0f)
2407 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2408 }
2409 }
2410 }
2411 }
2412
2413 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, output_stride) {
2414 TEST_REQUIRES_ARM_NEON;
2415 for (uint32_t n = 1; n < 10; n += 2) {
2416 for (size_t k = 1; k <= 10; k += 3) {
2417 SpMMMicrokernelTester()
2418 .mr(32)
2419 .nr(1)
2420 .m(64)
2421 .n(n)
2422 .k(k)
2423 .output_stride(67)
2424 .sparsity(0.0f)
2425 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2426 }
2427 }
2428 }
2429
2430 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, qmin) {
2431 TEST_REQUIRES_ARM_NEON;
2432 for (uint32_t n = 1; n < 10; n += 2) {
2433 for (size_t k = 1; k <= 10; k += 3) {
2434 SpMMMicrokernelTester()
2435 .mr(32)
2436 .nr(1)
2437 .m(64)
2438 .n(n)
2439 .k(k)
2440 .sparsity(0.0f)
2441 .qmin(128)
2442 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2443 }
2444 }
2445 }
2446
2447 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, qmax) {
2448 TEST_REQUIRES_ARM_NEON;
2449 for (uint32_t n = 1; n < 10; n += 2) {
2450 for (size_t k = 1; k <= 10; k += 3) {
2451 SpMMMicrokernelTester()
2452 .mr(32)
2453 .nr(1)
2454 .m(64)
2455 .n(n)
2456 .k(k)
2457 .sparsity(0.0f)
2458 .qmax(128)
2459 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2460 }
2461 }
2462 }
2463
2464 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, half_sparse) {
2465 TEST_REQUIRES_ARM_NEON;
2466 for (uint32_t n = 1; n < 10; n += 2) {
2467 for (size_t k = 1; k <= 10; k += 3) {
2468 SpMMMicrokernelTester()
2469 .mr(32)
2470 .nr(1)
2471 .m(64)
2472 .n(n)
2473 .k(k)
2474 .sparsity(0.5f)
2475 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2476 }
2477 }
2478 }
2479
2480 TEST(F32_SPMM_MINMAX_32X1__NEON_X2, zero_weights) {
2481 TEST_REQUIRES_ARM_NEON;
2482 for (uint32_t n = 1; n < 10; n += 2) {
2483 for (size_t k = 1; k <= 10; k += 3) {
2484 SpMMMicrokernelTester()
2485 .mr(32)
2486 .nr(1)
2487 .m(64)
2488 .n(n)
2489 .k(k)
2490 .sparsity(1.0f)
2491 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neon_x2);
2492 }
2493 }
2494 }
2495#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2496
2497
2498#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07002499 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002500 TEST_REQUIRES_ARM_NEON_FMA;
2501 SpMMMicrokernelTester()
2502 .mr(4)
2503 .nr(1)
2504 .m(4)
2505 .n(1)
2506 .k(1)
2507 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002508 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002509 }
2510
Marat Dukhan355ab432020-04-09 19:01:52 -07002511 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002512 TEST_REQUIRES_ARM_NEON_FMA;
2513 for (size_t k = 2; k < 10; k++) {
2514 SpMMMicrokernelTester()
2515 .mr(4)
2516 .nr(1)
2517 .m(4)
2518 .n(1)
2519 .k(k)
2520 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002521 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002522 }
2523 }
2524
Marat Dukhan355ab432020-04-09 19:01:52 -07002525 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002526 TEST_REQUIRES_ARM_NEON_FMA;
2527 for (uint32_t n = 2; n < 10; n++) {
2528 for (size_t k = 1; k <= 5; k += 2) {
2529 SpMMMicrokernelTester()
2530 .mr(4)
2531 .nr(1)
2532 .m(4)
2533 .n(n)
2534 .k(k)
2535 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002536 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002537 }
2538 }
2539 }
2540
Marat Dukhan355ab432020-04-09 19:01:52 -07002541 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002542 TEST_REQUIRES_ARM_NEON_FMA;
2543 for (uint32_t m = 1; m < 4; m++) {
2544 for (uint32_t n = 1; n < 10; n += 2) {
2545 for (size_t k = 1; k <= 5; k += 2) {
2546 SpMMMicrokernelTester()
2547 .mr(4)
2548 .nr(1)
2549 .m(m)
2550 .n(n)
2551 .k(k)
2552 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002553 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002554 }
2555 }
2556 }
2557 }
2558
Marat Dukhan355ab432020-04-09 19:01:52 -07002559 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002560 TEST_REQUIRES_ARM_NEON_FMA;
2561 for (uint32_t m = 8; m <= 12; m += 4) {
2562 for (uint32_t n = 1; n < 10; n += 2) {
2563 for (size_t k = 1; k <= 5; k += 2) {
2564 SpMMMicrokernelTester()
2565 .mr(4)
2566 .nr(1)
2567 .m(m)
2568 .n(n)
2569 .k(k)
2570 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002571 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002572 }
2573 }
2574 }
2575 }
2576
Marat Dukhan355ab432020-04-09 19:01:52 -07002577 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002578 TEST_REQUIRES_ARM_NEON_FMA;
2579 for (uint32_t m = 5; m < 8; m++) {
2580 for (uint32_t n = 1; n < 10; n += 2) {
2581 for (size_t k = 1; k <= 5; k += 2) {
2582 SpMMMicrokernelTester()
2583 .mr(4)
2584 .nr(1)
2585 .m(m)
2586 .n(n)
2587 .k(k)
2588 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002589 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002590 }
2591 }
2592 }
2593 }
2594
Marat Dukhane8bfcc82020-11-16 12:28:13 -08002595 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, output_stride) {
2596 TEST_REQUIRES_ARM_NEON_FMA;
2597 for (uint32_t n = 1; n < 10; n += 2) {
2598 for (size_t k = 1; k <= 5; k += 2) {
2599 SpMMMicrokernelTester()
2600 .mr(4)
2601 .nr(1)
2602 .m(8)
2603 .n(n)
2604 .k(k)
2605 .output_stride(11)
2606 .sparsity(0.0f)
2607 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
2608 }
2609 }
2610 }
2611
Marat Dukhan355ab432020-04-09 19:01:52 -07002612 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002613 TEST_REQUIRES_ARM_NEON_FMA;
2614 for (uint32_t n = 1; n < 10; n += 2) {
2615 for (size_t k = 1; k <= 5; k += 2) {
2616 SpMMMicrokernelTester()
2617 .mr(4)
2618 .nr(1)
2619 .m(8)
2620 .n(n)
2621 .k(k)
2622 .sparsity(0.0f)
2623 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07002624 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002625 }
2626 }
2627 }
2628
Marat Dukhan355ab432020-04-09 19:01:52 -07002629 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002630 TEST_REQUIRES_ARM_NEON_FMA;
2631 for (uint32_t n = 1; n < 10; n += 2) {
2632 for (size_t k = 1; k <= 5; k += 2) {
2633 SpMMMicrokernelTester()
2634 .mr(4)
2635 .nr(1)
2636 .m(8)
2637 .n(n)
2638 .k(k)
2639 .sparsity(0.0f)
2640 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07002641 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002642 }
2643 }
2644 }
2645
Marat Dukhan355ab432020-04-09 19:01:52 -07002646 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002647 TEST_REQUIRES_ARM_NEON_FMA;
2648 for (uint32_t n = 1; n < 10; n += 2) {
2649 for (size_t k = 1; k <= 5; k += 2) {
2650 SpMMMicrokernelTester()
2651 .mr(4)
2652 .nr(1)
2653 .m(8)
2654 .n(n)
2655 .k(k)
2656 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002657 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002658 }
2659 }
2660 }
2661
Marat Dukhan355ab432020-04-09 19:01:52 -07002662 TEST(F32_SPMM_MINMAX_4X1__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002663 TEST_REQUIRES_ARM_NEON_FMA;
2664 for (uint32_t n = 1; n < 10; n += 2) {
2665 for (size_t k = 1; k <= 5; k += 2) {
2666 SpMMMicrokernelTester()
2667 .mr(4)
2668 .nr(1)
2669 .m(8)
2670 .n(n)
2671 .k(k)
2672 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002673 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002674 }
2675 }
2676 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08002677#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002678
2679
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002680#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07002681 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002682 TEST_REQUIRES_ARM_NEON_FMA;
2683 SpMMMicrokernelTester()
2684 .mr(4)
2685 .nr(2)
2686 .m(4)
2687 .n(2)
2688 .k(1)
2689 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002690 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002691 }
2692
Marat Dukhan355ab432020-04-09 19:01:52 -07002693 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002694 TEST_REQUIRES_ARM_NEON_FMA;
2695 for (uint32_t n = 1; n <= 2; n++) {
2696 SpMMMicrokernelTester()
2697 .mr(4)
2698 .nr(2)
2699 .m(4)
2700 .n(n)
2701 .k(1)
2702 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002703 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002704 }
2705 }
2706
Marat Dukhan355ab432020-04-09 19:01:52 -07002707 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002708 TEST_REQUIRES_ARM_NEON_FMA;
2709 for (size_t k = 2; k < 10; k++) {
2710 SpMMMicrokernelTester()
2711 .mr(4)
2712 .nr(2)
2713 .m(4)
2714 .n(2)
2715 .k(k)
2716 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002717 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002718 }
2719 }
2720
Marat Dukhan355ab432020-04-09 19:01:52 -07002721 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002722 TEST_REQUIRES_ARM_NEON_FMA;
2723 for (size_t k = 2; k < 10; k++) {
2724 for (uint32_t n = 1; n <= 2; n++) {
2725 SpMMMicrokernelTester()
2726 .mr(4)
2727 .nr(2)
2728 .m(4)
2729 .n(n)
2730 .k(k)
2731 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002732 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002733 }
2734 }
2735 }
2736
Marat Dukhan355ab432020-04-09 19:01:52 -07002737 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, n_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002738 TEST_REQUIRES_ARM_NEON_FMA;
2739 for (uint32_t n = 3; n < 10; n++) {
2740 for (size_t k = 1; k <= 5; k += 2) {
2741 SpMMMicrokernelTester()
2742 .mr(4)
2743 .nr(2)
2744 .m(4)
2745 .n(n)
2746 .k(k)
2747 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002748 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002749 }
2750 }
2751 }
2752
Marat Dukhan355ab432020-04-09 19:01:52 -07002753 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, n_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002754 TEST_REQUIRES_ARM_NEON_FMA;
2755 for (uint32_t n = 4; n <= 6; n += 2) {
2756 for (size_t k = 1; k <= 5; k += 2) {
2757 SpMMMicrokernelTester()
2758 .mr(4)
2759 .nr(2)
2760 .m(4)
2761 .n(n)
2762 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07002763 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002764 }
2765 }
2766 }
2767
Marat Dukhan355ab432020-04-09 19:01:52 -07002768 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002769 TEST_REQUIRES_ARM_NEON_FMA;
2770 for (uint32_t m = 1; m < 4; m++) {
2771 for (uint32_t n = 1; n < 10; n += 3) {
2772 for (size_t k = 1; k <= 5; k += 2) {
2773 SpMMMicrokernelTester()
2774 .mr(4)
2775 .nr(2)
2776 .m(m)
2777 .n(n)
2778 .k(k)
2779 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002780 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002781 }
2782 }
2783 }
2784 }
2785
Marat Dukhan355ab432020-04-09 19:01:52 -07002786 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002787 TEST_REQUIRES_ARM_NEON_FMA;
2788 for (uint32_t m = 8; m <= 12; m += 4) {
2789 for (uint32_t n = 1; n < 10; n += 3) {
2790 for (size_t k = 1; k <= 5; k += 2) {
2791 SpMMMicrokernelTester()
2792 .mr(4)
2793 .nr(2)
2794 .m(m)
2795 .n(n)
2796 .k(k)
2797 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002798 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002799 }
2800 }
2801 }
2802 }
2803
Marat Dukhan355ab432020-04-09 19:01:52 -07002804 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002805 TEST_REQUIRES_ARM_NEON_FMA;
2806 for (uint32_t m = 5; m < 8; m++) {
2807 for (uint32_t n = 1; n < 10; n += 3) {
2808 for (size_t k = 1; k <= 5; k += 2) {
2809 SpMMMicrokernelTester()
2810 .mr(4)
2811 .nr(2)
2812 .m(m)
2813 .n(n)
2814 .k(k)
2815 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002816 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002817 }
2818 }
2819 }
2820 }
2821
Marat Dukhane8bfcc82020-11-16 12:28:13 -08002822 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, output_stride) {
2823 TEST_REQUIRES_ARM_NEON_FMA;
2824 for (uint32_t n = 1; n < 10; n += 3) {
2825 for (size_t k = 1; k <= 5; k += 2) {
2826 SpMMMicrokernelTester()
2827 .mr(4)
2828 .nr(2)
2829 .m(8)
2830 .n(n)
2831 .k(k)
2832 .output_stride(11)
2833 .sparsity(0.0f)
2834 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
2835 }
2836 }
2837 }
2838
Marat Dukhan355ab432020-04-09 19:01:52 -07002839 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002840 TEST_REQUIRES_ARM_NEON_FMA;
2841 for (uint32_t n = 1; n < 10; n += 3) {
2842 for (size_t k = 1; k <= 5; k += 2) {
2843 SpMMMicrokernelTester()
2844 .mr(4)
2845 .nr(2)
2846 .m(8)
2847 .n(n)
2848 .k(k)
2849 .sparsity(0.0f)
2850 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07002851 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002852 }
2853 }
2854 }
2855
Marat Dukhan355ab432020-04-09 19:01:52 -07002856 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002857 TEST_REQUIRES_ARM_NEON_FMA;
2858 for (uint32_t n = 1; n < 10; n += 3) {
2859 for (size_t k = 1; k <= 5; k += 2) {
2860 SpMMMicrokernelTester()
2861 .mr(4)
2862 .nr(2)
2863 .m(8)
2864 .n(n)
2865 .k(k)
2866 .sparsity(0.0f)
2867 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07002868 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002869 }
2870 }
2871 }
2872
Marat Dukhan355ab432020-04-09 19:01:52 -07002873 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002874 TEST_REQUIRES_ARM_NEON_FMA;
2875 for (uint32_t n = 1; n < 10; n += 3) {
2876 for (size_t k = 1; k <= 5; k += 2) {
2877 SpMMMicrokernelTester()
2878 .mr(4)
2879 .nr(2)
2880 .m(8)
2881 .n(n)
2882 .k(k)
2883 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002884 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002885 }
2886 }
2887 }
2888
Marat Dukhan355ab432020-04-09 19:01:52 -07002889 TEST(F32_SPMM_MINMAX_4X2__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002890 TEST_REQUIRES_ARM_NEON_FMA;
2891 for (uint32_t n = 1; n < 10; n += 3) {
2892 for (size_t k = 1; k <= 5; k += 2) {
2893 SpMMMicrokernelTester()
2894 .mr(4)
2895 .nr(2)
2896 .m(8)
2897 .n(n)
2898 .k(k)
2899 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002900 .Test(xnn_f32_spmm_minmax_ukernel_4x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002901 }
2902 }
2903 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002904#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002905
2906
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002907#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07002908 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002909 TEST_REQUIRES_ARM_NEON_FMA;
2910 SpMMMicrokernelTester()
2911 .mr(4)
2912 .nr(4)
2913 .m(4)
2914 .n(4)
2915 .k(1)
2916 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002917 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002918 }
2919
Marat Dukhan355ab432020-04-09 19:01:52 -07002920 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002921 TEST_REQUIRES_ARM_NEON_FMA;
2922 for (uint32_t n = 1; n <= 4; n++) {
2923 SpMMMicrokernelTester()
2924 .mr(4)
2925 .nr(4)
2926 .m(4)
2927 .n(n)
2928 .k(1)
2929 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002930 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002931 }
2932 }
2933
Marat Dukhan355ab432020-04-09 19:01:52 -07002934 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002935 TEST_REQUIRES_ARM_NEON_FMA;
2936 for (size_t k = 2; k < 10; k++) {
2937 SpMMMicrokernelTester()
2938 .mr(4)
2939 .nr(4)
2940 .m(4)
2941 .n(4)
2942 .k(k)
2943 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002944 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002945 }
2946 }
2947
Marat Dukhan355ab432020-04-09 19:01:52 -07002948 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002949 TEST_REQUIRES_ARM_NEON_FMA;
2950 for (size_t k = 2; k < 10; k++) {
2951 for (uint32_t n = 1; n <= 4; n++) {
2952 SpMMMicrokernelTester()
2953 .mr(4)
2954 .nr(4)
2955 .m(4)
2956 .n(n)
2957 .k(k)
2958 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002959 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002960 }
2961 }
2962 }
2963
Marat Dukhan355ab432020-04-09 19:01:52 -07002964 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, n_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002965 TEST_REQUIRES_ARM_NEON_FMA;
2966 for (uint32_t n = 5; n < 10; n++) {
2967 for (size_t k = 1; k <= 5; k += 2) {
2968 SpMMMicrokernelTester()
2969 .mr(4)
2970 .nr(4)
2971 .m(4)
2972 .n(n)
2973 .k(k)
2974 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07002975 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002976 }
2977 }
2978 }
2979
Marat Dukhan355ab432020-04-09 19:01:52 -07002980 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, n_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002981 TEST_REQUIRES_ARM_NEON_FMA;
2982 for (uint32_t n = 8; n <= 12; n += 4) {
2983 for (size_t k = 1; k <= 5; k += 2) {
2984 SpMMMicrokernelTester()
2985 .mr(4)
2986 .nr(4)
2987 .m(4)
2988 .n(n)
2989 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07002990 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002991 }
2992 }
2993 }
2994
Marat Dukhan355ab432020-04-09 19:01:52 -07002995 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002996 TEST_REQUIRES_ARM_NEON_FMA;
2997 for (uint32_t m = 1; m < 4; m++) {
2998 for (uint32_t n = 1; n < 20; n += 5) {
2999 for (size_t k = 1; k <= 5; k += 2) {
3000 SpMMMicrokernelTester()
3001 .mr(4)
3002 .nr(4)
3003 .m(m)
3004 .n(n)
3005 .k(k)
3006 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003007 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003008 }
3009 }
3010 }
3011 }
3012
Marat Dukhan355ab432020-04-09 19:01:52 -07003013 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003014 TEST_REQUIRES_ARM_NEON_FMA;
3015 for (uint32_t m = 8; m <= 12; m += 4) {
3016 for (uint32_t n = 1; n < 20; n += 5) {
3017 for (size_t k = 1; k <= 5; k += 2) {
3018 SpMMMicrokernelTester()
3019 .mr(4)
3020 .nr(4)
3021 .m(m)
3022 .n(n)
3023 .k(k)
3024 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003025 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003026 }
3027 }
3028 }
3029 }
3030
Marat Dukhan355ab432020-04-09 19:01:52 -07003031 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003032 TEST_REQUIRES_ARM_NEON_FMA;
3033 for (uint32_t m = 5; m < 8; m++) {
3034 for (uint32_t n = 1; n < 20; n += 5) {
3035 for (size_t k = 1; k <= 5; k += 2) {
3036 SpMMMicrokernelTester()
3037 .mr(4)
3038 .nr(4)
3039 .m(m)
3040 .n(n)
3041 .k(k)
3042 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003043 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003044 }
3045 }
3046 }
3047 }
3048
Marat Dukhane8bfcc82020-11-16 12:28:13 -08003049 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, output_stride) {
3050 TEST_REQUIRES_ARM_NEON_FMA;
3051 for (uint32_t n = 1; n < 20; n += 5) {
3052 for (size_t k = 1; k <= 5; k += 2) {
3053 SpMMMicrokernelTester()
3054 .mr(4)
3055 .nr(4)
3056 .m(8)
3057 .n(n)
3058 .k(k)
3059 .output_stride(11)
3060 .sparsity(0.0f)
3061 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
3062 }
3063 }
3064 }
3065
Marat Dukhan355ab432020-04-09 19:01:52 -07003066 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003067 TEST_REQUIRES_ARM_NEON_FMA;
3068 for (uint32_t n = 1; n < 20; n += 5) {
3069 for (size_t k = 1; k <= 5; k += 2) {
3070 SpMMMicrokernelTester()
3071 .mr(4)
3072 .nr(4)
3073 .m(8)
3074 .n(n)
3075 .k(k)
3076 .sparsity(0.0f)
3077 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003078 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003079 }
3080 }
3081 }
3082
Marat Dukhan355ab432020-04-09 19:01:52 -07003083 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003084 TEST_REQUIRES_ARM_NEON_FMA;
3085 for (uint32_t n = 1; n < 20; n += 5) {
3086 for (size_t k = 1; k <= 5; k += 2) {
3087 SpMMMicrokernelTester()
3088 .mr(4)
3089 .nr(4)
3090 .m(8)
3091 .n(n)
3092 .k(k)
3093 .sparsity(0.0f)
3094 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003095 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003096 }
3097 }
3098 }
3099
Marat Dukhan355ab432020-04-09 19:01:52 -07003100 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003101 TEST_REQUIRES_ARM_NEON_FMA;
3102 for (uint32_t n = 1; n < 20; n += 5) {
3103 for (size_t k = 1; k <= 5; k += 2) {
3104 SpMMMicrokernelTester()
3105 .mr(4)
3106 .nr(4)
3107 .m(8)
3108 .n(n)
3109 .k(k)
3110 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003111 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003112 }
3113 }
3114 }
3115
Marat Dukhan355ab432020-04-09 19:01:52 -07003116 TEST(F32_SPMM_MINMAX_4X4__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003117 TEST_REQUIRES_ARM_NEON_FMA;
3118 for (uint32_t n = 1; n < 20; n += 5) {
3119 for (size_t k = 1; k <= 5; k += 2) {
3120 SpMMMicrokernelTester()
3121 .mr(4)
3122 .nr(4)
3123 .m(8)
3124 .n(n)
3125 .k(k)
3126 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003127 .Test(xnn_f32_spmm_minmax_ukernel_4x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003128 }
3129 }
3130 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003131#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003132
3133
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003134#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07003135 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003136 TEST_REQUIRES_ARM_NEON_FMA;
3137 SpMMMicrokernelTester()
3138 .mr(4)
3139 .nr(1)
3140 .m(4)
3141 .n(1)
3142 .k(1)
3143 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003144 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003145 }
3146
Marat Dukhan355ab432020-04-09 19:01:52 -07003147 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003148 TEST_REQUIRES_ARM_NEON_FMA;
3149 for (size_t k = 2; k < 10; k++) {
3150 SpMMMicrokernelTester()
3151 .mr(4)
3152 .nr(1)
3153 .m(4)
3154 .n(1)
3155 .k(k)
3156 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003157 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003158 }
3159 }
3160
Marat Dukhan355ab432020-04-09 19:01:52 -07003161 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003162 TEST_REQUIRES_ARM_NEON_FMA;
3163 for (uint32_t n = 2; n < 10; n++) {
3164 for (size_t k = 1; k <= 5; k += 2) {
3165 SpMMMicrokernelTester()
3166 .mr(4)
3167 .nr(1)
3168 .m(4)
3169 .n(n)
3170 .k(k)
3171 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003172 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003173 }
3174 }
3175 }
3176
Marat Dukhan355ab432020-04-09 19:01:52 -07003177 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003178 TEST_REQUIRES_ARM_NEON_FMA;
3179 for (uint32_t m = 1; m < 4; m++) {
3180 for (uint32_t n = 1; n < 10; n += 2) {
3181 for (size_t k = 1; k <= 5; k += 2) {
3182 SpMMMicrokernelTester()
3183 .mr(4)
3184 .nr(1)
3185 .m(m)
3186 .n(n)
3187 .k(k)
3188 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003189 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003190 }
3191 }
3192 }
3193 }
3194
Marat Dukhan355ab432020-04-09 19:01:52 -07003195 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003196 TEST_REQUIRES_ARM_NEON_FMA;
3197 for (uint32_t m = 8; m <= 12; m += 4) {
3198 for (uint32_t n = 1; n < 10; n += 2) {
3199 for (size_t k = 1; k <= 5; k += 2) {
3200 SpMMMicrokernelTester()
3201 .mr(4)
3202 .nr(1)
3203 .m(m)
3204 .n(n)
3205 .k(k)
3206 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003207 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003208 }
3209 }
3210 }
3211 }
3212
Marat Dukhan355ab432020-04-09 19:01:52 -07003213 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003214 TEST_REQUIRES_ARM_NEON_FMA;
3215 for (uint32_t m = 5; m < 8; m++) {
3216 for (uint32_t n = 1; n < 10; n += 2) {
3217 for (size_t k = 1; k <= 5; k += 2) {
3218 SpMMMicrokernelTester()
3219 .mr(4)
3220 .nr(1)
3221 .m(m)
3222 .n(n)
3223 .k(k)
3224 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003225 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003226 }
3227 }
3228 }
3229 }
3230
Marat Dukhane8bfcc82020-11-16 12:28:13 -08003231 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, output_stride) {
3232 TEST_REQUIRES_ARM_NEON_FMA;
3233 for (uint32_t n = 1; n < 10; n += 2) {
3234 for (size_t k = 1; k <= 5; k += 2) {
3235 SpMMMicrokernelTester()
3236 .mr(4)
3237 .nr(1)
3238 .m(8)
3239 .n(n)
3240 .k(k)
3241 .output_stride(11)
3242 .sparsity(0.0f)
3243 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
3244 }
3245 }
3246 }
3247
Marat Dukhan355ab432020-04-09 19:01:52 -07003248 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003249 TEST_REQUIRES_ARM_NEON_FMA;
3250 for (uint32_t n = 1; n < 10; n += 2) {
3251 for (size_t k = 1; k <= 5; k += 2) {
3252 SpMMMicrokernelTester()
3253 .mr(4)
3254 .nr(1)
3255 .m(8)
3256 .n(n)
3257 .k(k)
3258 .sparsity(0.0f)
3259 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003260 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003261 }
3262 }
3263 }
3264
Marat Dukhan355ab432020-04-09 19:01:52 -07003265 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003266 TEST_REQUIRES_ARM_NEON_FMA;
3267 for (uint32_t n = 1; n < 10; n += 2) {
3268 for (size_t k = 1; k <= 5; k += 2) {
3269 SpMMMicrokernelTester()
3270 .mr(4)
3271 .nr(1)
3272 .m(8)
3273 .n(n)
3274 .k(k)
3275 .sparsity(0.0f)
3276 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003277 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003278 }
3279 }
3280 }
3281
Marat Dukhan355ab432020-04-09 19:01:52 -07003282 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003283 TEST_REQUIRES_ARM_NEON_FMA;
3284 for (uint32_t n = 1; n < 10; n += 2) {
3285 for (size_t k = 1; k <= 5; k += 2) {
3286 SpMMMicrokernelTester()
3287 .mr(4)
3288 .nr(1)
3289 .m(8)
3290 .n(n)
3291 .k(k)
3292 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003293 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003294 }
3295 }
3296 }
3297
Marat Dukhan355ab432020-04-09 19:01:52 -07003298 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003299 TEST_REQUIRES_ARM_NEON_FMA;
3300 for (uint32_t n = 1; n < 10; n += 2) {
3301 for (size_t k = 1; k <= 5; k += 2) {
3302 SpMMMicrokernelTester()
3303 .mr(4)
3304 .nr(1)
3305 .m(8)
3306 .n(n)
3307 .k(k)
3308 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003309 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003310 }
3311 }
3312 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003313#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003314
3315
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003316#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchardbeca6522020-10-30 22:34:35 -07003317 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_eq_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003318 TEST_REQUIRES_ARM_NEON_FMA;
3319 SpMMMicrokernelTester()
3320 .mr(4)
3321 .nr(1)
3322 .m(4)
3323 .n(1)
3324 .k(2)
3325 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003326 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003327 }
3328
Frank Barchardbeca6522020-10-30 22:34:35 -07003329 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_lt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003330 TEST_REQUIRES_ARM_NEON_FMA;
3331 for (size_t k = 1; k < 2; k++) {
3332 SpMMMicrokernelTester()
3333 .mr(4)
3334 .nr(1)
3335 .m(4)
3336 .n(1)
3337 .k(k)
3338 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003339 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003340 }
3341 }
3342
Frank Barchardbeca6522020-10-30 22:34:35 -07003343 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003344 TEST_REQUIRES_ARM_NEON_FMA;
3345 for (size_t k = 3; k < 4; k++) {
3346 SpMMMicrokernelTester()
3347 .mr(4)
3348 .nr(1)
3349 .m(4)
3350 .n(1)
3351 .k(k)
3352 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003353 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003354 }
3355 }
3356
Frank Barchardbeca6522020-10-30 22:34:35 -07003357 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, k_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003358 TEST_REQUIRES_ARM_NEON_FMA;
3359 for (size_t k = 4; k <= 20; k += 2) {
3360 SpMMMicrokernelTester()
3361 .mr(4)
3362 .nr(1)
3363 .m(4)
3364 .n(1)
3365 .k(k)
3366 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003367 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003368 }
3369 }
3370
Frank Barchardbeca6522020-10-30 22:34:35 -07003371 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003372 TEST_REQUIRES_ARM_NEON_FMA;
3373 for (uint32_t n = 2; n < 10; n++) {
3374 for (size_t k = 1; k <= 10; k += 3) {
3375 SpMMMicrokernelTester()
3376 .mr(4)
3377 .nr(1)
3378 .m(4)
3379 .n(n)
3380 .k(k)
3381 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003382 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003383 }
3384 }
3385 }
3386
Frank Barchardbeca6522020-10-30 22:34:35 -07003387 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003388 TEST_REQUIRES_ARM_NEON_FMA;
3389 for (uint32_t m = 1; m < 4; m++) {
3390 for (uint32_t n = 1; n < 10; n += 2) {
3391 for (size_t k = 1; k <= 10; k += 3) {
3392 SpMMMicrokernelTester()
3393 .mr(4)
3394 .nr(1)
3395 .m(m)
3396 .n(n)
3397 .k(k)
3398 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003399 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003400 }
3401 }
3402 }
3403 }
3404
Frank Barchardbeca6522020-10-30 22:34:35 -07003405 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003406 TEST_REQUIRES_ARM_NEON_FMA;
3407 for (uint32_t m = 8; m <= 12; m += 4) {
3408 for (uint32_t n = 1; n < 10; n += 2) {
3409 for (size_t k = 1; k <= 10; k += 3) {
3410 SpMMMicrokernelTester()
3411 .mr(4)
3412 .nr(1)
3413 .m(m)
3414 .n(n)
3415 .k(k)
3416 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003417 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003418 }
3419 }
3420 }
3421 }
3422
Frank Barchardbeca6522020-10-30 22:34:35 -07003423 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003424 TEST_REQUIRES_ARM_NEON_FMA;
3425 for (uint32_t m = 5; m < 8; m++) {
3426 for (uint32_t n = 1; n < 10; n += 2) {
3427 for (size_t k = 1; k <= 10; k += 3) {
3428 SpMMMicrokernelTester()
3429 .mr(4)
3430 .nr(1)
3431 .m(m)
3432 .n(n)
3433 .k(k)
3434 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003435 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003436 }
3437 }
3438 }
3439 }
3440
Marat Dukhane8bfcc82020-11-16 12:28:13 -08003441 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, output_stride) {
3442 TEST_REQUIRES_ARM_NEON_FMA;
3443 for (uint32_t n = 1; n < 10; n += 2) {
3444 for (size_t k = 1; k <= 10; k += 3) {
3445 SpMMMicrokernelTester()
3446 .mr(4)
3447 .nr(1)
3448 .m(8)
3449 .n(n)
3450 .k(k)
3451 .output_stride(11)
3452 .sparsity(0.0f)
3453 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
3454 }
3455 }
3456 }
3457
Frank Barchardbeca6522020-10-30 22:34:35 -07003458 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003459 TEST_REQUIRES_ARM_NEON_FMA;
3460 for (uint32_t n = 1; n < 10; n += 2) {
3461 for (size_t k = 1; k <= 10; k += 3) {
3462 SpMMMicrokernelTester()
3463 .mr(4)
3464 .nr(1)
3465 .m(8)
3466 .n(n)
3467 .k(k)
3468 .sparsity(0.0f)
3469 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07003470 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003471 }
3472 }
3473 }
3474
Frank Barchardbeca6522020-10-30 22:34:35 -07003475 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003476 TEST_REQUIRES_ARM_NEON_FMA;
3477 for (uint32_t n = 1; n < 10; n += 2) {
3478 for (size_t k = 1; k <= 10; k += 3) {
3479 SpMMMicrokernelTester()
3480 .mr(4)
3481 .nr(1)
3482 .m(8)
3483 .n(n)
3484 .k(k)
3485 .sparsity(0.0f)
3486 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07003487 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003488 }
3489 }
3490 }
3491
Frank Barchardbeca6522020-10-30 22:34:35 -07003492 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003493 TEST_REQUIRES_ARM_NEON_FMA;
3494 for (uint32_t n = 1; n < 10; n += 2) {
3495 for (size_t k = 1; k <= 10; k += 3) {
3496 SpMMMicrokernelTester()
3497 .mr(4)
3498 .nr(1)
3499 .m(8)
3500 .n(n)
3501 .k(k)
3502 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003503 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003504 }
3505 }
3506 }
3507
Frank Barchardbeca6522020-10-30 22:34:35 -07003508 TEST(F32_SPMM_MINMAX_4X1__NEONFMA_X2, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003509 TEST_REQUIRES_ARM_NEON_FMA;
3510 for (uint32_t n = 1; n < 10; n += 2) {
3511 for (size_t k = 1; k <= 10; k += 3) {
3512 SpMMMicrokernelTester()
3513 .mr(4)
3514 .nr(1)
3515 .m(8)
3516 .n(n)
3517 .k(k)
3518 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07003519 .Test(xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003520 }
3521 }
3522 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003523#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003524
3525
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003526#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07003527 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003528 TEST_REQUIRES_ARM_NEON_FMA;
3529 SpMMMicrokernelTester()
3530 .mr(8)
3531 .nr(1)
3532 .m(8)
3533 .n(1)
3534 .k(1)
3535 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003536 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003537 }
3538
Marat Dukhan355ab432020-04-09 19:01:52 -07003539 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003540 TEST_REQUIRES_ARM_NEON_FMA;
3541 for (size_t k = 2; k < 10; k++) {
3542 SpMMMicrokernelTester()
3543 .mr(8)
3544 .nr(1)
3545 .m(8)
3546 .n(1)
3547 .k(k)
3548 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003549 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003550 }
3551 }
3552
Marat Dukhan355ab432020-04-09 19:01:52 -07003553 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003554 TEST_REQUIRES_ARM_NEON_FMA;
3555 for (uint32_t n = 2; n < 10; n++) {
3556 for (size_t k = 1; k <= 5; k += 2) {
3557 SpMMMicrokernelTester()
3558 .mr(8)
3559 .nr(1)
3560 .m(8)
3561 .n(n)
3562 .k(k)
3563 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003564 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003565 }
3566 }
3567 }
3568
Marat Dukhan355ab432020-04-09 19:01:52 -07003569 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003570 TEST_REQUIRES_ARM_NEON_FMA;
3571 for (uint32_t m = 1; m < 8; m++) {
3572 for (uint32_t n = 1; n < 10; n += 2) {
3573 for (size_t k = 1; k <= 5; k += 2) {
3574 SpMMMicrokernelTester()
3575 .mr(8)
3576 .nr(1)
3577 .m(m)
3578 .n(n)
3579 .k(k)
3580 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003581 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003582 }
3583 }
3584 }
3585 }
3586
Marat Dukhan355ab432020-04-09 19:01:52 -07003587 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003588 TEST_REQUIRES_ARM_NEON_FMA;
3589 for (uint32_t m = 16; m <= 24; m += 8) {
3590 for (uint32_t n = 1; n < 10; n += 2) {
3591 for (size_t k = 1; k <= 5; k += 2) {
3592 SpMMMicrokernelTester()
3593 .mr(8)
3594 .nr(1)
3595 .m(m)
3596 .n(n)
3597 .k(k)
3598 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003599 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003600 }
3601 }
3602 }
3603 }
3604
Marat Dukhan355ab432020-04-09 19:01:52 -07003605 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003606 TEST_REQUIRES_ARM_NEON_FMA;
3607 for (uint32_t m = 9; m < 16; m++) {
3608 for (uint32_t n = 1; n < 10; n += 2) {
3609 for (size_t k = 1; k <= 5; k += 2) {
3610 SpMMMicrokernelTester()
3611 .mr(8)
3612 .nr(1)
3613 .m(m)
3614 .n(n)
3615 .k(k)
3616 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003617 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003618 }
3619 }
3620 }
3621 }
3622
Marat Dukhane8bfcc82020-11-16 12:28:13 -08003623 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, output_stride) {
3624 TEST_REQUIRES_ARM_NEON_FMA;
3625 for (uint32_t n = 1; n < 10; n += 2) {
3626 for (size_t k = 1; k <= 5; k += 2) {
3627 SpMMMicrokernelTester()
3628 .mr(8)
3629 .nr(1)
3630 .m(16)
3631 .n(n)
3632 .k(k)
3633 .output_stride(19)
3634 .sparsity(0.0f)
3635 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
3636 }
3637 }
3638 }
3639
Marat Dukhan355ab432020-04-09 19:01:52 -07003640 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003641 TEST_REQUIRES_ARM_NEON_FMA;
3642 for (uint32_t n = 1; n < 10; n += 2) {
3643 for (size_t k = 1; k <= 5; k += 2) {
3644 SpMMMicrokernelTester()
3645 .mr(8)
3646 .nr(1)
3647 .m(16)
3648 .n(n)
3649 .k(k)
3650 .sparsity(0.0f)
3651 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003652 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003653 }
3654 }
3655 }
3656
Marat Dukhan355ab432020-04-09 19:01:52 -07003657 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003658 TEST_REQUIRES_ARM_NEON_FMA;
3659 for (uint32_t n = 1; n < 10; n += 2) {
3660 for (size_t k = 1; k <= 5; k += 2) {
3661 SpMMMicrokernelTester()
3662 .mr(8)
3663 .nr(1)
3664 .m(16)
3665 .n(n)
3666 .k(k)
3667 .sparsity(0.0f)
3668 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003669 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003670 }
3671 }
3672 }
3673
Marat Dukhan355ab432020-04-09 19:01:52 -07003674 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003675 TEST_REQUIRES_ARM_NEON_FMA;
3676 for (uint32_t n = 1; n < 10; n += 2) {
3677 for (size_t k = 1; k <= 5; k += 2) {
3678 SpMMMicrokernelTester()
3679 .mr(8)
3680 .nr(1)
3681 .m(16)
3682 .n(n)
3683 .k(k)
3684 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003685 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003686 }
3687 }
3688 }
3689
Marat Dukhan355ab432020-04-09 19:01:52 -07003690 TEST(F32_SPMM_MINMAX_8X1__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003691 TEST_REQUIRES_ARM_NEON_FMA;
3692 for (uint32_t n = 1; n < 10; n += 2) {
3693 for (size_t k = 1; k <= 5; k += 2) {
3694 SpMMMicrokernelTester()
3695 .mr(8)
3696 .nr(1)
3697 .m(16)
3698 .n(n)
3699 .k(k)
3700 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003701 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003702 }
3703 }
3704 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08003705#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003706
3707
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003708#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07003709 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003710 TEST_REQUIRES_ARM_NEON_FMA;
3711 SpMMMicrokernelTester()
3712 .mr(8)
3713 .nr(2)
3714 .m(8)
3715 .n(2)
3716 .k(1)
3717 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003718 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003719 }
3720
Marat Dukhan355ab432020-04-09 19:01:52 -07003721 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003722 TEST_REQUIRES_ARM_NEON_FMA;
3723 for (uint32_t n = 1; n <= 2; n++) {
3724 SpMMMicrokernelTester()
3725 .mr(8)
3726 .nr(2)
3727 .m(8)
3728 .n(n)
3729 .k(1)
3730 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003731 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003732 }
3733 }
3734
Marat Dukhan355ab432020-04-09 19:01:52 -07003735 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003736 TEST_REQUIRES_ARM_NEON_FMA;
3737 for (size_t k = 2; k < 10; k++) {
3738 SpMMMicrokernelTester()
3739 .mr(8)
3740 .nr(2)
3741 .m(8)
3742 .n(2)
3743 .k(k)
3744 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003745 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003746 }
3747 }
3748
Marat Dukhan355ab432020-04-09 19:01:52 -07003749 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003750 TEST_REQUIRES_ARM_NEON_FMA;
3751 for (size_t k = 2; k < 10; k++) {
3752 for (uint32_t n = 1; n <= 2; n++) {
3753 SpMMMicrokernelTester()
3754 .mr(8)
3755 .nr(2)
3756 .m(8)
3757 .n(n)
3758 .k(k)
3759 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003760 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003761 }
3762 }
3763 }
3764
Marat Dukhan355ab432020-04-09 19:01:52 -07003765 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, n_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003766 TEST_REQUIRES_ARM_NEON_FMA;
3767 for (uint32_t n = 3; n < 10; n++) {
3768 for (size_t k = 1; k <= 5; k += 2) {
3769 SpMMMicrokernelTester()
3770 .mr(8)
3771 .nr(2)
3772 .m(8)
3773 .n(n)
3774 .k(k)
3775 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003776 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003777 }
3778 }
3779 }
3780
Marat Dukhan355ab432020-04-09 19:01:52 -07003781 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, n_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003782 TEST_REQUIRES_ARM_NEON_FMA;
3783 for (uint32_t n = 4; n <= 6; n += 2) {
3784 for (size_t k = 1; k <= 5; k += 2) {
3785 SpMMMicrokernelTester()
3786 .mr(8)
3787 .nr(2)
3788 .m(8)
3789 .n(n)
3790 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07003791 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003792 }
3793 }
3794 }
3795
Marat Dukhan355ab432020-04-09 19:01:52 -07003796 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003797 TEST_REQUIRES_ARM_NEON_FMA;
3798 for (uint32_t m = 1; m < 8; m++) {
3799 for (uint32_t n = 1; n < 10; n += 3) {
3800 for (size_t k = 1; k <= 5; k += 2) {
3801 SpMMMicrokernelTester()
3802 .mr(8)
3803 .nr(2)
3804 .m(m)
3805 .n(n)
3806 .k(k)
3807 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003808 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003809 }
3810 }
3811 }
3812 }
3813
Marat Dukhan355ab432020-04-09 19:01:52 -07003814 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003815 TEST_REQUIRES_ARM_NEON_FMA;
3816 for (uint32_t m = 16; m <= 24; m += 8) {
3817 for (uint32_t n = 1; n < 10; n += 3) {
3818 for (size_t k = 1; k <= 5; k += 2) {
3819 SpMMMicrokernelTester()
3820 .mr(8)
3821 .nr(2)
3822 .m(m)
3823 .n(n)
3824 .k(k)
3825 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003826 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003827 }
3828 }
3829 }
3830 }
3831
Marat Dukhan355ab432020-04-09 19:01:52 -07003832 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003833 TEST_REQUIRES_ARM_NEON_FMA;
3834 for (uint32_t m = 9; m < 16; m++) {
3835 for (uint32_t n = 1; n < 10; n += 3) {
3836 for (size_t k = 1; k <= 5; k += 2) {
3837 SpMMMicrokernelTester()
3838 .mr(8)
3839 .nr(2)
3840 .m(m)
3841 .n(n)
3842 .k(k)
3843 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003844 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003845 }
3846 }
3847 }
3848 }
3849
Marat Dukhane8bfcc82020-11-16 12:28:13 -08003850 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, output_stride) {
3851 TEST_REQUIRES_ARM_NEON_FMA;
3852 for (uint32_t n = 1; n < 10; n += 3) {
3853 for (size_t k = 1; k <= 5; k += 2) {
3854 SpMMMicrokernelTester()
3855 .mr(8)
3856 .nr(2)
3857 .m(16)
3858 .n(n)
3859 .k(k)
3860 .output_stride(19)
3861 .sparsity(0.0f)
3862 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
3863 }
3864 }
3865 }
3866
Marat Dukhan355ab432020-04-09 19:01:52 -07003867 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003868 TEST_REQUIRES_ARM_NEON_FMA;
3869 for (uint32_t n = 1; n < 10; n += 3) {
3870 for (size_t k = 1; k <= 5; k += 2) {
3871 SpMMMicrokernelTester()
3872 .mr(8)
3873 .nr(2)
3874 .m(16)
3875 .n(n)
3876 .k(k)
3877 .sparsity(0.0f)
3878 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003879 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003880 }
3881 }
3882 }
3883
Marat Dukhan355ab432020-04-09 19:01:52 -07003884 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003885 TEST_REQUIRES_ARM_NEON_FMA;
3886 for (uint32_t n = 1; n < 10; n += 3) {
3887 for (size_t k = 1; k <= 5; k += 2) {
3888 SpMMMicrokernelTester()
3889 .mr(8)
3890 .nr(2)
3891 .m(16)
3892 .n(n)
3893 .k(k)
3894 .sparsity(0.0f)
3895 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07003896 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003897 }
3898 }
3899 }
3900
Marat Dukhan355ab432020-04-09 19:01:52 -07003901 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003902 TEST_REQUIRES_ARM_NEON_FMA;
3903 for (uint32_t n = 1; n < 10; n += 3) {
3904 for (size_t k = 1; k <= 5; k += 2) {
3905 SpMMMicrokernelTester()
3906 .mr(8)
3907 .nr(2)
3908 .m(16)
3909 .n(n)
3910 .k(k)
3911 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003912 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003913 }
3914 }
3915 }
3916
Marat Dukhan355ab432020-04-09 19:01:52 -07003917 TEST(F32_SPMM_MINMAX_8X2__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003918 TEST_REQUIRES_ARM_NEON_FMA;
3919 for (uint32_t n = 1; n < 10; n += 3) {
3920 for (size_t k = 1; k <= 5; k += 2) {
3921 SpMMMicrokernelTester()
3922 .mr(8)
3923 .nr(2)
3924 .m(16)
3925 .n(n)
3926 .k(k)
3927 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003928 .Test(xnn_f32_spmm_minmax_ukernel_8x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003929 }
3930 }
3931 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003932#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003933
3934
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003935#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07003936 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003937 TEST_REQUIRES_ARM_NEON_FMA;
3938 SpMMMicrokernelTester()
3939 .mr(8)
3940 .nr(4)
3941 .m(8)
3942 .n(4)
3943 .k(1)
3944 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003945 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003946 }
3947
Marat Dukhan355ab432020-04-09 19:01:52 -07003948 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003949 TEST_REQUIRES_ARM_NEON_FMA;
3950 for (uint32_t n = 1; n <= 4; n++) {
3951 SpMMMicrokernelTester()
3952 .mr(8)
3953 .nr(4)
3954 .m(8)
3955 .n(n)
3956 .k(1)
3957 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003958 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003959 }
3960 }
3961
Marat Dukhan355ab432020-04-09 19:01:52 -07003962 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003963 TEST_REQUIRES_ARM_NEON_FMA;
3964 for (size_t k = 2; k < 10; k++) {
3965 SpMMMicrokernelTester()
3966 .mr(8)
3967 .nr(4)
3968 .m(8)
3969 .n(4)
3970 .k(k)
3971 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003972 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003973 }
3974 }
3975
Marat Dukhan355ab432020-04-09 19:01:52 -07003976 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003977 TEST_REQUIRES_ARM_NEON_FMA;
3978 for (size_t k = 2; k < 10; k++) {
3979 for (uint32_t n = 1; n <= 4; n++) {
3980 SpMMMicrokernelTester()
3981 .mr(8)
3982 .nr(4)
3983 .m(8)
3984 .n(n)
3985 .k(k)
3986 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07003987 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003988 }
3989 }
3990 }
3991
Marat Dukhan355ab432020-04-09 19:01:52 -07003992 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, n_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003993 TEST_REQUIRES_ARM_NEON_FMA;
3994 for (uint32_t n = 5; n < 10; n++) {
3995 for (size_t k = 1; k <= 5; k += 2) {
3996 SpMMMicrokernelTester()
3997 .mr(8)
3998 .nr(4)
3999 .m(8)
4000 .n(n)
4001 .k(k)
4002 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004003 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004004 }
4005 }
4006 }
4007
Marat Dukhan355ab432020-04-09 19:01:52 -07004008 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, n_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004009 TEST_REQUIRES_ARM_NEON_FMA;
4010 for (uint32_t n = 8; n <= 12; n += 4) {
4011 for (size_t k = 1; k <= 5; k += 2) {
4012 SpMMMicrokernelTester()
4013 .mr(8)
4014 .nr(4)
4015 .m(8)
4016 .n(n)
4017 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07004018 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004019 }
4020 }
4021 }
4022
Marat Dukhan355ab432020-04-09 19:01:52 -07004023 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004024 TEST_REQUIRES_ARM_NEON_FMA;
4025 for (uint32_t m = 1; m < 8; m++) {
4026 for (uint32_t n = 1; n < 20; n += 5) {
4027 for (size_t k = 1; k <= 5; k += 2) {
4028 SpMMMicrokernelTester()
4029 .mr(8)
4030 .nr(4)
4031 .m(m)
4032 .n(n)
4033 .k(k)
4034 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004035 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004036 }
4037 }
4038 }
4039 }
4040
Marat Dukhan355ab432020-04-09 19:01:52 -07004041 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004042 TEST_REQUIRES_ARM_NEON_FMA;
4043 for (uint32_t m = 16; m <= 24; m += 8) {
4044 for (uint32_t n = 1; n < 20; n += 5) {
4045 for (size_t k = 1; k <= 5; k += 2) {
4046 SpMMMicrokernelTester()
4047 .mr(8)
4048 .nr(4)
4049 .m(m)
4050 .n(n)
4051 .k(k)
4052 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004053 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004054 }
4055 }
4056 }
4057 }
4058
Marat Dukhan355ab432020-04-09 19:01:52 -07004059 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004060 TEST_REQUIRES_ARM_NEON_FMA;
4061 for (uint32_t m = 9; m < 16; m++) {
4062 for (uint32_t n = 1; n < 20; n += 5) {
4063 for (size_t k = 1; k <= 5; k += 2) {
4064 SpMMMicrokernelTester()
4065 .mr(8)
4066 .nr(4)
4067 .m(m)
4068 .n(n)
4069 .k(k)
4070 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004071 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004072 }
4073 }
4074 }
4075 }
4076
Marat Dukhane8bfcc82020-11-16 12:28:13 -08004077 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, output_stride) {
4078 TEST_REQUIRES_ARM_NEON_FMA;
4079 for (uint32_t n = 1; n < 20; n += 5) {
4080 for (size_t k = 1; k <= 5; k += 2) {
4081 SpMMMicrokernelTester()
4082 .mr(8)
4083 .nr(4)
4084 .m(16)
4085 .n(n)
4086 .k(k)
4087 .output_stride(19)
4088 .sparsity(0.0f)
4089 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
4090 }
4091 }
4092 }
4093
Marat Dukhan355ab432020-04-09 19:01:52 -07004094 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004095 TEST_REQUIRES_ARM_NEON_FMA;
4096 for (uint32_t n = 1; n < 20; n += 5) {
4097 for (size_t k = 1; k <= 5; k += 2) {
4098 SpMMMicrokernelTester()
4099 .mr(8)
4100 .nr(4)
4101 .m(16)
4102 .n(n)
4103 .k(k)
4104 .sparsity(0.0f)
4105 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004106 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004107 }
4108 }
4109 }
4110
Marat Dukhan355ab432020-04-09 19:01:52 -07004111 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004112 TEST_REQUIRES_ARM_NEON_FMA;
4113 for (uint32_t n = 1; n < 20; n += 5) {
4114 for (size_t k = 1; k <= 5; k += 2) {
4115 SpMMMicrokernelTester()
4116 .mr(8)
4117 .nr(4)
4118 .m(16)
4119 .n(n)
4120 .k(k)
4121 .sparsity(0.0f)
4122 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004123 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004124 }
4125 }
4126 }
4127
Marat Dukhan355ab432020-04-09 19:01:52 -07004128 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004129 TEST_REQUIRES_ARM_NEON_FMA;
4130 for (uint32_t n = 1; n < 20; n += 5) {
4131 for (size_t k = 1; k <= 5; k += 2) {
4132 SpMMMicrokernelTester()
4133 .mr(8)
4134 .nr(4)
4135 .m(16)
4136 .n(n)
4137 .k(k)
4138 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004139 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004140 }
4141 }
4142 }
4143
Marat Dukhan355ab432020-04-09 19:01:52 -07004144 TEST(F32_SPMM_MINMAX_8X4__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004145 TEST_REQUIRES_ARM_NEON_FMA;
4146 for (uint32_t n = 1; n < 20; n += 5) {
4147 for (size_t k = 1; k <= 5; k += 2) {
4148 SpMMMicrokernelTester()
4149 .mr(8)
4150 .nr(4)
4151 .m(16)
4152 .n(n)
4153 .k(k)
4154 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004155 .Test(xnn_f32_spmm_minmax_ukernel_8x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004156 }
4157 }
4158 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004159#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004160
4161
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004162#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07004163 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004164 TEST_REQUIRES_ARM_NEON_FMA;
4165 SpMMMicrokernelTester()
4166 .mr(8)
4167 .nr(1)
4168 .m(8)
4169 .n(1)
4170 .k(1)
4171 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004172 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004173 }
4174
Marat Dukhan355ab432020-04-09 19:01:52 -07004175 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004176 TEST_REQUIRES_ARM_NEON_FMA;
4177 for (size_t k = 2; k < 10; k++) {
4178 SpMMMicrokernelTester()
4179 .mr(8)
4180 .nr(1)
4181 .m(8)
4182 .n(1)
4183 .k(k)
4184 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004185 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004186 }
4187 }
4188
Marat Dukhan355ab432020-04-09 19:01:52 -07004189 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004190 TEST_REQUIRES_ARM_NEON_FMA;
4191 for (uint32_t n = 2; n < 10; n++) {
4192 for (size_t k = 1; k <= 5; k += 2) {
4193 SpMMMicrokernelTester()
4194 .mr(8)
4195 .nr(1)
4196 .m(8)
4197 .n(n)
4198 .k(k)
4199 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004200 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004201 }
4202 }
4203 }
4204
Marat Dukhan355ab432020-04-09 19:01:52 -07004205 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004206 TEST_REQUIRES_ARM_NEON_FMA;
4207 for (uint32_t m = 1; m < 8; m++) {
4208 for (uint32_t n = 1; n < 10; n += 2) {
4209 for (size_t k = 1; k <= 5; k += 2) {
4210 SpMMMicrokernelTester()
4211 .mr(8)
4212 .nr(1)
4213 .m(m)
4214 .n(n)
4215 .k(k)
4216 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004217 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004218 }
4219 }
4220 }
4221 }
4222
Marat Dukhan355ab432020-04-09 19:01:52 -07004223 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004224 TEST_REQUIRES_ARM_NEON_FMA;
4225 for (uint32_t m = 16; m <= 24; m += 8) {
4226 for (uint32_t n = 1; n < 10; n += 2) {
4227 for (size_t k = 1; k <= 5; k += 2) {
4228 SpMMMicrokernelTester()
4229 .mr(8)
4230 .nr(1)
4231 .m(m)
4232 .n(n)
4233 .k(k)
4234 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004235 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004236 }
4237 }
4238 }
4239 }
4240
Marat Dukhan355ab432020-04-09 19:01:52 -07004241 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004242 TEST_REQUIRES_ARM_NEON_FMA;
4243 for (uint32_t m = 9; m < 16; m++) {
4244 for (uint32_t n = 1; n < 10; n += 2) {
4245 for (size_t k = 1; k <= 5; k += 2) {
4246 SpMMMicrokernelTester()
4247 .mr(8)
4248 .nr(1)
4249 .m(m)
4250 .n(n)
4251 .k(k)
4252 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004253 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004254 }
4255 }
4256 }
4257 }
4258
Marat Dukhane8bfcc82020-11-16 12:28:13 -08004259 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, output_stride) {
4260 TEST_REQUIRES_ARM_NEON_FMA;
4261 for (uint32_t n = 1; n < 10; n += 2) {
4262 for (size_t k = 1; k <= 5; k += 2) {
4263 SpMMMicrokernelTester()
4264 .mr(8)
4265 .nr(1)
4266 .m(16)
4267 .n(n)
4268 .k(k)
4269 .output_stride(19)
4270 .sparsity(0.0f)
4271 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
4272 }
4273 }
4274 }
4275
Marat Dukhan355ab432020-04-09 19:01:52 -07004276 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004277 TEST_REQUIRES_ARM_NEON_FMA;
4278 for (uint32_t n = 1; n < 10; n += 2) {
4279 for (size_t k = 1; k <= 5; k += 2) {
4280 SpMMMicrokernelTester()
4281 .mr(8)
4282 .nr(1)
4283 .m(16)
4284 .n(n)
4285 .k(k)
4286 .sparsity(0.0f)
4287 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004288 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004289 }
4290 }
4291 }
4292
Marat Dukhan355ab432020-04-09 19:01:52 -07004293 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004294 TEST_REQUIRES_ARM_NEON_FMA;
4295 for (uint32_t n = 1; n < 10; n += 2) {
4296 for (size_t k = 1; k <= 5; k += 2) {
4297 SpMMMicrokernelTester()
4298 .mr(8)
4299 .nr(1)
4300 .m(16)
4301 .n(n)
4302 .k(k)
4303 .sparsity(0.0f)
4304 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004305 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004306 }
4307 }
4308 }
4309
Marat Dukhan355ab432020-04-09 19:01:52 -07004310 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004311 TEST_REQUIRES_ARM_NEON_FMA;
4312 for (uint32_t n = 1; n < 10; n += 2) {
4313 for (size_t k = 1; k <= 5; k += 2) {
4314 SpMMMicrokernelTester()
4315 .mr(8)
4316 .nr(1)
4317 .m(16)
4318 .n(n)
4319 .k(k)
4320 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004321 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004322 }
4323 }
4324 }
4325
Marat Dukhan355ab432020-04-09 19:01:52 -07004326 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004327 TEST_REQUIRES_ARM_NEON_FMA;
4328 for (uint32_t n = 1; n < 10; n += 2) {
4329 for (size_t k = 1; k <= 5; k += 2) {
4330 SpMMMicrokernelTester()
4331 .mr(8)
4332 .nr(1)
4333 .m(16)
4334 .n(n)
4335 .k(k)
4336 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004337 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004338 }
4339 }
4340 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004341#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004342
4343
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004344#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchardbeca6522020-10-30 22:34:35 -07004345 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_eq_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004346 TEST_REQUIRES_ARM_NEON_FMA;
4347 SpMMMicrokernelTester()
4348 .mr(8)
4349 .nr(1)
4350 .m(8)
4351 .n(1)
4352 .k(2)
4353 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004354 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004355 }
4356
Frank Barchardbeca6522020-10-30 22:34:35 -07004357 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_lt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004358 TEST_REQUIRES_ARM_NEON_FMA;
4359 for (size_t k = 1; k < 2; k++) {
4360 SpMMMicrokernelTester()
4361 .mr(8)
4362 .nr(1)
4363 .m(8)
4364 .n(1)
4365 .k(k)
4366 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004367 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004368 }
4369 }
4370
Frank Barchardbeca6522020-10-30 22:34:35 -07004371 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004372 TEST_REQUIRES_ARM_NEON_FMA;
4373 for (size_t k = 3; k < 4; k++) {
4374 SpMMMicrokernelTester()
4375 .mr(8)
4376 .nr(1)
4377 .m(8)
4378 .n(1)
4379 .k(k)
4380 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004381 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004382 }
4383 }
4384
Frank Barchardbeca6522020-10-30 22:34:35 -07004385 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, k_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004386 TEST_REQUIRES_ARM_NEON_FMA;
4387 for (size_t k = 4; k <= 20; k += 2) {
4388 SpMMMicrokernelTester()
4389 .mr(8)
4390 .nr(1)
4391 .m(8)
4392 .n(1)
4393 .k(k)
4394 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004395 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004396 }
4397 }
4398
Frank Barchardbeca6522020-10-30 22:34:35 -07004399 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004400 TEST_REQUIRES_ARM_NEON_FMA;
4401 for (uint32_t n = 2; n < 10; n++) {
4402 for (size_t k = 1; k <= 10; k += 3) {
4403 SpMMMicrokernelTester()
4404 .mr(8)
4405 .nr(1)
4406 .m(8)
4407 .n(n)
4408 .k(k)
4409 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004410 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004411 }
4412 }
4413 }
4414
Frank Barchardbeca6522020-10-30 22:34:35 -07004415 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004416 TEST_REQUIRES_ARM_NEON_FMA;
4417 for (uint32_t m = 1; m < 8; m++) {
4418 for (uint32_t n = 1; n < 10; n += 2) {
4419 for (size_t k = 1; k <= 10; k += 3) {
4420 SpMMMicrokernelTester()
4421 .mr(8)
4422 .nr(1)
4423 .m(m)
4424 .n(n)
4425 .k(k)
4426 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004427 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004428 }
4429 }
4430 }
4431 }
4432
Frank Barchardbeca6522020-10-30 22:34:35 -07004433 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004434 TEST_REQUIRES_ARM_NEON_FMA;
4435 for (uint32_t m = 16; m <= 24; m += 8) {
4436 for (uint32_t n = 1; n < 10; n += 2) {
4437 for (size_t k = 1; k <= 10; k += 3) {
4438 SpMMMicrokernelTester()
4439 .mr(8)
4440 .nr(1)
4441 .m(m)
4442 .n(n)
4443 .k(k)
4444 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004445 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004446 }
4447 }
4448 }
4449 }
4450
Frank Barchardbeca6522020-10-30 22:34:35 -07004451 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004452 TEST_REQUIRES_ARM_NEON_FMA;
4453 for (uint32_t m = 9; m < 16; m++) {
4454 for (uint32_t n = 1; n < 10; n += 2) {
4455 for (size_t k = 1; k <= 10; k += 3) {
4456 SpMMMicrokernelTester()
4457 .mr(8)
4458 .nr(1)
4459 .m(m)
4460 .n(n)
4461 .k(k)
4462 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004463 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004464 }
4465 }
4466 }
4467 }
4468
Marat Dukhane8bfcc82020-11-16 12:28:13 -08004469 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, output_stride) {
4470 TEST_REQUIRES_ARM_NEON_FMA;
4471 for (uint32_t n = 1; n < 10; n += 2) {
4472 for (size_t k = 1; k <= 10; k += 3) {
4473 SpMMMicrokernelTester()
4474 .mr(8)
4475 .nr(1)
4476 .m(16)
4477 .n(n)
4478 .k(k)
4479 .output_stride(19)
4480 .sparsity(0.0f)
4481 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
4482 }
4483 }
4484 }
4485
Frank Barchardbeca6522020-10-30 22:34:35 -07004486 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004487 TEST_REQUIRES_ARM_NEON_FMA;
4488 for (uint32_t n = 1; n < 10; n += 2) {
4489 for (size_t k = 1; k <= 10; k += 3) {
4490 SpMMMicrokernelTester()
4491 .mr(8)
4492 .nr(1)
4493 .m(16)
4494 .n(n)
4495 .k(k)
4496 .sparsity(0.0f)
4497 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07004498 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004499 }
4500 }
4501 }
4502
Frank Barchardbeca6522020-10-30 22:34:35 -07004503 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004504 TEST_REQUIRES_ARM_NEON_FMA;
4505 for (uint32_t n = 1; n < 10; n += 2) {
4506 for (size_t k = 1; k <= 10; k += 3) {
4507 SpMMMicrokernelTester()
4508 .mr(8)
4509 .nr(1)
4510 .m(16)
4511 .n(n)
4512 .k(k)
4513 .sparsity(0.0f)
4514 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07004515 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004516 }
4517 }
4518 }
4519
Frank Barchardbeca6522020-10-30 22:34:35 -07004520 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004521 TEST_REQUIRES_ARM_NEON_FMA;
4522 for (uint32_t n = 1; n < 10; n += 2) {
4523 for (size_t k = 1; k <= 10; k += 3) {
4524 SpMMMicrokernelTester()
4525 .mr(8)
4526 .nr(1)
4527 .m(16)
4528 .n(n)
4529 .k(k)
4530 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004531 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004532 }
4533 }
4534 }
4535
Frank Barchardbeca6522020-10-30 22:34:35 -07004536 TEST(F32_SPMM_MINMAX_8X1__NEONFMA_X2, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004537 TEST_REQUIRES_ARM_NEON_FMA;
4538 for (uint32_t n = 1; n < 10; n += 2) {
4539 for (size_t k = 1; k <= 10; k += 3) {
4540 SpMMMicrokernelTester()
4541 .mr(8)
4542 .nr(1)
4543 .m(16)
4544 .n(n)
4545 .k(k)
4546 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07004547 .Test(xnn_f32_spmm_minmax_ukernel_8x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004548 }
4549 }
4550 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004551#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004552
4553
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004554#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07004555 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004556 TEST_REQUIRES_ARM_NEON_FMA;
4557 SpMMMicrokernelTester()
4558 .mr(12)
4559 .nr(1)
4560 .m(12)
4561 .n(1)
4562 .k(1)
4563 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004564 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004565 }
4566
Marat Dukhan355ab432020-04-09 19:01:52 -07004567 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004568 TEST_REQUIRES_ARM_NEON_FMA;
4569 for (size_t k = 2; k < 10; k++) {
4570 SpMMMicrokernelTester()
4571 .mr(12)
4572 .nr(1)
4573 .m(12)
4574 .n(1)
4575 .k(k)
4576 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004577 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004578 }
4579 }
4580
Marat Dukhan355ab432020-04-09 19:01:52 -07004581 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004582 TEST_REQUIRES_ARM_NEON_FMA;
4583 for (uint32_t n = 2; n < 10; n++) {
4584 for (size_t k = 1; k <= 5; k += 2) {
4585 SpMMMicrokernelTester()
4586 .mr(12)
4587 .nr(1)
4588 .m(12)
4589 .n(n)
4590 .k(k)
4591 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004592 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004593 }
4594 }
4595 }
4596
Marat Dukhan355ab432020-04-09 19:01:52 -07004597 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_lt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004598 TEST_REQUIRES_ARM_NEON_FMA;
4599 for (uint32_t m = 1; m < 12; m++) {
4600 for (uint32_t n = 1; n < 10; n += 2) {
4601 for (size_t k = 1; k <= 5; k += 2) {
4602 SpMMMicrokernelTester()
4603 .mr(12)
4604 .nr(1)
4605 .m(m)
4606 .n(n)
4607 .k(k)
4608 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004609 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004610 }
4611 }
4612 }
4613 }
4614
Marat Dukhan355ab432020-04-09 19:01:52 -07004615 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_div_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004616 TEST_REQUIRES_ARM_NEON_FMA;
4617 for (uint32_t m = 24; m <= 36; m += 12) {
4618 for (uint32_t n = 1; n < 10; n += 2) {
4619 for (size_t k = 1; k <= 5; k += 2) {
4620 SpMMMicrokernelTester()
4621 .mr(12)
4622 .nr(1)
4623 .m(m)
4624 .n(n)
4625 .k(k)
4626 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004627 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004628 }
4629 }
4630 }
4631 }
4632
Marat Dukhan355ab432020-04-09 19:01:52 -07004633 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, m_gt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004634 TEST_REQUIRES_ARM_NEON_FMA;
4635 for (uint32_t m = 13; m < 24; m++) {
4636 for (uint32_t n = 1; n < 10; n += 2) {
4637 for (size_t k = 1; k <= 5; k += 2) {
4638 SpMMMicrokernelTester()
4639 .mr(12)
4640 .nr(1)
4641 .m(m)
4642 .n(n)
4643 .k(k)
4644 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004645 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004646 }
4647 }
4648 }
4649 }
4650
Marat Dukhane8bfcc82020-11-16 12:28:13 -08004651 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, output_stride) {
4652 TEST_REQUIRES_ARM_NEON_FMA;
4653 for (uint32_t n = 1; n < 10; n += 2) {
4654 for (size_t k = 1; k <= 5; k += 2) {
4655 SpMMMicrokernelTester()
4656 .mr(12)
4657 .nr(1)
4658 .m(24)
4659 .n(n)
4660 .k(k)
4661 .output_stride(29)
4662 .sparsity(0.0f)
4663 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
4664 }
4665 }
4666 }
4667
Marat Dukhan355ab432020-04-09 19:01:52 -07004668 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004669 TEST_REQUIRES_ARM_NEON_FMA;
4670 for (uint32_t n = 1; n < 10; n += 2) {
4671 for (size_t k = 1; k <= 5; k += 2) {
4672 SpMMMicrokernelTester()
4673 .mr(12)
4674 .nr(1)
4675 .m(24)
4676 .n(n)
4677 .k(k)
4678 .sparsity(0.0f)
4679 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004680 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004681 }
4682 }
4683 }
4684
Marat Dukhan355ab432020-04-09 19:01:52 -07004685 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004686 TEST_REQUIRES_ARM_NEON_FMA;
4687 for (uint32_t n = 1; n < 10; n += 2) {
4688 for (size_t k = 1; k <= 5; k += 2) {
4689 SpMMMicrokernelTester()
4690 .mr(12)
4691 .nr(1)
4692 .m(24)
4693 .n(n)
4694 .k(k)
4695 .sparsity(0.0f)
4696 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004697 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004698 }
4699 }
4700 }
4701
Marat Dukhan355ab432020-04-09 19:01:52 -07004702 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004703 TEST_REQUIRES_ARM_NEON_FMA;
4704 for (uint32_t n = 1; n < 10; n += 2) {
4705 for (size_t k = 1; k <= 5; k += 2) {
4706 SpMMMicrokernelTester()
4707 .mr(12)
4708 .nr(1)
4709 .m(24)
4710 .n(n)
4711 .k(k)
4712 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004713 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004714 }
4715 }
4716 }
4717
Marat Dukhan355ab432020-04-09 19:01:52 -07004718 TEST(F32_SPMM_MINMAX_12X1__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004719 TEST_REQUIRES_ARM_NEON_FMA;
4720 for (uint32_t n = 1; n < 10; n += 2) {
4721 for (size_t k = 1; k <= 5; k += 2) {
4722 SpMMMicrokernelTester()
4723 .mr(12)
4724 .nr(1)
4725 .m(24)
4726 .n(n)
4727 .k(k)
4728 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004729 .Test(xnn_f32_spmm_minmax_ukernel_12x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004730 }
4731 }
4732 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08004733#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004734
4735
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004736#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07004737 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004738 TEST_REQUIRES_ARM_NEON_FMA;
4739 SpMMMicrokernelTester()
4740 .mr(12)
4741 .nr(2)
4742 .m(12)
4743 .n(2)
4744 .k(1)
4745 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004746 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004747 }
4748
Marat Dukhan355ab432020-04-09 19:01:52 -07004749 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004750 TEST_REQUIRES_ARM_NEON_FMA;
4751 for (uint32_t n = 1; n <= 2; n++) {
4752 SpMMMicrokernelTester()
4753 .mr(12)
4754 .nr(2)
4755 .m(12)
4756 .n(n)
4757 .k(1)
4758 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004759 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004760 }
4761 }
4762
Marat Dukhan355ab432020-04-09 19:01:52 -07004763 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004764 TEST_REQUIRES_ARM_NEON_FMA;
4765 for (size_t k = 2; k < 10; k++) {
4766 SpMMMicrokernelTester()
4767 .mr(12)
4768 .nr(2)
4769 .m(12)
4770 .n(2)
4771 .k(k)
4772 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004773 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004774 }
4775 }
4776
Marat Dukhan355ab432020-04-09 19:01:52 -07004777 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004778 TEST_REQUIRES_ARM_NEON_FMA;
4779 for (size_t k = 2; k < 10; k++) {
4780 for (uint32_t n = 1; n <= 2; n++) {
4781 SpMMMicrokernelTester()
4782 .mr(12)
4783 .nr(2)
4784 .m(12)
4785 .n(n)
4786 .k(k)
4787 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004788 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004789 }
4790 }
4791 }
4792
Marat Dukhan355ab432020-04-09 19:01:52 -07004793 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, n_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004794 TEST_REQUIRES_ARM_NEON_FMA;
4795 for (uint32_t n = 3; n < 10; n++) {
4796 for (size_t k = 1; k <= 5; k += 2) {
4797 SpMMMicrokernelTester()
4798 .mr(12)
4799 .nr(2)
4800 .m(12)
4801 .n(n)
4802 .k(k)
4803 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004804 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004805 }
4806 }
4807 }
4808
Marat Dukhan355ab432020-04-09 19:01:52 -07004809 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, n_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004810 TEST_REQUIRES_ARM_NEON_FMA;
4811 for (uint32_t n = 4; n <= 6; n += 2) {
4812 for (size_t k = 1; k <= 5; k += 2) {
4813 SpMMMicrokernelTester()
4814 .mr(12)
4815 .nr(2)
4816 .m(12)
4817 .n(n)
4818 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07004819 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004820 }
4821 }
4822 }
4823
Marat Dukhan355ab432020-04-09 19:01:52 -07004824 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_lt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004825 TEST_REQUIRES_ARM_NEON_FMA;
4826 for (uint32_t m = 1; m < 12; m++) {
4827 for (uint32_t n = 1; n < 10; n += 3) {
4828 for (size_t k = 1; k <= 5; k += 2) {
4829 SpMMMicrokernelTester()
4830 .mr(12)
4831 .nr(2)
4832 .m(m)
4833 .n(n)
4834 .k(k)
4835 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004836 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004837 }
4838 }
4839 }
4840 }
4841
Marat Dukhan355ab432020-04-09 19:01:52 -07004842 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_div_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004843 TEST_REQUIRES_ARM_NEON_FMA;
4844 for (uint32_t m = 24; m <= 36; m += 12) {
4845 for (uint32_t n = 1; n < 10; n += 3) {
4846 for (size_t k = 1; k <= 5; k += 2) {
4847 SpMMMicrokernelTester()
4848 .mr(12)
4849 .nr(2)
4850 .m(m)
4851 .n(n)
4852 .k(k)
4853 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004854 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004855 }
4856 }
4857 }
4858 }
4859
Marat Dukhan355ab432020-04-09 19:01:52 -07004860 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, m_gt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004861 TEST_REQUIRES_ARM_NEON_FMA;
4862 for (uint32_t m = 13; m < 24; m++) {
4863 for (uint32_t n = 1; n < 10; n += 3) {
4864 for (size_t k = 1; k <= 5; k += 2) {
4865 SpMMMicrokernelTester()
4866 .mr(12)
4867 .nr(2)
4868 .m(m)
4869 .n(n)
4870 .k(k)
4871 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004872 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004873 }
4874 }
4875 }
4876 }
4877
Marat Dukhane8bfcc82020-11-16 12:28:13 -08004878 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, output_stride) {
4879 TEST_REQUIRES_ARM_NEON_FMA;
4880 for (uint32_t n = 1; n < 10; n += 3) {
4881 for (size_t k = 1; k <= 5; k += 2) {
4882 SpMMMicrokernelTester()
4883 .mr(12)
4884 .nr(2)
4885 .m(24)
4886 .n(n)
4887 .k(k)
4888 .output_stride(29)
4889 .sparsity(0.0f)
4890 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
4891 }
4892 }
4893 }
4894
Marat Dukhan355ab432020-04-09 19:01:52 -07004895 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004896 TEST_REQUIRES_ARM_NEON_FMA;
4897 for (uint32_t n = 1; n < 10; n += 3) {
4898 for (size_t k = 1; k <= 5; k += 2) {
4899 SpMMMicrokernelTester()
4900 .mr(12)
4901 .nr(2)
4902 .m(24)
4903 .n(n)
4904 .k(k)
4905 .sparsity(0.0f)
4906 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004907 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004908 }
4909 }
4910 }
4911
Marat Dukhan355ab432020-04-09 19:01:52 -07004912 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004913 TEST_REQUIRES_ARM_NEON_FMA;
4914 for (uint32_t n = 1; n < 10; n += 3) {
4915 for (size_t k = 1; k <= 5; k += 2) {
4916 SpMMMicrokernelTester()
4917 .mr(12)
4918 .nr(2)
4919 .m(24)
4920 .n(n)
4921 .k(k)
4922 .sparsity(0.0f)
4923 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07004924 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004925 }
4926 }
4927 }
4928
Marat Dukhan355ab432020-04-09 19:01:52 -07004929 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004930 TEST_REQUIRES_ARM_NEON_FMA;
4931 for (uint32_t n = 1; n < 10; n += 3) {
4932 for (size_t k = 1; k <= 5; k += 2) {
4933 SpMMMicrokernelTester()
4934 .mr(12)
4935 .nr(2)
4936 .m(24)
4937 .n(n)
4938 .k(k)
4939 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004940 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004941 }
4942 }
4943 }
4944
Marat Dukhan355ab432020-04-09 19:01:52 -07004945 TEST(F32_SPMM_MINMAX_12X2__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004946 TEST_REQUIRES_ARM_NEON_FMA;
4947 for (uint32_t n = 1; n < 10; n += 3) {
4948 for (size_t k = 1; k <= 5; k += 2) {
4949 SpMMMicrokernelTester()
4950 .mr(12)
4951 .nr(2)
4952 .m(24)
4953 .n(n)
4954 .k(k)
4955 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004956 .Test(xnn_f32_spmm_minmax_ukernel_12x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004957 }
4958 }
4959 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004960#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004961
4962
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004963#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07004964 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004965 TEST_REQUIRES_ARM_NEON_FMA;
4966 SpMMMicrokernelTester()
4967 .mr(12)
4968 .nr(4)
4969 .m(12)
4970 .n(4)
4971 .k(1)
4972 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004973 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004974 }
4975
Marat Dukhan355ab432020-04-09 19:01:52 -07004976 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004977 TEST_REQUIRES_ARM_NEON_FMA;
4978 for (uint32_t n = 1; n <= 4; n++) {
4979 SpMMMicrokernelTester()
4980 .mr(12)
4981 .nr(4)
4982 .m(12)
4983 .n(n)
4984 .k(1)
4985 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07004986 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004987 }
4988 }
4989
Marat Dukhan355ab432020-04-09 19:01:52 -07004990 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004991 TEST_REQUIRES_ARM_NEON_FMA;
4992 for (size_t k = 2; k < 10; k++) {
4993 SpMMMicrokernelTester()
4994 .mr(12)
4995 .nr(4)
4996 .m(12)
4997 .n(4)
4998 .k(k)
4999 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005000 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005001 }
5002 }
5003
Marat Dukhan355ab432020-04-09 19:01:52 -07005004 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005005 TEST_REQUIRES_ARM_NEON_FMA;
5006 for (size_t k = 2; k < 10; k++) {
5007 for (uint32_t n = 1; n <= 4; n++) {
5008 SpMMMicrokernelTester()
5009 .mr(12)
5010 .nr(4)
5011 .m(12)
5012 .n(n)
5013 .k(k)
5014 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005015 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005016 }
5017 }
5018 }
5019
Marat Dukhan355ab432020-04-09 19:01:52 -07005020 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, n_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005021 TEST_REQUIRES_ARM_NEON_FMA;
5022 for (uint32_t n = 5; n < 10; n++) {
5023 for (size_t k = 1; k <= 5; k += 2) {
5024 SpMMMicrokernelTester()
5025 .mr(12)
5026 .nr(4)
5027 .m(12)
5028 .n(n)
5029 .k(k)
5030 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005031 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005032 }
5033 }
5034 }
5035
Marat Dukhan355ab432020-04-09 19:01:52 -07005036 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, n_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005037 TEST_REQUIRES_ARM_NEON_FMA;
5038 for (uint32_t n = 8; n <= 12; n += 4) {
5039 for (size_t k = 1; k <= 5; k += 2) {
5040 SpMMMicrokernelTester()
5041 .mr(12)
5042 .nr(4)
5043 .m(12)
5044 .n(n)
5045 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07005046 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005047 }
5048 }
5049 }
5050
Marat Dukhan355ab432020-04-09 19:01:52 -07005051 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_lt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005052 TEST_REQUIRES_ARM_NEON_FMA;
5053 for (uint32_t m = 1; m < 12; m++) {
5054 for (uint32_t n = 1; n < 20; n += 5) {
5055 for (size_t k = 1; k <= 5; k += 2) {
5056 SpMMMicrokernelTester()
5057 .mr(12)
5058 .nr(4)
5059 .m(m)
5060 .n(n)
5061 .k(k)
5062 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005063 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005064 }
5065 }
5066 }
5067 }
5068
Marat Dukhan355ab432020-04-09 19:01:52 -07005069 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_div_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005070 TEST_REQUIRES_ARM_NEON_FMA;
5071 for (uint32_t m = 24; m <= 36; m += 12) {
5072 for (uint32_t n = 1; n < 20; n += 5) {
5073 for (size_t k = 1; k <= 5; k += 2) {
5074 SpMMMicrokernelTester()
5075 .mr(12)
5076 .nr(4)
5077 .m(m)
5078 .n(n)
5079 .k(k)
5080 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005081 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005082 }
5083 }
5084 }
5085 }
5086
Marat Dukhan355ab432020-04-09 19:01:52 -07005087 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, m_gt_12) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005088 TEST_REQUIRES_ARM_NEON_FMA;
5089 for (uint32_t m = 13; m < 24; m++) {
5090 for (uint32_t n = 1; n < 20; n += 5) {
5091 for (size_t k = 1; k <= 5; k += 2) {
5092 SpMMMicrokernelTester()
5093 .mr(12)
5094 .nr(4)
5095 .m(m)
5096 .n(n)
5097 .k(k)
5098 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005099 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005100 }
5101 }
5102 }
5103 }
5104
Marat Dukhane8bfcc82020-11-16 12:28:13 -08005105 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, output_stride) {
5106 TEST_REQUIRES_ARM_NEON_FMA;
5107 for (uint32_t n = 1; n < 20; n += 5) {
5108 for (size_t k = 1; k <= 5; k += 2) {
5109 SpMMMicrokernelTester()
5110 .mr(12)
5111 .nr(4)
5112 .m(24)
5113 .n(n)
5114 .k(k)
5115 .output_stride(29)
5116 .sparsity(0.0f)
5117 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
5118 }
5119 }
5120 }
5121
Marat Dukhan355ab432020-04-09 19:01:52 -07005122 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005123 TEST_REQUIRES_ARM_NEON_FMA;
5124 for (uint32_t n = 1; n < 20; n += 5) {
5125 for (size_t k = 1; k <= 5; k += 2) {
5126 SpMMMicrokernelTester()
5127 .mr(12)
5128 .nr(4)
5129 .m(24)
5130 .n(n)
5131 .k(k)
5132 .sparsity(0.0f)
5133 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005134 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005135 }
5136 }
5137 }
5138
Marat Dukhan355ab432020-04-09 19:01:52 -07005139 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005140 TEST_REQUIRES_ARM_NEON_FMA;
5141 for (uint32_t n = 1; n < 20; n += 5) {
5142 for (size_t k = 1; k <= 5; k += 2) {
5143 SpMMMicrokernelTester()
5144 .mr(12)
5145 .nr(4)
5146 .m(24)
5147 .n(n)
5148 .k(k)
5149 .sparsity(0.0f)
5150 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005151 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005152 }
5153 }
5154 }
5155
Marat Dukhan355ab432020-04-09 19:01:52 -07005156 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005157 TEST_REQUIRES_ARM_NEON_FMA;
5158 for (uint32_t n = 1; n < 20; n += 5) {
5159 for (size_t k = 1; k <= 5; k += 2) {
5160 SpMMMicrokernelTester()
5161 .mr(12)
5162 .nr(4)
5163 .m(24)
5164 .n(n)
5165 .k(k)
5166 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005167 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005168 }
5169 }
5170 }
5171
Marat Dukhan355ab432020-04-09 19:01:52 -07005172 TEST(F32_SPMM_MINMAX_12X4__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005173 TEST_REQUIRES_ARM_NEON_FMA;
5174 for (uint32_t n = 1; n < 20; n += 5) {
5175 for (size_t k = 1; k <= 5; k += 2) {
5176 SpMMMicrokernelTester()
5177 .mr(12)
5178 .nr(4)
5179 .m(24)
5180 .n(n)
5181 .k(k)
5182 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005183 .Test(xnn_f32_spmm_minmax_ukernel_12x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005184 }
5185 }
5186 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07005187#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07005188
5189
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08005190#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07005191 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005192 TEST_REQUIRES_ARM_NEON_FMA;
5193 SpMMMicrokernelTester()
5194 .mr(16)
5195 .nr(1)
5196 .m(16)
5197 .n(1)
5198 .k(1)
5199 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005200 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005201 }
5202
Marat Dukhan355ab432020-04-09 19:01:52 -07005203 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005204 TEST_REQUIRES_ARM_NEON_FMA;
5205 for (size_t k = 2; k < 10; k++) {
5206 SpMMMicrokernelTester()
5207 .mr(16)
5208 .nr(1)
5209 .m(16)
5210 .n(1)
5211 .k(k)
5212 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005213 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005214 }
5215 }
5216
Marat Dukhan355ab432020-04-09 19:01:52 -07005217 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005218 TEST_REQUIRES_ARM_NEON_FMA;
5219 for (uint32_t n = 2; n < 10; n++) {
5220 for (size_t k = 1; k <= 5; k += 2) {
5221 SpMMMicrokernelTester()
5222 .mr(16)
5223 .nr(1)
5224 .m(16)
5225 .n(n)
5226 .k(k)
5227 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005228 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005229 }
5230 }
5231 }
5232
Marat Dukhan355ab432020-04-09 19:01:52 -07005233 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_lt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005234 TEST_REQUIRES_ARM_NEON_FMA;
5235 for (uint32_t m = 1; m < 16; m++) {
5236 for (uint32_t n = 1; n < 10; n += 2) {
5237 for (size_t k = 1; k <= 5; k += 2) {
5238 SpMMMicrokernelTester()
5239 .mr(16)
5240 .nr(1)
5241 .m(m)
5242 .n(n)
5243 .k(k)
5244 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005245 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005246 }
5247 }
5248 }
5249 }
5250
Marat Dukhan355ab432020-04-09 19:01:52 -07005251 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_div_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005252 TEST_REQUIRES_ARM_NEON_FMA;
5253 for (uint32_t m = 32; m <= 48; m += 16) {
5254 for (uint32_t n = 1; n < 10; n += 2) {
5255 for (size_t k = 1; k <= 5; k += 2) {
5256 SpMMMicrokernelTester()
5257 .mr(16)
5258 .nr(1)
5259 .m(m)
5260 .n(n)
5261 .k(k)
5262 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005263 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005264 }
5265 }
5266 }
5267 }
5268
Marat Dukhan355ab432020-04-09 19:01:52 -07005269 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, m_gt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005270 TEST_REQUIRES_ARM_NEON_FMA;
5271 for (uint32_t m = 17; m < 32; m++) {
5272 for (uint32_t n = 1; n < 10; n += 2) {
5273 for (size_t k = 1; k <= 5; k += 2) {
5274 SpMMMicrokernelTester()
5275 .mr(16)
5276 .nr(1)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005281 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005282 }
5283 }
5284 }
5285 }
5286
Marat Dukhane8bfcc82020-11-16 12:28:13 -08005287 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, output_stride) {
5288 TEST_REQUIRES_ARM_NEON_FMA;
5289 for (uint32_t n = 1; n < 10; n += 2) {
5290 for (size_t k = 1; k <= 5; k += 2) {
5291 SpMMMicrokernelTester()
5292 .mr(16)
5293 .nr(1)
5294 .m(32)
5295 .n(n)
5296 .k(k)
5297 .output_stride(37)
5298 .sparsity(0.0f)
5299 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
5300 }
5301 }
5302 }
5303
Marat Dukhan355ab432020-04-09 19:01:52 -07005304 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005305 TEST_REQUIRES_ARM_NEON_FMA;
5306 for (uint32_t n = 1; n < 10; n += 2) {
5307 for (size_t k = 1; k <= 5; k += 2) {
5308 SpMMMicrokernelTester()
5309 .mr(16)
5310 .nr(1)
5311 .m(32)
5312 .n(n)
5313 .k(k)
5314 .sparsity(0.0f)
5315 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005316 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005317 }
5318 }
5319 }
5320
Marat Dukhan355ab432020-04-09 19:01:52 -07005321 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005322 TEST_REQUIRES_ARM_NEON_FMA;
5323 for (uint32_t n = 1; n < 10; n += 2) {
5324 for (size_t k = 1; k <= 5; k += 2) {
5325 SpMMMicrokernelTester()
5326 .mr(16)
5327 .nr(1)
5328 .m(32)
5329 .n(n)
5330 .k(k)
5331 .sparsity(0.0f)
5332 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005333 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005334 }
5335 }
5336 }
5337
Marat Dukhan355ab432020-04-09 19:01:52 -07005338 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005339 TEST_REQUIRES_ARM_NEON_FMA;
5340 for (uint32_t n = 1; n < 10; n += 2) {
5341 for (size_t k = 1; k <= 5; k += 2) {
5342 SpMMMicrokernelTester()
5343 .mr(16)
5344 .nr(1)
5345 .m(32)
5346 .n(n)
5347 .k(k)
5348 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005349 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005350 }
5351 }
5352 }
5353
Marat Dukhan355ab432020-04-09 19:01:52 -07005354 TEST(F32_SPMM_MINMAX_16X1__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005355 TEST_REQUIRES_ARM_NEON_FMA;
5356 for (uint32_t n = 1; n < 10; n += 2) {
5357 for (size_t k = 1; k <= 5; k += 2) {
5358 SpMMMicrokernelTester()
5359 .mr(16)
5360 .nr(1)
5361 .m(32)
5362 .n(n)
5363 .k(k)
5364 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005365 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005366 }
5367 }
5368 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08005369#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07005370
5371
Marat Dukhan1dadbf72019-10-01 10:46:20 -07005372#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07005373 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005374 TEST_REQUIRES_ARM_NEON_FMA;
5375 SpMMMicrokernelTester()
5376 .mr(16)
5377 .nr(2)
5378 .m(16)
5379 .n(2)
5380 .k(1)
5381 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005382 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005383 }
5384
Marat Dukhan355ab432020-04-09 19:01:52 -07005385 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005386 TEST_REQUIRES_ARM_NEON_FMA;
5387 for (uint32_t n = 1; n <= 2; n++) {
5388 SpMMMicrokernelTester()
5389 .mr(16)
5390 .nr(2)
5391 .m(16)
5392 .n(n)
5393 .k(1)
5394 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005395 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005396 }
5397 }
5398
Marat Dukhan355ab432020-04-09 19:01:52 -07005399 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005400 TEST_REQUIRES_ARM_NEON_FMA;
5401 for (size_t k = 2; k < 10; k++) {
5402 SpMMMicrokernelTester()
5403 .mr(16)
5404 .nr(2)
5405 .m(16)
5406 .n(2)
5407 .k(k)
5408 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005409 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005410 }
5411 }
5412
Marat Dukhan355ab432020-04-09 19:01:52 -07005413 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005414 TEST_REQUIRES_ARM_NEON_FMA;
5415 for (size_t k = 2; k < 10; k++) {
5416 for (uint32_t n = 1; n <= 2; n++) {
5417 SpMMMicrokernelTester()
5418 .mr(16)
5419 .nr(2)
5420 .m(16)
5421 .n(n)
5422 .k(k)
5423 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005424 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005425 }
5426 }
5427 }
5428
Marat Dukhan355ab432020-04-09 19:01:52 -07005429 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, n_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005430 TEST_REQUIRES_ARM_NEON_FMA;
5431 for (uint32_t n = 3; n < 10; n++) {
5432 for (size_t k = 1; k <= 5; k += 2) {
5433 SpMMMicrokernelTester()
5434 .mr(16)
5435 .nr(2)
5436 .m(16)
5437 .n(n)
5438 .k(k)
5439 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005440 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005441 }
5442 }
5443 }
5444
Marat Dukhan355ab432020-04-09 19:01:52 -07005445 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, n_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005446 TEST_REQUIRES_ARM_NEON_FMA;
5447 for (uint32_t n = 4; n <= 6; n += 2) {
5448 for (size_t k = 1; k <= 5; k += 2) {
5449 SpMMMicrokernelTester()
5450 .mr(16)
5451 .nr(2)
5452 .m(16)
5453 .n(n)
5454 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07005455 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005456 }
5457 }
5458 }
5459
Marat Dukhan355ab432020-04-09 19:01:52 -07005460 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_lt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005461 TEST_REQUIRES_ARM_NEON_FMA;
5462 for (uint32_t m = 1; m < 16; m++) {
5463 for (uint32_t n = 1; n < 10; n += 3) {
5464 for (size_t k = 1; k <= 5; k += 2) {
5465 SpMMMicrokernelTester()
5466 .mr(16)
5467 .nr(2)
5468 .m(m)
5469 .n(n)
5470 .k(k)
5471 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005472 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005473 }
5474 }
5475 }
5476 }
5477
Marat Dukhan355ab432020-04-09 19:01:52 -07005478 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_div_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005479 TEST_REQUIRES_ARM_NEON_FMA;
5480 for (uint32_t m = 32; m <= 48; m += 16) {
5481 for (uint32_t n = 1; n < 10; n += 3) {
5482 for (size_t k = 1; k <= 5; k += 2) {
5483 SpMMMicrokernelTester()
5484 .mr(16)
5485 .nr(2)
5486 .m(m)
5487 .n(n)
5488 .k(k)
5489 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005490 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005491 }
5492 }
5493 }
5494 }
5495
Marat Dukhan355ab432020-04-09 19:01:52 -07005496 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, m_gt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005497 TEST_REQUIRES_ARM_NEON_FMA;
5498 for (uint32_t m = 17; m < 32; m++) {
5499 for (uint32_t n = 1; n < 10; n += 3) {
5500 for (size_t k = 1; k <= 5; k += 2) {
5501 SpMMMicrokernelTester()
5502 .mr(16)
5503 .nr(2)
5504 .m(m)
5505 .n(n)
5506 .k(k)
5507 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005508 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005509 }
5510 }
5511 }
5512 }
5513
Marat Dukhane8bfcc82020-11-16 12:28:13 -08005514 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, output_stride) {
5515 TEST_REQUIRES_ARM_NEON_FMA;
5516 for (uint32_t n = 1; n < 10; n += 3) {
5517 for (size_t k = 1; k <= 5; k += 2) {
5518 SpMMMicrokernelTester()
5519 .mr(16)
5520 .nr(2)
5521 .m(32)
5522 .n(n)
5523 .k(k)
5524 .output_stride(37)
5525 .sparsity(0.0f)
5526 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
5527 }
5528 }
5529 }
5530
Marat Dukhan355ab432020-04-09 19:01:52 -07005531 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005532 TEST_REQUIRES_ARM_NEON_FMA;
5533 for (uint32_t n = 1; n < 10; n += 3) {
5534 for (size_t k = 1; k <= 5; k += 2) {
5535 SpMMMicrokernelTester()
5536 .mr(16)
5537 .nr(2)
5538 .m(32)
5539 .n(n)
5540 .k(k)
5541 .sparsity(0.0f)
5542 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005543 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005544 }
5545 }
5546 }
5547
Marat Dukhan355ab432020-04-09 19:01:52 -07005548 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005549 TEST_REQUIRES_ARM_NEON_FMA;
5550 for (uint32_t n = 1; n < 10; n += 3) {
5551 for (size_t k = 1; k <= 5; k += 2) {
5552 SpMMMicrokernelTester()
5553 .mr(16)
5554 .nr(2)
5555 .m(32)
5556 .n(n)
5557 .k(k)
5558 .sparsity(0.0f)
5559 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005560 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005561 }
5562 }
5563 }
5564
Marat Dukhan355ab432020-04-09 19:01:52 -07005565 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005566 TEST_REQUIRES_ARM_NEON_FMA;
5567 for (uint32_t n = 1; n < 10; n += 3) {
5568 for (size_t k = 1; k <= 5; k += 2) {
5569 SpMMMicrokernelTester()
5570 .mr(16)
5571 .nr(2)
5572 .m(32)
5573 .n(n)
5574 .k(k)
5575 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005576 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005577 }
5578 }
5579 }
5580
Marat Dukhan355ab432020-04-09 19:01:52 -07005581 TEST(F32_SPMM_MINMAX_16X2__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005582 TEST_REQUIRES_ARM_NEON_FMA;
5583 for (uint32_t n = 1; n < 10; n += 3) {
5584 for (size_t k = 1; k <= 5; k += 2) {
5585 SpMMMicrokernelTester()
5586 .mr(16)
5587 .nr(2)
5588 .m(32)
5589 .n(n)
5590 .k(k)
5591 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005592 .Test(xnn_f32_spmm_minmax_ukernel_16x2__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005593 }
5594 }
5595 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07005596#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07005597
5598
Marat Dukhan1dadbf72019-10-01 10:46:20 -07005599#if XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07005600 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005601 TEST_REQUIRES_ARM_NEON_FMA;
5602 SpMMMicrokernelTester()
5603 .mr(16)
5604 .nr(4)
5605 .m(16)
5606 .n(4)
5607 .k(1)
5608 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005609 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005610 }
5611
Marat Dukhan355ab432020-04-09 19:01:52 -07005612 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_eq_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005613 TEST_REQUIRES_ARM_NEON_FMA;
5614 for (uint32_t n = 1; n <= 4; n++) {
5615 SpMMMicrokernelTester()
5616 .mr(16)
5617 .nr(4)
5618 .m(16)
5619 .n(n)
5620 .k(1)
5621 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005622 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005623 }
5624 }
5625
Marat Dukhan355ab432020-04-09 19:01:52 -07005626 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005627 TEST_REQUIRES_ARM_NEON_FMA;
5628 for (size_t k = 2; k < 10; k++) {
5629 SpMMMicrokernelTester()
5630 .mr(16)
5631 .nr(4)
5632 .m(16)
5633 .n(4)
5634 .k(k)
5635 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005636 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005637 }
5638 }
5639
Marat Dukhan355ab432020-04-09 19:01:52 -07005640 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, k_gt_1_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005641 TEST_REQUIRES_ARM_NEON_FMA;
5642 for (size_t k = 2; k < 10; k++) {
5643 for (uint32_t n = 1; n <= 4; n++) {
5644 SpMMMicrokernelTester()
5645 .mr(16)
5646 .nr(4)
5647 .m(16)
5648 .n(n)
5649 .k(k)
5650 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005651 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005652 }
5653 }
5654 }
5655
Marat Dukhan355ab432020-04-09 19:01:52 -07005656 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, n_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005657 TEST_REQUIRES_ARM_NEON_FMA;
5658 for (uint32_t n = 5; n < 10; n++) {
5659 for (size_t k = 1; k <= 5; k += 2) {
5660 SpMMMicrokernelTester()
5661 .mr(16)
5662 .nr(4)
5663 .m(16)
5664 .n(n)
5665 .k(k)
5666 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005667 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005668 }
5669 }
5670 }
5671
Marat Dukhan355ab432020-04-09 19:01:52 -07005672 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, n_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005673 TEST_REQUIRES_ARM_NEON_FMA;
5674 for (uint32_t n = 8; n <= 12; n += 4) {
5675 for (size_t k = 1; k <= 5; k += 2) {
5676 SpMMMicrokernelTester()
5677 .mr(16)
5678 .nr(4)
5679 .m(16)
5680 .n(n)
5681 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -07005682 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005683 }
5684 }
5685 }
5686
Marat Dukhan355ab432020-04-09 19:01:52 -07005687 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_lt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005688 TEST_REQUIRES_ARM_NEON_FMA;
5689 for (uint32_t m = 1; m < 16; m++) {
5690 for (uint32_t n = 1; n < 20; n += 5) {
5691 for (size_t k = 1; k <= 5; k += 2) {
5692 SpMMMicrokernelTester()
5693 .mr(16)
5694 .nr(4)
5695 .m(m)
5696 .n(n)
5697 .k(k)
5698 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005699 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005700 }
5701 }
5702 }
5703 }
5704
Marat Dukhan355ab432020-04-09 19:01:52 -07005705 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_div_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005706 TEST_REQUIRES_ARM_NEON_FMA;
5707 for (uint32_t m = 32; m <= 48; m += 16) {
5708 for (uint32_t n = 1; n < 20; n += 5) {
5709 for (size_t k = 1; k <= 5; k += 2) {
5710 SpMMMicrokernelTester()
5711 .mr(16)
5712 .nr(4)
5713 .m(m)
5714 .n(n)
5715 .k(k)
5716 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005717 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005718 }
5719 }
5720 }
5721 }
5722
Marat Dukhan355ab432020-04-09 19:01:52 -07005723 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, m_gt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005724 TEST_REQUIRES_ARM_NEON_FMA;
5725 for (uint32_t m = 17; m < 32; m++) {
5726 for (uint32_t n = 1; n < 20; n += 5) {
5727 for (size_t k = 1; k <= 5; k += 2) {
5728 SpMMMicrokernelTester()
5729 .mr(16)
5730 .nr(4)
5731 .m(m)
5732 .n(n)
5733 .k(k)
5734 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005735 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005736 }
5737 }
5738 }
5739 }
5740
Marat Dukhane8bfcc82020-11-16 12:28:13 -08005741 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, output_stride) {
5742 TEST_REQUIRES_ARM_NEON_FMA;
5743 for (uint32_t n = 1; n < 20; n += 5) {
5744 for (size_t k = 1; k <= 5; k += 2) {
5745 SpMMMicrokernelTester()
5746 .mr(16)
5747 .nr(4)
5748 .m(32)
5749 .n(n)
5750 .k(k)
5751 .output_stride(37)
5752 .sparsity(0.0f)
5753 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
5754 }
5755 }
5756 }
5757
Marat Dukhan355ab432020-04-09 19:01:52 -07005758 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005759 TEST_REQUIRES_ARM_NEON_FMA;
5760 for (uint32_t n = 1; n < 20; n += 5) {
5761 for (size_t k = 1; k <= 5; k += 2) {
5762 SpMMMicrokernelTester()
5763 .mr(16)
5764 .nr(4)
5765 .m(32)
5766 .n(n)
5767 .k(k)
5768 .sparsity(0.0f)
5769 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005770 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005771 }
5772 }
5773 }
5774
Marat Dukhan355ab432020-04-09 19:01:52 -07005775 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005776 TEST_REQUIRES_ARM_NEON_FMA;
5777 for (uint32_t n = 1; n < 20; n += 5) {
5778 for (size_t k = 1; k <= 5; k += 2) {
5779 SpMMMicrokernelTester()
5780 .mr(16)
5781 .nr(4)
5782 .m(32)
5783 .n(n)
5784 .k(k)
5785 .sparsity(0.0f)
5786 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005787 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005788 }
5789 }
5790 }
5791
Marat Dukhan355ab432020-04-09 19:01:52 -07005792 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005793 TEST_REQUIRES_ARM_NEON_FMA;
5794 for (uint32_t n = 1; n < 20; n += 5) {
5795 for (size_t k = 1; k <= 5; k += 2) {
5796 SpMMMicrokernelTester()
5797 .mr(16)
5798 .nr(4)
5799 .m(32)
5800 .n(n)
5801 .k(k)
5802 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005803 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005804 }
5805 }
5806 }
5807
Marat Dukhan355ab432020-04-09 19:01:52 -07005808 TEST(F32_SPMM_MINMAX_16X4__NEONFMA, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005809 TEST_REQUIRES_ARM_NEON_FMA;
5810 for (uint32_t n = 1; n < 20; n += 5) {
5811 for (size_t k = 1; k <= 5; k += 2) {
5812 SpMMMicrokernelTester()
5813 .mr(16)
5814 .nr(4)
5815 .m(32)
5816 .n(n)
5817 .k(k)
5818 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005819 .Test(xnn_f32_spmm_minmax_ukernel_16x4__neonfma);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005820 }
5821 }
5822 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07005823#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07005824
5825
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08005826#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan355ab432020-04-09 19:01:52 -07005827 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005828 TEST_REQUIRES_ARM_NEON_FMA;
5829 SpMMMicrokernelTester()
5830 .mr(16)
5831 .nr(1)
5832 .m(16)
5833 .n(1)
5834 .k(1)
5835 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005836 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005837 }
5838
Marat Dukhan355ab432020-04-09 19:01:52 -07005839 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005840 TEST_REQUIRES_ARM_NEON_FMA;
5841 for (size_t k = 2; k < 10; k++) {
5842 SpMMMicrokernelTester()
5843 .mr(16)
5844 .nr(1)
5845 .m(16)
5846 .n(1)
5847 .k(k)
5848 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005849 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005850 }
5851 }
5852
Marat Dukhan355ab432020-04-09 19:01:52 -07005853 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005854 TEST_REQUIRES_ARM_NEON_FMA;
5855 for (uint32_t n = 2; n < 10; n++) {
5856 for (size_t k = 1; k <= 5; k += 2) {
5857 SpMMMicrokernelTester()
5858 .mr(16)
5859 .nr(1)
5860 .m(16)
5861 .n(n)
5862 .k(k)
5863 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005864 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005865 }
5866 }
5867 }
5868
Marat Dukhan355ab432020-04-09 19:01:52 -07005869 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_lt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005870 TEST_REQUIRES_ARM_NEON_FMA;
5871 for (uint32_t m = 1; m < 16; m++) {
5872 for (uint32_t n = 1; n < 10; n += 2) {
5873 for (size_t k = 1; k <= 5; k += 2) {
5874 SpMMMicrokernelTester()
5875 .mr(16)
5876 .nr(1)
5877 .m(m)
5878 .n(n)
5879 .k(k)
5880 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005881 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005882 }
5883 }
5884 }
5885 }
5886
Marat Dukhan355ab432020-04-09 19:01:52 -07005887 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_div_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005888 TEST_REQUIRES_ARM_NEON_FMA;
5889 for (uint32_t m = 32; m <= 48; m += 16) {
5890 for (uint32_t n = 1; n < 10; n += 2) {
5891 for (size_t k = 1; k <= 5; k += 2) {
5892 SpMMMicrokernelTester()
5893 .mr(16)
5894 .nr(1)
5895 .m(m)
5896 .n(n)
5897 .k(k)
5898 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005899 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005900 }
5901 }
5902 }
5903 }
5904
Marat Dukhan355ab432020-04-09 19:01:52 -07005905 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, m_gt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005906 TEST_REQUIRES_ARM_NEON_FMA;
5907 for (uint32_t m = 17; m < 32; m++) {
5908 for (uint32_t n = 1; n < 10; n += 2) {
5909 for (size_t k = 1; k <= 5; k += 2) {
5910 SpMMMicrokernelTester()
5911 .mr(16)
5912 .nr(1)
5913 .m(m)
5914 .n(n)
5915 .k(k)
5916 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005917 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005918 }
5919 }
5920 }
5921 }
5922
Marat Dukhane8bfcc82020-11-16 12:28:13 -08005923 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, output_stride) {
5924 TEST_REQUIRES_ARM_NEON_FMA;
5925 for (uint32_t n = 1; n < 10; n += 2) {
5926 for (size_t k = 1; k <= 5; k += 2) {
5927 SpMMMicrokernelTester()
5928 .mr(16)
5929 .nr(1)
5930 .m(32)
5931 .n(n)
5932 .k(k)
5933 .output_stride(37)
5934 .sparsity(0.0f)
5935 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
5936 }
5937 }
5938 }
5939
Marat Dukhan355ab432020-04-09 19:01:52 -07005940 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005941 TEST_REQUIRES_ARM_NEON_FMA;
5942 for (uint32_t n = 1; n < 10; n += 2) {
5943 for (size_t k = 1; k <= 5; k += 2) {
5944 SpMMMicrokernelTester()
5945 .mr(16)
5946 .nr(1)
5947 .m(32)
5948 .n(n)
5949 .k(k)
5950 .sparsity(0.0f)
5951 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005952 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005953 }
5954 }
5955 }
5956
Marat Dukhan355ab432020-04-09 19:01:52 -07005957 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005958 TEST_REQUIRES_ARM_NEON_FMA;
5959 for (uint32_t n = 1; n < 10; n += 2) {
5960 for (size_t k = 1; k <= 5; k += 2) {
5961 SpMMMicrokernelTester()
5962 .mr(16)
5963 .nr(1)
5964 .m(32)
5965 .n(n)
5966 .k(k)
5967 .sparsity(0.0f)
5968 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07005969 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005970 }
5971 }
5972 }
5973
Marat Dukhan355ab432020-04-09 19:01:52 -07005974 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005975 TEST_REQUIRES_ARM_NEON_FMA;
5976 for (uint32_t n = 1; n < 10; n += 2) {
5977 for (size_t k = 1; k <= 5; k += 2) {
5978 SpMMMicrokernelTester()
5979 .mr(16)
5980 .nr(1)
5981 .m(32)
5982 .n(n)
5983 .k(k)
5984 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07005985 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005986 }
5987 }
5988 }
5989
Marat Dukhan355ab432020-04-09 19:01:52 -07005990 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005991 TEST_REQUIRES_ARM_NEON_FMA;
5992 for (uint32_t n = 1; n < 10; n += 2) {
5993 for (size_t k = 1; k <= 5; k += 2) {
5994 SpMMMicrokernelTester()
5995 .mr(16)
5996 .nr(1)
5997 .m(32)
5998 .n(n)
5999 .k(k)
6000 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07006001 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006002 }
6003 }
6004 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006005#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07006006
6007
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006008#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchardbeca6522020-10-30 22:34:35 -07006009 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_eq_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006010 TEST_REQUIRES_ARM_NEON_FMA;
6011 SpMMMicrokernelTester()
6012 .mr(16)
6013 .nr(1)
6014 .m(16)
6015 .n(1)
6016 .k(2)
6017 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006018 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006019 }
6020
Frank Barchardbeca6522020-10-30 22:34:35 -07006021 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_lt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006022 TEST_REQUIRES_ARM_NEON_FMA;
6023 for (size_t k = 1; k < 2; k++) {
6024 SpMMMicrokernelTester()
6025 .mr(16)
6026 .nr(1)
6027 .m(16)
6028 .n(1)
6029 .k(k)
6030 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006031 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006032 }
6033 }
6034
Frank Barchardbeca6522020-10-30 22:34:35 -07006035 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006036 TEST_REQUIRES_ARM_NEON_FMA;
6037 for (size_t k = 3; k < 4; k++) {
6038 SpMMMicrokernelTester()
6039 .mr(16)
6040 .nr(1)
6041 .m(16)
6042 .n(1)
6043 .k(k)
6044 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006045 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006046 }
6047 }
6048
Frank Barchardbeca6522020-10-30 22:34:35 -07006049 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, k_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006050 TEST_REQUIRES_ARM_NEON_FMA;
6051 for (size_t k = 4; k <= 20; k += 2) {
6052 SpMMMicrokernelTester()
6053 .mr(16)
6054 .nr(1)
6055 .m(16)
6056 .n(1)
6057 .k(k)
6058 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006059 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006060 }
6061 }
6062
Frank Barchardbeca6522020-10-30 22:34:35 -07006063 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006064 TEST_REQUIRES_ARM_NEON_FMA;
6065 for (uint32_t n = 2; n < 10; n++) {
6066 for (size_t k = 1; k <= 10; k += 3) {
6067 SpMMMicrokernelTester()
6068 .mr(16)
6069 .nr(1)
6070 .m(16)
6071 .n(n)
6072 .k(k)
6073 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006074 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006075 }
6076 }
6077 }
6078
Frank Barchardbeca6522020-10-30 22:34:35 -07006079 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_lt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006080 TEST_REQUIRES_ARM_NEON_FMA;
6081 for (uint32_t m = 1; m < 16; m++) {
6082 for (uint32_t n = 1; n < 10; n += 2) {
6083 for (size_t k = 1; k <= 10; k += 3) {
6084 SpMMMicrokernelTester()
6085 .mr(16)
6086 .nr(1)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006091 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006092 }
6093 }
6094 }
6095 }
6096
Frank Barchardbeca6522020-10-30 22:34:35 -07006097 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_div_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006098 TEST_REQUIRES_ARM_NEON_FMA;
6099 for (uint32_t m = 32; m <= 48; m += 16) {
6100 for (uint32_t n = 1; n < 10; n += 2) {
6101 for (size_t k = 1; k <= 10; k += 3) {
6102 SpMMMicrokernelTester()
6103 .mr(16)
6104 .nr(1)
6105 .m(m)
6106 .n(n)
6107 .k(k)
6108 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006109 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006110 }
6111 }
6112 }
6113 }
6114
Frank Barchardbeca6522020-10-30 22:34:35 -07006115 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, m_gt_16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006116 TEST_REQUIRES_ARM_NEON_FMA;
6117 for (uint32_t m = 17; m < 32; m++) {
6118 for (uint32_t n = 1; n < 10; n += 2) {
6119 for (size_t k = 1; k <= 10; k += 3) {
6120 SpMMMicrokernelTester()
6121 .mr(16)
6122 .nr(1)
6123 .m(m)
6124 .n(n)
6125 .k(k)
6126 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006127 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006128 }
6129 }
6130 }
6131 }
6132
Marat Dukhane8bfcc82020-11-16 12:28:13 -08006133 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, output_stride) {
6134 TEST_REQUIRES_ARM_NEON_FMA;
6135 for (uint32_t n = 1; n < 10; n += 2) {
6136 for (size_t k = 1; k <= 10; k += 3) {
6137 SpMMMicrokernelTester()
6138 .mr(16)
6139 .nr(1)
6140 .m(32)
6141 .n(n)
6142 .k(k)
6143 .output_stride(37)
6144 .sparsity(0.0f)
6145 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
6146 }
6147 }
6148 }
6149
Frank Barchardbeca6522020-10-30 22:34:35 -07006150 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006151 TEST_REQUIRES_ARM_NEON_FMA;
6152 for (uint32_t n = 1; n < 10; n += 2) {
6153 for (size_t k = 1; k <= 10; k += 3) {
6154 SpMMMicrokernelTester()
6155 .mr(16)
6156 .nr(1)
6157 .m(32)
6158 .n(n)
6159 .k(k)
6160 .sparsity(0.0f)
6161 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07006162 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006163 }
6164 }
6165 }
6166
Frank Barchardbeca6522020-10-30 22:34:35 -07006167 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006168 TEST_REQUIRES_ARM_NEON_FMA;
6169 for (uint32_t n = 1; n < 10; n += 2) {
6170 for (size_t k = 1; k <= 10; k += 3) {
6171 SpMMMicrokernelTester()
6172 .mr(16)
6173 .nr(1)
6174 .m(32)
6175 .n(n)
6176 .k(k)
6177 .sparsity(0.0f)
6178 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07006179 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006180 }
6181 }
6182 }
6183
Frank Barchardbeca6522020-10-30 22:34:35 -07006184 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006185 TEST_REQUIRES_ARM_NEON_FMA;
6186 for (uint32_t n = 1; n < 10; n += 2) {
6187 for (size_t k = 1; k <= 10; k += 3) {
6188 SpMMMicrokernelTester()
6189 .mr(16)
6190 .nr(1)
6191 .m(32)
6192 .n(n)
6193 .k(k)
6194 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006195 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006196 }
6197 }
6198 }
6199
Frank Barchardbeca6522020-10-30 22:34:35 -07006200 TEST(F32_SPMM_MINMAX_16X1__NEONFMA_X2, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07006201 TEST_REQUIRES_ARM_NEON_FMA;
6202 for (uint32_t n = 1; n < 10; n += 2) {
6203 for (size_t k = 1; k <= 10; k += 3) {
6204 SpMMMicrokernelTester()
6205 .mr(16)
6206 .nr(1)
6207 .m(32)
6208 .n(n)
6209 .k(k)
6210 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07006211 .Test(xnn_f32_spmm_minmax_ukernel_16x1__neonfma_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07006212 }
6213 }
6214 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006215#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07006216
6217
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006218#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard846c0c62020-10-26 15:01:39 -07006219 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, k_eq_1) {
6220 TEST_REQUIRES_ARM_NEON_FMA;
6221 SpMMMicrokernelTester()
6222 .mr(32)
6223 .nr(1)
6224 .m(32)
6225 .n(1)
6226 .k(1)
6227 .sparsity(0.0f)
6228 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6229 }
6230
6231 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, k_gt_1) {
6232 TEST_REQUIRES_ARM_NEON_FMA;
6233 for (size_t k = 2; k < 10; k++) {
6234 SpMMMicrokernelTester()
6235 .mr(32)
6236 .nr(1)
6237 .m(32)
6238 .n(1)
6239 .k(k)
6240 .sparsity(0.0f)
6241 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6242 }
6243 }
6244
6245 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, n_gt_1) {
6246 TEST_REQUIRES_ARM_NEON_FMA;
6247 for (uint32_t n = 2; n < 10; n++) {
6248 for (size_t k = 1; k <= 5; k += 2) {
6249 SpMMMicrokernelTester()
6250 .mr(32)
6251 .nr(1)
6252 .m(32)
6253 .n(n)
6254 .k(k)
6255 .sparsity(0.0f)
6256 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6257 }
6258 }
6259 }
6260
6261 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_lt_32) {
6262 TEST_REQUIRES_ARM_NEON_FMA;
6263 for (uint32_t m = 1; m < 32; m++) {
6264 for (uint32_t n = 1; n < 10; n += 2) {
6265 for (size_t k = 1; k <= 5; k += 2) {
6266 SpMMMicrokernelTester()
6267 .mr(32)
6268 .nr(1)
6269 .m(m)
6270 .n(n)
6271 .k(k)
6272 .sparsity(0.0f)
6273 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6274 }
6275 }
6276 }
6277 }
6278
6279 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_div_32) {
6280 TEST_REQUIRES_ARM_NEON_FMA;
6281 for (uint32_t m = 64; m <= 96; m += 32) {
6282 for (uint32_t n = 1; n < 10; n += 2) {
6283 for (size_t k = 1; k <= 5; k += 2) {
6284 SpMMMicrokernelTester()
6285 .mr(32)
6286 .nr(1)
6287 .m(m)
6288 .n(n)
6289 .k(k)
6290 .sparsity(0.0f)
6291 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6292 }
6293 }
6294 }
6295 }
6296
6297 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, m_gt_32) {
6298 TEST_REQUIRES_ARM_NEON_FMA;
6299 for (uint32_t m = 33; m < 64; m++) {
6300 for (uint32_t n = 1; n < 10; n += 2) {
6301 for (size_t k = 1; k <= 5; k += 2) {
6302 SpMMMicrokernelTester()
6303 .mr(32)
6304 .nr(1)
6305 .m(m)
6306 .n(n)
6307 .k(k)
6308 .sparsity(0.0f)
6309 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6310 }
6311 }
6312 }
6313 }
6314
Marat Dukhane8bfcc82020-11-16 12:28:13 -08006315 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, output_stride) {
6316 TEST_REQUIRES_ARM_NEON_FMA;
6317 for (uint32_t n = 1; n < 10; n += 2) {
6318 for (size_t k = 1; k <= 5; k += 2) {
6319 SpMMMicrokernelTester()
6320 .mr(32)
6321 .nr(1)
6322 .m(64)
6323 .n(n)
6324 .k(k)
6325 .output_stride(67)
6326 .sparsity(0.0f)
6327 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6328 }
6329 }
6330 }
6331
Frank Barchard846c0c62020-10-26 15:01:39 -07006332 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, qmin) {
6333 TEST_REQUIRES_ARM_NEON_FMA;
6334 for (uint32_t n = 1; n < 10; n += 2) {
6335 for (size_t k = 1; k <= 5; k += 2) {
6336 SpMMMicrokernelTester()
6337 .mr(32)
6338 .nr(1)
6339 .m(64)
6340 .n(n)
6341 .k(k)
6342 .sparsity(0.0f)
6343 .qmin(128)
6344 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6345 }
6346 }
6347 }
6348
6349 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, qmax) {
6350 TEST_REQUIRES_ARM_NEON_FMA;
6351 for (uint32_t n = 1; n < 10; n += 2) {
6352 for (size_t k = 1; k <= 5; k += 2) {
6353 SpMMMicrokernelTester()
6354 .mr(32)
6355 .nr(1)
6356 .m(64)
6357 .n(n)
6358 .k(k)
6359 .sparsity(0.0f)
6360 .qmax(128)
6361 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6362 }
6363 }
6364 }
6365
6366 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, half_sparse) {
6367 TEST_REQUIRES_ARM_NEON_FMA;
6368 for (uint32_t n = 1; n < 10; n += 2) {
6369 for (size_t k = 1; k <= 5; k += 2) {
6370 SpMMMicrokernelTester()
6371 .mr(32)
6372 .nr(1)
6373 .m(64)
6374 .n(n)
6375 .k(k)
6376 .sparsity(0.5f)
6377 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6378 }
6379 }
6380 }
6381
6382 TEST(F32_SPMM_MINMAX_32X1__NEONFMA, zero_weights) {
6383 TEST_REQUIRES_ARM_NEON_FMA;
6384 for (uint32_t n = 1; n < 10; n += 2) {
6385 for (size_t k = 1; k <= 5; k += 2) {
6386 SpMMMicrokernelTester()
6387 .mr(32)
6388 .nr(1)
6389 .m(64)
6390 .n(n)
6391 .k(k)
6392 .sparsity(1.0f)
6393 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma);
6394 }
6395 }
6396 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006397#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard846c0c62020-10-26 15:01:39 -07006398
6399
6400#if XNN_ARCH_ARM64
6401 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_eq_1) {
6402 TEST_REQUIRES_ARM_NEON_FMA;
6403 SpMMMicrokernelTester()
6404 .mr(32)
6405 .nr(2)
6406 .m(32)
6407 .n(2)
6408 .k(1)
6409 .sparsity(0.0f)
6410 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6411 }
6412
6413 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_eq_1_subtile) {
6414 TEST_REQUIRES_ARM_NEON_FMA;
6415 for (uint32_t n = 1; n <= 2; n++) {
6416 SpMMMicrokernelTester()
6417 .mr(32)
6418 .nr(2)
6419 .m(32)
6420 .n(n)
6421 .k(1)
6422 .sparsity(0.0f)
6423 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6424 }
6425 }
6426
6427 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_gt_1) {
6428 TEST_REQUIRES_ARM_NEON_FMA;
6429 for (size_t k = 2; k < 10; k++) {
6430 SpMMMicrokernelTester()
6431 .mr(32)
6432 .nr(2)
6433 .m(32)
6434 .n(2)
6435 .k(k)
6436 .sparsity(0.0f)
6437 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6438 }
6439 }
6440
6441 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, k_gt_1_subtile) {
6442 TEST_REQUIRES_ARM_NEON_FMA;
6443 for (size_t k = 2; k < 10; k++) {
6444 for (uint32_t n = 1; n <= 2; n++) {
6445 SpMMMicrokernelTester()
6446 .mr(32)
6447 .nr(2)
6448 .m(32)
6449 .n(n)
6450 .k(k)
6451 .sparsity(0.0f)
6452 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6453 }
6454 }
6455 }
6456
6457 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, n_gt_2) {
6458 TEST_REQUIRES_ARM_NEON_FMA;
6459 for (uint32_t n = 3; n < 10; n++) {
6460 for (size_t k = 1; k <= 5; k += 2) {
6461 SpMMMicrokernelTester()
6462 .mr(32)
6463 .nr(2)
6464 .m(32)
6465 .n(n)
6466 .k(k)
6467 .sparsity(0.0f)
6468 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6469 }
6470 }
6471 }
6472
6473 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, n_div_2) {
6474 TEST_REQUIRES_ARM_NEON_FMA;
6475 for (uint32_t n = 4; n <= 6; n += 2) {
6476 for (size_t k = 1; k <= 5; k += 2) {
6477 SpMMMicrokernelTester()
6478 .mr(32)
6479 .nr(2)
6480 .m(32)
6481 .n(n)
6482 .k(k)
6483 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6484 }
6485 }
6486 }
6487
6488 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_lt_32) {
6489 TEST_REQUIRES_ARM_NEON_FMA;
6490 for (uint32_t m = 1; m < 32; m++) {
6491 for (uint32_t n = 1; n < 10; n += 3) {
6492 for (size_t k = 1; k <= 5; k += 2) {
6493 SpMMMicrokernelTester()
6494 .mr(32)
6495 .nr(2)
6496 .m(m)
6497 .n(n)
6498 .k(k)
6499 .sparsity(0.0f)
6500 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6501 }
6502 }
6503 }
6504 }
6505
6506 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_div_32) {
6507 TEST_REQUIRES_ARM_NEON_FMA;
6508 for (uint32_t m = 64; m <= 96; m += 32) {
6509 for (uint32_t n = 1; n < 10; n += 3) {
6510 for (size_t k = 1; k <= 5; k += 2) {
6511 SpMMMicrokernelTester()
6512 .mr(32)
6513 .nr(2)
6514 .m(m)
6515 .n(n)
6516 .k(k)
6517 .sparsity(0.0f)
6518 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6519 }
6520 }
6521 }
6522 }
6523
6524 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, m_gt_32) {
6525 TEST_REQUIRES_ARM_NEON_FMA;
6526 for (uint32_t m = 33; m < 64; m++) {
6527 for (uint32_t n = 1; n < 10; n += 3) {
6528 for (size_t k = 1; k <= 5; k += 2) {
6529 SpMMMicrokernelTester()
6530 .mr(32)
6531 .nr(2)
6532 .m(m)
6533 .n(n)
6534 .k(k)
6535 .sparsity(0.0f)
6536 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6537 }
6538 }
6539 }
6540 }
6541
Marat Dukhane8bfcc82020-11-16 12:28:13 -08006542 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, output_stride) {
6543 TEST_REQUIRES_ARM_NEON_FMA;
6544 for (uint32_t n = 1; n < 10; n += 3) {
6545 for (size_t k = 1; k <= 5; k += 2) {
6546 SpMMMicrokernelTester()
6547 .mr(32)
6548 .nr(2)
6549 .m(64)
6550 .n(n)
6551 .k(k)
6552 .output_stride(67)
6553 .sparsity(0.0f)
6554 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6555 }
6556 }
6557 }
6558
Frank Barchard846c0c62020-10-26 15:01:39 -07006559 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, qmin) {
6560 TEST_REQUIRES_ARM_NEON_FMA;
6561 for (uint32_t n = 1; n < 10; n += 3) {
6562 for (size_t k = 1; k <= 5; k += 2) {
6563 SpMMMicrokernelTester()
6564 .mr(32)
6565 .nr(2)
6566 .m(64)
6567 .n(n)
6568 .k(k)
6569 .sparsity(0.0f)
6570 .qmin(128)
6571 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6572 }
6573 }
6574 }
6575
6576 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, qmax) {
6577 TEST_REQUIRES_ARM_NEON_FMA;
6578 for (uint32_t n = 1; n < 10; n += 3) {
6579 for (size_t k = 1; k <= 5; k += 2) {
6580 SpMMMicrokernelTester()
6581 .mr(32)
6582 .nr(2)
6583 .m(64)
6584 .n(n)
6585 .k(k)
6586 .sparsity(0.0f)
6587 .qmax(128)
6588 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6589 }
6590 }
6591 }
6592
6593 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, half_sparse) {
6594 TEST_REQUIRES_ARM_NEON_FMA;
6595 for (uint32_t n = 1; n < 10; n += 3) {
6596 for (size_t k = 1; k <= 5; k += 2) {
6597 SpMMMicrokernelTester()
6598 .mr(32)
6599 .nr(2)
6600 .m(64)
6601 .n(n)
6602 .k(k)
6603 .sparsity(0.5f)
6604 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6605 }
6606 }
6607 }
6608
6609 TEST(F32_SPMM_MINMAX_32X2__NEONFMA, zero_weights) {
6610 TEST_REQUIRES_ARM_NEON_FMA;
6611 for (uint32_t n = 1; n < 10; n += 3) {
6612 for (size_t k = 1; k <= 5; k += 2) {
6613 SpMMMicrokernelTester()
6614 .mr(32)
6615 .nr(2)
6616 .m(64)
6617 .n(n)
6618 .k(k)
6619 .sparsity(1.0f)
6620 .Test(xnn_f32_spmm_minmax_ukernel_32x2__neonfma);
6621 }
6622 }
6623 }
6624#endif // XNN_ARCH_ARM64
6625
6626
6627#if XNN_ARCH_ARM64
6628 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_eq_1) {
6629 TEST_REQUIRES_ARM_NEON_FMA;
6630 SpMMMicrokernelTester()
6631 .mr(32)
6632 .nr(4)
6633 .m(32)
6634 .n(4)
6635 .k(1)
6636 .sparsity(0.0f)
6637 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6638 }
6639
6640 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_eq_1_subtile) {
6641 TEST_REQUIRES_ARM_NEON_FMA;
6642 for (uint32_t n = 1; n <= 4; n++) {
6643 SpMMMicrokernelTester()
6644 .mr(32)
6645 .nr(4)
6646 .m(32)
6647 .n(n)
6648 .k(1)
6649 .sparsity(0.0f)
6650 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6651 }
6652 }
6653
6654 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_gt_1) {
6655 TEST_REQUIRES_ARM_NEON_FMA;
6656 for (size_t k = 2; k < 10; k++) {
6657 SpMMMicrokernelTester()
6658 .mr(32)
6659 .nr(4)
6660 .m(32)
6661 .n(4)
6662 .k(k)
6663 .sparsity(0.0f)
6664 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6665 }
6666 }
6667
6668 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, k_gt_1_subtile) {
6669 TEST_REQUIRES_ARM_NEON_FMA;
6670 for (size_t k = 2; k < 10; k++) {
6671 for (uint32_t n = 1; n <= 4; n++) {
6672 SpMMMicrokernelTester()
6673 .mr(32)
6674 .nr(4)
6675 .m(32)
6676 .n(n)
6677 .k(k)
6678 .sparsity(0.0f)
6679 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6680 }
6681 }
6682 }
6683
6684 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, n_gt_4) {
6685 TEST_REQUIRES_ARM_NEON_FMA;
6686 for (uint32_t n = 5; n < 10; n++) {
6687 for (size_t k = 1; k <= 5; k += 2) {
6688 SpMMMicrokernelTester()
6689 .mr(32)
6690 .nr(4)
6691 .m(32)
6692 .n(n)
6693 .k(k)
6694 .sparsity(0.0f)
6695 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6696 }
6697 }
6698 }
6699
6700 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, n_div_4) {
6701 TEST_REQUIRES_ARM_NEON_FMA;
6702 for (uint32_t n = 8; n <= 12; n += 4) {
6703 for (size_t k = 1; k <= 5; k += 2) {
6704 SpMMMicrokernelTester()
6705 .mr(32)
6706 .nr(4)
6707 .m(32)
6708 .n(n)
6709 .k(k)
6710 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6711 }
6712 }
6713 }
6714
6715 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_lt_32) {
6716 TEST_REQUIRES_ARM_NEON_FMA;
6717 for (uint32_t m = 1; m < 32; m++) {
6718 for (uint32_t n = 1; n < 20; n += 5) {
6719 for (size_t k = 1; k <= 5; k += 2) {
6720 SpMMMicrokernelTester()
6721 .mr(32)
6722 .nr(4)
6723 .m(m)
6724 .n(n)
6725 .k(k)
6726 .sparsity(0.0f)
6727 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6728 }
6729 }
6730 }
6731 }
6732
6733 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_div_32) {
6734 TEST_REQUIRES_ARM_NEON_FMA;
6735 for (uint32_t m = 64; m <= 96; m += 32) {
6736 for (uint32_t n = 1; n < 20; n += 5) {
6737 for (size_t k = 1; k <= 5; k += 2) {
6738 SpMMMicrokernelTester()
6739 .mr(32)
6740 .nr(4)
6741 .m(m)
6742 .n(n)
6743 .k(k)
6744 .sparsity(0.0f)
6745 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6746 }
6747 }
6748 }
6749 }
6750
6751 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, m_gt_32) {
6752 TEST_REQUIRES_ARM_NEON_FMA;
6753 for (uint32_t m = 33; m < 64; m++) {
6754 for (uint32_t n = 1; n < 20; n += 5) {
6755 for (size_t k = 1; k <= 5; k += 2) {
6756 SpMMMicrokernelTester()
6757 .mr(32)
6758 .nr(4)
6759 .m(m)
6760 .n(n)
6761 .k(k)
6762 .sparsity(0.0f)
6763 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6764 }
6765 }
6766 }
6767 }
6768
Marat Dukhane8bfcc82020-11-16 12:28:13 -08006769 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, output_stride) {
6770 TEST_REQUIRES_ARM_NEON_FMA;
6771 for (uint32_t n = 1; n < 20; n += 5) {
6772 for (size_t k = 1; k <= 5; k += 2) {
6773 SpMMMicrokernelTester()
6774 .mr(32)
6775 .nr(4)
6776 .m(64)
6777 .n(n)
6778 .k(k)
6779 .output_stride(67)
6780 .sparsity(0.0f)
6781 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6782 }
6783 }
6784 }
6785
Frank Barchard846c0c62020-10-26 15:01:39 -07006786 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, qmin) {
6787 TEST_REQUIRES_ARM_NEON_FMA;
6788 for (uint32_t n = 1; n < 20; n += 5) {
6789 for (size_t k = 1; k <= 5; k += 2) {
6790 SpMMMicrokernelTester()
6791 .mr(32)
6792 .nr(4)
6793 .m(64)
6794 .n(n)
6795 .k(k)
6796 .sparsity(0.0f)
6797 .qmin(128)
6798 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6799 }
6800 }
6801 }
6802
6803 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, qmax) {
6804 TEST_REQUIRES_ARM_NEON_FMA;
6805 for (uint32_t n = 1; n < 20; n += 5) {
6806 for (size_t k = 1; k <= 5; k += 2) {
6807 SpMMMicrokernelTester()
6808 .mr(32)
6809 .nr(4)
6810 .m(64)
6811 .n(n)
6812 .k(k)
6813 .sparsity(0.0f)
6814 .qmax(128)
6815 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6816 }
6817 }
6818 }
6819
6820 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, half_sparse) {
6821 TEST_REQUIRES_ARM_NEON_FMA;
6822 for (uint32_t n = 1; n < 20; n += 5) {
6823 for (size_t k = 1; k <= 5; k += 2) {
6824 SpMMMicrokernelTester()
6825 .mr(32)
6826 .nr(4)
6827 .m(64)
6828 .n(n)
6829 .k(k)
6830 .sparsity(0.5f)
6831 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6832 }
6833 }
6834 }
6835
6836 TEST(F32_SPMM_MINMAX_32X4__NEONFMA, zero_weights) {
6837 TEST_REQUIRES_ARM_NEON_FMA;
6838 for (uint32_t n = 1; n < 20; n += 5) {
6839 for (size_t k = 1; k <= 5; k += 2) {
6840 SpMMMicrokernelTester()
6841 .mr(32)
6842 .nr(4)
6843 .m(64)
6844 .n(n)
6845 .k(k)
6846 .sparsity(1.0f)
6847 .Test(xnn_f32_spmm_minmax_ukernel_32x4__neonfma);
6848 }
6849 }
6850 }
6851#endif // XNN_ARCH_ARM64
6852
6853
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08006854#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard846c0c62020-10-26 15:01:39 -07006855 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, k_eq_1) {
6856 TEST_REQUIRES_ARM_NEON_FMA;
6857 SpMMMicrokernelTester()
6858 .mr(32)
6859 .nr(1)
6860 .m(32)
6861 .n(1)
6862 .k(1)
6863 .sparsity(0.0f)
6864 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6865 }
6866
6867 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, k_gt_1) {
6868 TEST_REQUIRES_ARM_NEON_FMA;
6869 for (size_t k = 2; k < 10; k++) {
6870 SpMMMicrokernelTester()
6871 .mr(32)
6872 .nr(1)
6873 .m(32)
6874 .n(1)
6875 .k(k)
6876 .sparsity(0.0f)
6877 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6878 }
6879 }
6880
6881 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, n_gt_1) {
6882 TEST_REQUIRES_ARM_NEON_FMA;
6883 for (uint32_t n = 2; n < 10; n++) {
6884 for (size_t k = 1; k <= 5; k += 2) {
6885 SpMMMicrokernelTester()
6886 .mr(32)
6887 .nr(1)
6888 .m(32)
6889 .n(n)
6890 .k(k)
6891 .sparsity(0.0f)
6892 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6893 }
6894 }
6895 }
6896
6897 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_lt_32) {
6898 TEST_REQUIRES_ARM_NEON_FMA;
6899 for (uint32_t m = 1; m < 32; m++) {
6900 for (uint32_t n = 1; n < 10; n += 2) {
6901 for (size_t k = 1; k <= 5; k += 2) {
6902 SpMMMicrokernelTester()
6903 .mr(32)
6904 .nr(1)
6905 .m(m)
6906 .n(n)
6907 .k(k)
6908 .sparsity(0.0f)
6909 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6910 }
6911 }
6912 }
6913 }
6914
6915 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_div_32) {
6916 TEST_REQUIRES_ARM_NEON_FMA;
6917 for (uint32_t m = 64; m <= 96; m += 32) {
6918 for (uint32_t n = 1; n < 10; n += 2) {
6919 for (size_t k = 1; k <= 5; k += 2) {
6920 SpMMMicrokernelTester()
6921 .mr(32)
6922 .nr(1)
6923 .m(m)
6924 .n(n)
6925 .k(k)
6926 .sparsity(0.0f)
6927 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6928 }
6929 }
6930 }
6931 }
6932
6933 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, m_gt_32) {
6934 TEST_REQUIRES_ARM_NEON_FMA;
6935 for (uint32_t m = 33; m < 64; m++) {
6936 for (uint32_t n = 1; n < 10; n += 2) {
6937 for (size_t k = 1; k <= 5; k += 2) {
6938 SpMMMicrokernelTester()
6939 .mr(32)
6940 .nr(1)
6941 .m(m)
6942 .n(n)
6943 .k(k)
6944 .sparsity(0.0f)
6945 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6946 }
6947 }
6948 }
6949 }
6950
Marat Dukhane8bfcc82020-11-16 12:28:13 -08006951 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, output_stride) {
6952 TEST_REQUIRES_ARM_NEON_FMA;
6953 for (uint32_t n = 1; n < 10; n += 2) {
6954 for (size_t k = 1; k <= 5; k += 2) {
6955 SpMMMicrokernelTester()
6956 .mr(32)
6957 .nr(1)
6958 .m(64)
6959 .n(n)
6960 .k(k)
6961 .output_stride(67)
6962 .sparsity(0.0f)
6963 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6964 }
6965 }
6966 }
6967
Frank Barchard846c0c62020-10-26 15:01:39 -07006968 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, qmin) {
6969 TEST_REQUIRES_ARM_NEON_FMA;
6970 for (uint32_t n = 1; n < 10; n += 2) {
6971 for (size_t k = 1; k <= 5; k += 2) {
6972 SpMMMicrokernelTester()
6973 .mr(32)
6974 .nr(1)
6975 .m(64)
6976 .n(n)
6977 .k(k)
6978 .sparsity(0.0f)
6979 .qmin(128)
6980 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6981 }
6982 }
6983 }
6984
6985 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, qmax) {
6986 TEST_REQUIRES_ARM_NEON_FMA;
6987 for (uint32_t n = 1; n < 10; n += 2) {
6988 for (size_t k = 1; k <= 5; k += 2) {
6989 SpMMMicrokernelTester()
6990 .mr(32)
6991 .nr(1)
6992 .m(64)
6993 .n(n)
6994 .k(k)
6995 .sparsity(0.0f)
6996 .qmax(128)
6997 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
6998 }
6999 }
7000 }
7001
7002 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, half_sparse) {
7003 TEST_REQUIRES_ARM_NEON_FMA;
7004 for (uint32_t n = 1; n < 10; n += 2) {
7005 for (size_t k = 1; k <= 5; k += 2) {
7006 SpMMMicrokernelTester()
7007 .mr(32)
7008 .nr(1)
7009 .m(64)
7010 .n(n)
7011 .k(k)
7012 .sparsity(0.5f)
7013 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
7014 }
7015 }
7016 }
7017
7018 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_PIPELINED, zero_weights) {
7019 TEST_REQUIRES_ARM_NEON_FMA;
7020 for (uint32_t n = 1; n < 10; n += 2) {
7021 for (size_t k = 1; k <= 5; k += 2) {
7022 SpMMMicrokernelTester()
7023 .mr(32)
7024 .nr(1)
7025 .m(64)
7026 .n(n)
7027 .k(k)
7028 .sparsity(1.0f)
7029 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined);
7030 }
7031 }
7032 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08007033#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard846c0c62020-10-26 15:01:39 -07007034
7035
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08007036#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchardbeca6522020-10-30 22:34:35 -07007037 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_eq_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007038 TEST_REQUIRES_ARM_NEON_FMA;
7039 SpMMMicrokernelTester()
7040 .mr(32)
7041 .nr(1)
7042 .m(32)
7043 .n(1)
7044 .k(2)
7045 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007046 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007047 }
7048
Frank Barchardbeca6522020-10-30 22:34:35 -07007049 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_lt_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007050 TEST_REQUIRES_ARM_NEON_FMA;
7051 for (size_t k = 1; k < 2; k++) {
7052 SpMMMicrokernelTester()
7053 .mr(32)
7054 .nr(1)
7055 .m(32)
7056 .n(1)
7057 .k(k)
7058 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007059 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007060 }
7061 }
7062
Frank Barchardbeca6522020-10-30 22:34:35 -07007063 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_gt_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007064 TEST_REQUIRES_ARM_NEON_FMA;
7065 for (size_t k = 3; k < 4; k++) {
7066 SpMMMicrokernelTester()
7067 .mr(32)
7068 .nr(1)
7069 .m(32)
7070 .n(1)
7071 .k(k)
7072 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007073 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007074 }
7075 }
7076
Frank Barchardbeca6522020-10-30 22:34:35 -07007077 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, k_div_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007078 TEST_REQUIRES_ARM_NEON_FMA;
7079 for (size_t k = 4; k <= 20; k += 2) {
7080 SpMMMicrokernelTester()
7081 .mr(32)
7082 .nr(1)
7083 .m(32)
7084 .n(1)
7085 .k(k)
7086 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007087 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007088 }
7089 }
7090
Frank Barchardbeca6522020-10-30 22:34:35 -07007091 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, n_gt_1) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007092 TEST_REQUIRES_ARM_NEON_FMA;
7093 for (uint32_t n = 2; n < 10; n++) {
7094 for (size_t k = 1; k <= 10; k += 3) {
7095 SpMMMicrokernelTester()
7096 .mr(32)
7097 .nr(1)
7098 .m(32)
7099 .n(n)
7100 .k(k)
7101 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007102 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007103 }
7104 }
7105 }
7106
Frank Barchardbeca6522020-10-30 22:34:35 -07007107 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_lt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007108 TEST_REQUIRES_ARM_NEON_FMA;
7109 for (uint32_t m = 1; m < 32; m++) {
7110 for (uint32_t n = 1; n < 10; n += 2) {
7111 for (size_t k = 1; k <= 10; k += 3) {
7112 SpMMMicrokernelTester()
7113 .mr(32)
7114 .nr(1)
7115 .m(m)
7116 .n(n)
7117 .k(k)
7118 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007119 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007120 }
7121 }
7122 }
7123 }
7124
Frank Barchardbeca6522020-10-30 22:34:35 -07007125 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_div_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007126 TEST_REQUIRES_ARM_NEON_FMA;
7127 for (uint32_t m = 64; m <= 96; m += 32) {
7128 for (uint32_t n = 1; n < 10; n += 2) {
7129 for (size_t k = 1; k <= 10; k += 3) {
7130 SpMMMicrokernelTester()
7131 .mr(32)
7132 .nr(1)
7133 .m(m)
7134 .n(n)
7135 .k(k)
7136 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007137 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007138 }
7139 }
7140 }
7141 }
7142
Frank Barchardbeca6522020-10-30 22:34:35 -07007143 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, m_gt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007144 TEST_REQUIRES_ARM_NEON_FMA;
7145 for (uint32_t m = 33; m < 64; m++) {
7146 for (uint32_t n = 1; n < 10; n += 2) {
7147 for (size_t k = 1; k <= 10; k += 3) {
7148 SpMMMicrokernelTester()
7149 .mr(32)
7150 .nr(1)
7151 .m(m)
7152 .n(n)
7153 .k(k)
7154 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007155 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007156 }
7157 }
7158 }
7159 }
7160
Marat Dukhane8bfcc82020-11-16 12:28:13 -08007161 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, output_stride) {
7162 TEST_REQUIRES_ARM_NEON_FMA;
7163 for (uint32_t n = 1; n < 10; n += 2) {
7164 for (size_t k = 1; k <= 10; k += 3) {
7165 SpMMMicrokernelTester()
7166 .mr(32)
7167 .nr(1)
7168 .m(64)
7169 .n(n)
7170 .k(k)
7171 .output_stride(67)
7172 .sparsity(0.0f)
7173 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
7174 }
7175 }
7176 }
7177
Frank Barchardbeca6522020-10-30 22:34:35 -07007178 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, qmin) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007179 TEST_REQUIRES_ARM_NEON_FMA;
7180 for (uint32_t n = 1; n < 10; n += 2) {
7181 for (size_t k = 1; k <= 10; k += 3) {
7182 SpMMMicrokernelTester()
7183 .mr(32)
7184 .nr(1)
7185 .m(64)
7186 .n(n)
7187 .k(k)
7188 .sparsity(0.0f)
7189 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07007190 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007191 }
7192 }
7193 }
7194
Frank Barchardbeca6522020-10-30 22:34:35 -07007195 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, qmax) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007196 TEST_REQUIRES_ARM_NEON_FMA;
7197 for (uint32_t n = 1; n < 10; n += 2) {
7198 for (size_t k = 1; k <= 10; k += 3) {
7199 SpMMMicrokernelTester()
7200 .mr(32)
7201 .nr(1)
7202 .m(64)
7203 .n(n)
7204 .k(k)
7205 .sparsity(0.0f)
7206 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -07007207 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007208 }
7209 }
7210 }
7211
Frank Barchardbeca6522020-10-30 22:34:35 -07007212 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, half_sparse) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007213 TEST_REQUIRES_ARM_NEON_FMA;
7214 for (uint32_t n = 1; n < 10; n += 2) {
7215 for (size_t k = 1; k <= 10; k += 3) {
7216 SpMMMicrokernelTester()
7217 .mr(32)
7218 .nr(1)
7219 .m(64)
7220 .n(n)
7221 .k(k)
7222 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007223 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007224 }
7225 }
7226 }
7227
Frank Barchardbeca6522020-10-30 22:34:35 -07007228 TEST(F32_SPMM_MINMAX_32X1__NEONFMA_X2, zero_weights) {
Frank Barchard846c0c62020-10-26 15:01:39 -07007229 TEST_REQUIRES_ARM_NEON_FMA;
7230 for (uint32_t n = 1; n < 10; n += 2) {
7231 for (size_t k = 1; k <= 10; k += 3) {
7232 SpMMMicrokernelTester()
7233 .mr(32)
7234 .nr(1)
7235 .m(64)
7236 .n(n)
7237 .k(k)
7238 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -07007239 .Test(xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -07007240 }
7241 }
7242 }
Marat Dukhan2fa7a0c2020-12-06 19:09:02 -08007243#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard846c0c62020-10-26 15:01:39 -07007244
7245
Marat Dukhan1dadbf72019-10-01 10:46:20 -07007246#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan355ab432020-04-09 19:01:52 -07007247 TEST(F32_SPMM_MINMAX_4X1__SSE, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007248 TEST_REQUIRES_X86_SSE;
7249 SpMMMicrokernelTester()
7250 .mr(4)
7251 .nr(1)
7252 .m(4)
7253 .n(1)
7254 .k(1)
7255 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007256 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007257 }
7258
Marat Dukhan355ab432020-04-09 19:01:52 -07007259 TEST(F32_SPMM_MINMAX_4X1__SSE, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007260 TEST_REQUIRES_X86_SSE;
7261 for (size_t k = 2; k < 10; k++) {
7262 SpMMMicrokernelTester()
7263 .mr(4)
7264 .nr(1)
7265 .m(4)
7266 .n(1)
7267 .k(k)
7268 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007269 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007270 }
7271 }
7272
Marat Dukhan355ab432020-04-09 19:01:52 -07007273 TEST(F32_SPMM_MINMAX_4X1__SSE, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007274 TEST_REQUIRES_X86_SSE;
7275 for (uint32_t n = 2; n < 10; n++) {
7276 for (size_t k = 1; k <= 5; k += 2) {
7277 SpMMMicrokernelTester()
7278 .mr(4)
7279 .nr(1)
7280 .m(4)
7281 .n(n)
7282 .k(k)
7283 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007284 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007285 }
7286 }
7287 }
7288
Marat Dukhan355ab432020-04-09 19:01:52 -07007289 TEST(F32_SPMM_MINMAX_4X1__SSE, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007290 TEST_REQUIRES_X86_SSE;
7291 for (uint32_t m = 1; m < 4; m++) {
7292 for (uint32_t n = 1; n < 10; n += 2) {
7293 for (size_t k = 1; k <= 5; k += 2) {
7294 SpMMMicrokernelTester()
7295 .mr(4)
7296 .nr(1)
7297 .m(m)
7298 .n(n)
7299 .k(k)
7300 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007301 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007302 }
7303 }
7304 }
7305 }
7306
Marat Dukhan355ab432020-04-09 19:01:52 -07007307 TEST(F32_SPMM_MINMAX_4X1__SSE, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007308 TEST_REQUIRES_X86_SSE;
7309 for (uint32_t m = 8; m <= 12; m += 4) {
7310 for (uint32_t n = 1; n < 10; n += 2) {
7311 for (size_t k = 1; k <= 5; k += 2) {
7312 SpMMMicrokernelTester()
7313 .mr(4)
7314 .nr(1)
7315 .m(m)
7316 .n(n)
7317 .k(k)
7318 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007319 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007320 }
7321 }
7322 }
7323 }
7324
Marat Dukhan355ab432020-04-09 19:01:52 -07007325 TEST(F32_SPMM_MINMAX_4X1__SSE, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007326 TEST_REQUIRES_X86_SSE;
7327 for (uint32_t m = 5; m < 8; m++) {
7328 for (uint32_t n = 1; n < 10; n += 2) {
7329 for (size_t k = 1; k <= 5; k += 2) {
7330 SpMMMicrokernelTester()
7331 .mr(4)
7332 .nr(1)
7333 .m(m)
7334 .n(n)
7335 .k(k)
7336 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007337 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007338 }
7339 }
7340 }
7341 }
7342
Marat Dukhane8bfcc82020-11-16 12:28:13 -08007343 TEST(F32_SPMM_MINMAX_4X1__SSE, output_stride) {
7344 TEST_REQUIRES_X86_SSE;
7345 for (uint32_t n = 1; n < 10; n += 2) {
7346 for (size_t k = 1; k <= 5; k += 2) {
7347 SpMMMicrokernelTester()
7348 .mr(4)
7349 .nr(1)
7350 .m(8)
7351 .n(n)
7352 .k(k)
7353 .output_stride(11)
7354 .sparsity(0.0f)
7355 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
7356 }
7357 }
7358 }
7359
Marat Dukhan355ab432020-04-09 19:01:52 -07007360 TEST(F32_SPMM_MINMAX_4X1__SSE, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007361 TEST_REQUIRES_X86_SSE;
7362 for (uint32_t n = 1; n < 10; n += 2) {
7363 for (size_t k = 1; k <= 5; k += 2) {
7364 SpMMMicrokernelTester()
7365 .mr(4)
7366 .nr(1)
7367 .m(8)
7368 .n(n)
7369 .k(k)
7370 .sparsity(0.0f)
7371 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07007372 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007373 }
7374 }
7375 }
7376
Marat Dukhan355ab432020-04-09 19:01:52 -07007377 TEST(F32_SPMM_MINMAX_4X1__SSE, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007378 TEST_REQUIRES_X86_SSE;
7379 for (uint32_t n = 1; n < 10; n += 2) {
7380 for (size_t k = 1; k <= 5; k += 2) {
7381 SpMMMicrokernelTester()
7382 .mr(4)
7383 .nr(1)
7384 .m(8)
7385 .n(n)
7386 .k(k)
7387 .sparsity(0.0f)
7388 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07007389 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007390 }
7391 }
7392 }
7393
Marat Dukhan355ab432020-04-09 19:01:52 -07007394 TEST(F32_SPMM_MINMAX_4X1__SSE, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007395 TEST_REQUIRES_X86_SSE;
7396 for (uint32_t n = 1; n < 10; n += 2) {
7397 for (size_t k = 1; k <= 5; k += 2) {
7398 SpMMMicrokernelTester()
7399 .mr(4)
7400 .nr(1)
7401 .m(8)
7402 .n(n)
7403 .k(k)
7404 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007405 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007406 }
7407 }
7408 }
7409
Marat Dukhan355ab432020-04-09 19:01:52 -07007410 TEST(F32_SPMM_MINMAX_4X1__SSE, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007411 TEST_REQUIRES_X86_SSE;
7412 for (uint32_t n = 1; n < 10; n += 2) {
7413 for (size_t k = 1; k <= 5; k += 2) {
7414 SpMMMicrokernelTester()
7415 .mr(4)
7416 .nr(1)
7417 .m(8)
7418 .n(n)
7419 .k(k)
7420 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007421 .Test(xnn_f32_spmm_minmax_ukernel_4x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007422 }
7423 }
7424 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07007425#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07007426
7427
Marat Dukhan1dadbf72019-10-01 10:46:20 -07007428#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan355ab432020-04-09 19:01:52 -07007429 TEST(F32_SPMM_MINMAX_8X1__SSE, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007430 TEST_REQUIRES_X86_SSE;
7431 SpMMMicrokernelTester()
7432 .mr(8)
7433 .nr(1)
7434 .m(8)
7435 .n(1)
7436 .k(1)
7437 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007438 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007439 }
7440
Marat Dukhan355ab432020-04-09 19:01:52 -07007441 TEST(F32_SPMM_MINMAX_8X1__SSE, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007442 TEST_REQUIRES_X86_SSE;
7443 for (size_t k = 2; k < 10; k++) {
7444 SpMMMicrokernelTester()
7445 .mr(8)
7446 .nr(1)
7447 .m(8)
7448 .n(1)
7449 .k(k)
7450 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007451 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007452 }
7453 }
7454
Marat Dukhan355ab432020-04-09 19:01:52 -07007455 TEST(F32_SPMM_MINMAX_8X1__SSE, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007456 TEST_REQUIRES_X86_SSE;
7457 for (uint32_t n = 2; n < 10; n++) {
7458 for (size_t k = 1; k <= 5; k += 2) {
7459 SpMMMicrokernelTester()
7460 .mr(8)
7461 .nr(1)
7462 .m(8)
7463 .n(n)
7464 .k(k)
7465 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007466 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007467 }
7468 }
7469 }
7470
Marat Dukhan355ab432020-04-09 19:01:52 -07007471 TEST(F32_SPMM_MINMAX_8X1__SSE, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007472 TEST_REQUIRES_X86_SSE;
7473 for (uint32_t m = 1; m < 8; m++) {
7474 for (uint32_t n = 1; n < 10; n += 2) {
7475 for (size_t k = 1; k <= 5; k += 2) {
7476 SpMMMicrokernelTester()
7477 .mr(8)
7478 .nr(1)
7479 .m(m)
7480 .n(n)
7481 .k(k)
7482 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007483 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007484 }
7485 }
7486 }
7487 }
7488
Marat Dukhan355ab432020-04-09 19:01:52 -07007489 TEST(F32_SPMM_MINMAX_8X1__SSE, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007490 TEST_REQUIRES_X86_SSE;
7491 for (uint32_t m = 16; m <= 24; m += 8) {
7492 for (uint32_t n = 1; n < 10; n += 2) {
7493 for (size_t k = 1; k <= 5; k += 2) {
7494 SpMMMicrokernelTester()
7495 .mr(8)
7496 .nr(1)
7497 .m(m)
7498 .n(n)
7499 .k(k)
7500 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007501 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007502 }
7503 }
7504 }
7505 }
7506
Marat Dukhan355ab432020-04-09 19:01:52 -07007507 TEST(F32_SPMM_MINMAX_8X1__SSE, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007508 TEST_REQUIRES_X86_SSE;
7509 for (uint32_t m = 9; m < 16; m++) {
7510 for (uint32_t n = 1; n < 10; n += 2) {
7511 for (size_t k = 1; k <= 5; k += 2) {
7512 SpMMMicrokernelTester()
7513 .mr(8)
7514 .nr(1)
7515 .m(m)
7516 .n(n)
7517 .k(k)
7518 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007519 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007520 }
7521 }
7522 }
7523 }
7524
Marat Dukhane8bfcc82020-11-16 12:28:13 -08007525 TEST(F32_SPMM_MINMAX_8X1__SSE, output_stride) {
7526 TEST_REQUIRES_X86_SSE;
7527 for (uint32_t n = 1; n < 10; n += 2) {
7528 for (size_t k = 1; k <= 5; k += 2) {
7529 SpMMMicrokernelTester()
7530 .mr(8)
7531 .nr(1)
7532 .m(16)
7533 .n(n)
7534 .k(k)
7535 .output_stride(19)
7536 .sparsity(0.0f)
7537 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
7538 }
7539 }
7540 }
7541
Marat Dukhan355ab432020-04-09 19:01:52 -07007542 TEST(F32_SPMM_MINMAX_8X1__SSE, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007543 TEST_REQUIRES_X86_SSE;
7544 for (uint32_t n = 1; n < 10; n += 2) {
7545 for (size_t k = 1; k <= 5; k += 2) {
7546 SpMMMicrokernelTester()
7547 .mr(8)
7548 .nr(1)
7549 .m(16)
7550 .n(n)
7551 .k(k)
7552 .sparsity(0.0f)
7553 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07007554 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007555 }
7556 }
7557 }
7558
Marat Dukhan355ab432020-04-09 19:01:52 -07007559 TEST(F32_SPMM_MINMAX_8X1__SSE, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007560 TEST_REQUIRES_X86_SSE;
7561 for (uint32_t n = 1; n < 10; n += 2) {
7562 for (size_t k = 1; k <= 5; k += 2) {
7563 SpMMMicrokernelTester()
7564 .mr(8)
7565 .nr(1)
7566 .m(16)
7567 .n(n)
7568 .k(k)
7569 .sparsity(0.0f)
7570 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -07007571 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007572 }
7573 }
7574 }
7575
Marat Dukhan355ab432020-04-09 19:01:52 -07007576 TEST(F32_SPMM_MINMAX_8X1__SSE, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007577 TEST_REQUIRES_X86_SSE;
7578 for (uint32_t n = 1; n < 10; n += 2) {
7579 for (size_t k = 1; k <= 5; k += 2) {
7580 SpMMMicrokernelTester()
7581 .mr(8)
7582 .nr(1)
7583 .m(16)
7584 .n(n)
7585 .k(k)
7586 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007587 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007588 }
7589 }
7590 }
7591
Marat Dukhan355ab432020-04-09 19:01:52 -07007592 TEST(F32_SPMM_MINMAX_8X1__SSE, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07007593 TEST_REQUIRES_X86_SSE;
7594 for (uint32_t n = 1; n < 10; n += 2) {
7595 for (size_t k = 1; k <= 5; k += 2) {
7596 SpMMMicrokernelTester()
7597 .mr(8)
7598 .nr(1)
7599 .m(16)
7600 .n(n)
7601 .k(k)
7602 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -07007603 .Test(xnn_f32_spmm_minmax_ukernel_8x1__sse);
XNNPACK Teamb455b122019-09-27 18:10:33 -07007604 }
7605 }
7606 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07007607#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07007608
7609
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007610#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7611 TEST(F32_SPMM_MINMAX_16X1__SSE, k_eq_1) {
7612 TEST_REQUIRES_X86_SSE;
7613 SpMMMicrokernelTester()
7614 .mr(16)
7615 .nr(1)
7616 .m(16)
7617 .n(1)
7618 .k(1)
7619 .sparsity(0.0f)
7620 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7621 }
7622
7623 TEST(F32_SPMM_MINMAX_16X1__SSE, k_gt_1) {
7624 TEST_REQUIRES_X86_SSE;
7625 for (size_t k = 2; k < 10; k++) {
7626 SpMMMicrokernelTester()
7627 .mr(16)
7628 .nr(1)
7629 .m(16)
7630 .n(1)
7631 .k(k)
7632 .sparsity(0.0f)
7633 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7634 }
7635 }
7636
7637 TEST(F32_SPMM_MINMAX_16X1__SSE, n_gt_1) {
7638 TEST_REQUIRES_X86_SSE;
7639 for (uint32_t n = 2; n < 10; n++) {
7640 for (size_t k = 1; k <= 5; k += 2) {
7641 SpMMMicrokernelTester()
7642 .mr(16)
7643 .nr(1)
7644 .m(16)
7645 .n(n)
7646 .k(k)
7647 .sparsity(0.0f)
7648 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7649 }
7650 }
7651 }
7652
7653 TEST(F32_SPMM_MINMAX_16X1__SSE, m_lt_16) {
7654 TEST_REQUIRES_X86_SSE;
7655 for (uint32_t m = 1; m < 16; m++) {
7656 for (uint32_t n = 1; n < 10; n += 2) {
7657 for (size_t k = 1; k <= 5; k += 2) {
7658 SpMMMicrokernelTester()
7659 .mr(16)
7660 .nr(1)
7661 .m(m)
7662 .n(n)
7663 .k(k)
7664 .sparsity(0.0f)
7665 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7666 }
7667 }
7668 }
7669 }
7670
7671 TEST(F32_SPMM_MINMAX_16X1__SSE, m_div_16) {
7672 TEST_REQUIRES_X86_SSE;
7673 for (uint32_t m = 32; m <= 48; m += 16) {
7674 for (uint32_t n = 1; n < 10; n += 2) {
7675 for (size_t k = 1; k <= 5; k += 2) {
7676 SpMMMicrokernelTester()
7677 .mr(16)
7678 .nr(1)
7679 .m(m)
7680 .n(n)
7681 .k(k)
7682 .sparsity(0.0f)
7683 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7684 }
7685 }
7686 }
7687 }
7688
7689 TEST(F32_SPMM_MINMAX_16X1__SSE, m_gt_16) {
7690 TEST_REQUIRES_X86_SSE;
7691 for (uint32_t m = 17; m < 32; m++) {
7692 for (uint32_t n = 1; n < 10; n += 2) {
7693 for (size_t k = 1; k <= 5; k += 2) {
7694 SpMMMicrokernelTester()
7695 .mr(16)
7696 .nr(1)
7697 .m(m)
7698 .n(n)
7699 .k(k)
7700 .sparsity(0.0f)
7701 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7702 }
7703 }
7704 }
7705 }
7706
Marat Dukhane8bfcc82020-11-16 12:28:13 -08007707 TEST(F32_SPMM_MINMAX_16X1__SSE, output_stride) {
7708 TEST_REQUIRES_X86_SSE;
7709 for (uint32_t n = 1; n < 10; n += 2) {
7710 for (size_t k = 1; k <= 5; k += 2) {
7711 SpMMMicrokernelTester()
7712 .mr(16)
7713 .nr(1)
7714 .m(32)
7715 .n(n)
7716 .k(k)
7717 .output_stride(37)
7718 .sparsity(0.0f)
7719 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7720 }
7721 }
7722 }
7723
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007724 TEST(F32_SPMM_MINMAX_16X1__SSE, qmin) {
7725 TEST_REQUIRES_X86_SSE;
7726 for (uint32_t n = 1; n < 10; n += 2) {
7727 for (size_t k = 1; k <= 5; k += 2) {
7728 SpMMMicrokernelTester()
7729 .mr(16)
7730 .nr(1)
7731 .m(32)
7732 .n(n)
7733 .k(k)
7734 .sparsity(0.0f)
7735 .qmin(128)
7736 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7737 }
7738 }
7739 }
7740
7741 TEST(F32_SPMM_MINMAX_16X1__SSE, qmax) {
7742 TEST_REQUIRES_X86_SSE;
7743 for (uint32_t n = 1; n < 10; n += 2) {
7744 for (size_t k = 1; k <= 5; k += 2) {
7745 SpMMMicrokernelTester()
7746 .mr(16)
7747 .nr(1)
7748 .m(32)
7749 .n(n)
7750 .k(k)
7751 .sparsity(0.0f)
7752 .qmax(128)
7753 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7754 }
7755 }
7756 }
7757
7758 TEST(F32_SPMM_MINMAX_16X1__SSE, half_sparse) {
7759 TEST_REQUIRES_X86_SSE;
7760 for (uint32_t n = 1; n < 10; n += 2) {
7761 for (size_t k = 1; k <= 5; k += 2) {
7762 SpMMMicrokernelTester()
7763 .mr(16)
7764 .nr(1)
7765 .m(32)
7766 .n(n)
7767 .k(k)
7768 .sparsity(0.5f)
7769 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7770 }
7771 }
7772 }
7773
7774 TEST(F32_SPMM_MINMAX_16X1__SSE, zero_weights) {
7775 TEST_REQUIRES_X86_SSE;
7776 for (uint32_t n = 1; n < 10; n += 2) {
7777 for (size_t k = 1; k <= 5; k += 2) {
7778 SpMMMicrokernelTester()
7779 .mr(16)
7780 .nr(1)
7781 .m(32)
7782 .n(n)
7783 .k(k)
7784 .sparsity(1.0f)
7785 .Test(xnn_f32_spmm_minmax_ukernel_16x1__sse);
7786 }
7787 }
7788 }
7789#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7790
7791
Frank Barchard846c0c62020-10-26 15:01:39 -07007792#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7793 TEST(F32_SPMM_MINMAX_32X1__SSE, k_eq_1) {
7794 TEST_REQUIRES_X86_SSE;
7795 SpMMMicrokernelTester()
7796 .mr(32)
7797 .nr(1)
7798 .m(32)
7799 .n(1)
7800 .k(1)
7801 .sparsity(0.0f)
7802 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7803 }
7804
7805 TEST(F32_SPMM_MINMAX_32X1__SSE, k_gt_1) {
7806 TEST_REQUIRES_X86_SSE;
7807 for (size_t k = 2; k < 10; k++) {
7808 SpMMMicrokernelTester()
7809 .mr(32)
7810 .nr(1)
7811 .m(32)
7812 .n(1)
7813 .k(k)
7814 .sparsity(0.0f)
7815 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7816 }
7817 }
7818
7819 TEST(F32_SPMM_MINMAX_32X1__SSE, n_gt_1) {
7820 TEST_REQUIRES_X86_SSE;
7821 for (uint32_t n = 2; n < 10; n++) {
7822 for (size_t k = 1; k <= 5; k += 2) {
7823 SpMMMicrokernelTester()
7824 .mr(32)
7825 .nr(1)
7826 .m(32)
7827 .n(n)
7828 .k(k)
7829 .sparsity(0.0f)
7830 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7831 }
7832 }
7833 }
7834
7835 TEST(F32_SPMM_MINMAX_32X1__SSE, m_lt_32) {
7836 TEST_REQUIRES_X86_SSE;
7837 for (uint32_t m = 1; m < 32; m++) {
7838 for (uint32_t n = 1; n < 10; n += 2) {
7839 for (size_t k = 1; k <= 5; k += 2) {
7840 SpMMMicrokernelTester()
7841 .mr(32)
7842 .nr(1)
7843 .m(m)
7844 .n(n)
7845 .k(k)
7846 .sparsity(0.0f)
7847 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7848 }
7849 }
7850 }
7851 }
7852
7853 TEST(F32_SPMM_MINMAX_32X1__SSE, m_div_32) {
7854 TEST_REQUIRES_X86_SSE;
7855 for (uint32_t m = 64; m <= 96; m += 32) {
7856 for (uint32_t n = 1; n < 10; n += 2) {
7857 for (size_t k = 1; k <= 5; k += 2) {
7858 SpMMMicrokernelTester()
7859 .mr(32)
7860 .nr(1)
7861 .m(m)
7862 .n(n)
7863 .k(k)
7864 .sparsity(0.0f)
7865 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7866 }
7867 }
7868 }
7869 }
7870
7871 TEST(F32_SPMM_MINMAX_32X1__SSE, m_gt_32) {
7872 TEST_REQUIRES_X86_SSE;
7873 for (uint32_t m = 33; m < 64; m++) {
7874 for (uint32_t n = 1; n < 10; n += 2) {
7875 for (size_t k = 1; k <= 5; k += 2) {
7876 SpMMMicrokernelTester()
7877 .mr(32)
7878 .nr(1)
7879 .m(m)
7880 .n(n)
7881 .k(k)
7882 .sparsity(0.0f)
7883 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7884 }
7885 }
7886 }
7887 }
7888
Marat Dukhane8bfcc82020-11-16 12:28:13 -08007889 TEST(F32_SPMM_MINMAX_32X1__SSE, output_stride) {
7890 TEST_REQUIRES_X86_SSE;
7891 for (uint32_t n = 1; n < 10; n += 2) {
7892 for (size_t k = 1; k <= 5; k += 2) {
7893 SpMMMicrokernelTester()
7894 .mr(32)
7895 .nr(1)
7896 .m(64)
7897 .n(n)
7898 .k(k)
7899 .output_stride(67)
7900 .sparsity(0.0f)
7901 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7902 }
7903 }
7904 }
7905
Frank Barchard846c0c62020-10-26 15:01:39 -07007906 TEST(F32_SPMM_MINMAX_32X1__SSE, qmin) {
7907 TEST_REQUIRES_X86_SSE;
7908 for (uint32_t n = 1; n < 10; n += 2) {
7909 for (size_t k = 1; k <= 5; k += 2) {
7910 SpMMMicrokernelTester()
7911 .mr(32)
7912 .nr(1)
7913 .m(64)
7914 .n(n)
7915 .k(k)
7916 .sparsity(0.0f)
7917 .qmin(128)
7918 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7919 }
7920 }
7921 }
7922
7923 TEST(F32_SPMM_MINMAX_32X1__SSE, qmax) {
7924 TEST_REQUIRES_X86_SSE;
7925 for (uint32_t n = 1; n < 10; n += 2) {
7926 for (size_t k = 1; k <= 5; k += 2) {
7927 SpMMMicrokernelTester()
7928 .mr(32)
7929 .nr(1)
7930 .m(64)
7931 .n(n)
7932 .k(k)
7933 .sparsity(0.0f)
7934 .qmax(128)
7935 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7936 }
7937 }
7938 }
7939
7940 TEST(F32_SPMM_MINMAX_32X1__SSE, half_sparse) {
7941 TEST_REQUIRES_X86_SSE;
7942 for (uint32_t n = 1; n < 10; n += 2) {
7943 for (size_t k = 1; k <= 5; k += 2) {
7944 SpMMMicrokernelTester()
7945 .mr(32)
7946 .nr(1)
7947 .m(64)
7948 .n(n)
7949 .k(k)
7950 .sparsity(0.5f)
7951 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7952 }
7953 }
7954 }
7955
7956 TEST(F32_SPMM_MINMAX_32X1__SSE, zero_weights) {
7957 TEST_REQUIRES_X86_SSE;
7958 for (uint32_t n = 1; n < 10; n += 2) {
7959 for (size_t k = 1; k <= 5; k += 2) {
7960 SpMMMicrokernelTester()
7961 .mr(32)
7962 .nr(1)
7963 .m(64)
7964 .n(n)
7965 .k(k)
7966 .sparsity(1.0f)
7967 .Test(xnn_f32_spmm_minmax_ukernel_32x1__sse);
7968 }
7969 }
7970 }
7971#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7972
7973
Frank Barchard9e053402020-10-19 15:29:08 -07007974#if XNN_ARCH_WASMSIMD
7975 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, k_eq_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007976 SpMMMicrokernelTester()
7977 .mr(4)
7978 .nr(1)
7979 .m(4)
7980 .n(1)
7981 .k(1)
7982 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07007983 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007984 }
7985
Frank Barchard9e053402020-10-19 15:29:08 -07007986 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, k_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007987 for (size_t k = 2; k < 10; k++) {
7988 SpMMMicrokernelTester()
7989 .mr(4)
7990 .nr(1)
7991 .m(4)
7992 .n(1)
7993 .k(k)
7994 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07007995 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07007996 }
7997 }
7998
Frank Barchard9e053402020-10-19 15:29:08 -07007999 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, n_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008000 for (uint32_t n = 2; n < 10; n++) {
8001 for (size_t k = 1; k <= 5; k += 2) {
8002 SpMMMicrokernelTester()
8003 .mr(4)
8004 .nr(1)
8005 .m(4)
8006 .n(n)
8007 .k(k)
8008 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008009 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008010 }
8011 }
8012 }
8013
Frank Barchard9e053402020-10-19 15:29:08 -07008014 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_lt_4) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008015 for (uint32_t m = 1; m < 4; m++) {
8016 for (uint32_t n = 1; n < 10; n += 2) {
8017 for (size_t k = 1; k <= 5; k += 2) {
8018 SpMMMicrokernelTester()
8019 .mr(4)
8020 .nr(1)
8021 .m(m)
8022 .n(n)
8023 .k(k)
8024 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008025 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008026 }
8027 }
8028 }
8029 }
8030
Frank Barchard9e053402020-10-19 15:29:08 -07008031 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_div_4) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008032 for (uint32_t m = 8; m <= 12; m += 4) {
8033 for (uint32_t n = 1; n < 10; n += 2) {
8034 for (size_t k = 1; k <= 5; k += 2) {
8035 SpMMMicrokernelTester()
8036 .mr(4)
8037 .nr(1)
8038 .m(m)
8039 .n(n)
8040 .k(k)
8041 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008042 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008043 }
8044 }
8045 }
8046 }
8047
Frank Barchard9e053402020-10-19 15:29:08 -07008048 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, m_gt_4) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008049 for (uint32_t m = 5; m < 8; m++) {
8050 for (uint32_t n = 1; n < 10; n += 2) {
8051 for (size_t k = 1; k <= 5; k += 2) {
8052 SpMMMicrokernelTester()
8053 .mr(4)
8054 .nr(1)
8055 .m(m)
8056 .n(n)
8057 .k(k)
8058 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008059 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008060 }
8061 }
8062 }
8063 }
8064
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008065 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, output_stride) {
8066 for (uint32_t n = 1; n < 10; n += 2) {
8067 for (size_t k = 1; k <= 5; k += 2) {
8068 SpMMMicrokernelTester()
8069 .mr(4)
8070 .nr(1)
8071 .m(8)
8072 .n(n)
8073 .k(k)
8074 .output_stride(11)
8075 .sparsity(0.0f)
8076 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
8077 }
8078 }
8079 }
8080
Frank Barchard9e053402020-10-19 15:29:08 -07008081 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, qmin) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008082 for (uint32_t n = 1; n < 10; n += 2) {
8083 for (size_t k = 1; k <= 5; k += 2) {
8084 SpMMMicrokernelTester()
8085 .mr(4)
8086 .nr(1)
8087 .m(8)
8088 .n(n)
8089 .k(k)
8090 .sparsity(0.0f)
8091 .qmin(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008092 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008093 }
8094 }
8095 }
8096
Frank Barchard9e053402020-10-19 15:29:08 -07008097 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, qmax) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008098 for (uint32_t n = 1; n < 10; n += 2) {
8099 for (size_t k = 1; k <= 5; k += 2) {
8100 SpMMMicrokernelTester()
8101 .mr(4)
8102 .nr(1)
8103 .m(8)
8104 .n(n)
8105 .k(k)
8106 .sparsity(0.0f)
8107 .qmax(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008108 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008109 }
8110 }
8111 }
8112
Frank Barchard9e053402020-10-19 15:29:08 -07008113 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, half_sparse) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008114 for (uint32_t n = 1; n < 10; n += 2) {
8115 for (size_t k = 1; k <= 5; k += 2) {
8116 SpMMMicrokernelTester()
8117 .mr(4)
8118 .nr(1)
8119 .m(8)
8120 .n(n)
8121 .k(k)
8122 .sparsity(0.5f)
Frank Barchard9e053402020-10-19 15:29:08 -07008123 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008124 }
8125 }
8126 }
8127
Frank Barchard9e053402020-10-19 15:29:08 -07008128 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM, zero_weights) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008129 for (uint32_t n = 1; n < 10; n += 2) {
8130 for (size_t k = 1; k <= 5; k += 2) {
8131 SpMMMicrokernelTester()
8132 .mr(4)
8133 .nr(1)
8134 .m(8)
8135 .n(n)
8136 .k(k)
8137 .sparsity(1.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008138 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008139 }
8140 }
8141 }
Frank Barchard9e053402020-10-19 15:29:08 -07008142#endif // XNN_ARCH_WASMSIMD
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008143
8144
Frank Barchard9e053402020-10-19 15:29:08 -07008145#if XNN_ARCH_WASMSIMD
8146 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, k_eq_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008147 SpMMMicrokernelTester()
8148 .mr(8)
8149 .nr(1)
8150 .m(8)
8151 .n(1)
8152 .k(1)
8153 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008154 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008155 }
8156
Frank Barchard9e053402020-10-19 15:29:08 -07008157 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, k_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008158 for (size_t k = 2; k < 10; k++) {
8159 SpMMMicrokernelTester()
8160 .mr(8)
8161 .nr(1)
8162 .m(8)
8163 .n(1)
8164 .k(k)
8165 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008166 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008167 }
8168 }
8169
Frank Barchard9e053402020-10-19 15:29:08 -07008170 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, n_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008171 for (uint32_t n = 2; n < 10; n++) {
8172 for (size_t k = 1; k <= 5; k += 2) {
8173 SpMMMicrokernelTester()
8174 .mr(8)
8175 .nr(1)
8176 .m(8)
8177 .n(n)
8178 .k(k)
8179 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008180 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008181 }
8182 }
8183 }
8184
Frank Barchard9e053402020-10-19 15:29:08 -07008185 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_lt_8) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008186 for (uint32_t m = 1; m < 8; m++) {
8187 for (uint32_t n = 1; n < 10; n += 2) {
8188 for (size_t k = 1; k <= 5; k += 2) {
8189 SpMMMicrokernelTester()
8190 .mr(8)
8191 .nr(1)
8192 .m(m)
8193 .n(n)
8194 .k(k)
8195 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008196 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008197 }
8198 }
8199 }
8200 }
8201
Frank Barchard9e053402020-10-19 15:29:08 -07008202 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_div_8) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008203 for (uint32_t m = 16; m <= 24; m += 8) {
8204 for (uint32_t n = 1; n < 10; n += 2) {
8205 for (size_t k = 1; k <= 5; k += 2) {
8206 SpMMMicrokernelTester()
8207 .mr(8)
8208 .nr(1)
8209 .m(m)
8210 .n(n)
8211 .k(k)
8212 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008213 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008214 }
8215 }
8216 }
8217 }
8218
Frank Barchard9e053402020-10-19 15:29:08 -07008219 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, m_gt_8) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008220 for (uint32_t m = 9; m < 16; m++) {
8221 for (uint32_t n = 1; n < 10; n += 2) {
8222 for (size_t k = 1; k <= 5; k += 2) {
8223 SpMMMicrokernelTester()
8224 .mr(8)
8225 .nr(1)
8226 .m(m)
8227 .n(n)
8228 .k(k)
8229 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008230 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008231 }
8232 }
8233 }
8234 }
8235
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008236 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, output_stride) {
8237 for (uint32_t n = 1; n < 10; n += 2) {
8238 for (size_t k = 1; k <= 5; k += 2) {
8239 SpMMMicrokernelTester()
8240 .mr(8)
8241 .nr(1)
8242 .m(16)
8243 .n(n)
8244 .k(k)
8245 .output_stride(19)
8246 .sparsity(0.0f)
8247 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
8248 }
8249 }
8250 }
8251
Frank Barchard9e053402020-10-19 15:29:08 -07008252 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, qmin) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008253 for (uint32_t n = 1; n < 10; n += 2) {
8254 for (size_t k = 1; k <= 5; k += 2) {
8255 SpMMMicrokernelTester()
8256 .mr(8)
8257 .nr(1)
8258 .m(16)
8259 .n(n)
8260 .k(k)
8261 .sparsity(0.0f)
8262 .qmin(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008263 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008264 }
8265 }
8266 }
8267
Frank Barchard9e053402020-10-19 15:29:08 -07008268 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, qmax) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008269 for (uint32_t n = 1; n < 10; n += 2) {
8270 for (size_t k = 1; k <= 5; k += 2) {
8271 SpMMMicrokernelTester()
8272 .mr(8)
8273 .nr(1)
8274 .m(16)
8275 .n(n)
8276 .k(k)
8277 .sparsity(0.0f)
8278 .qmax(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008279 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008280 }
8281 }
8282 }
8283
Frank Barchard9e053402020-10-19 15:29:08 -07008284 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, half_sparse) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008285 for (uint32_t n = 1; n < 10; n += 2) {
8286 for (size_t k = 1; k <= 5; k += 2) {
8287 SpMMMicrokernelTester()
8288 .mr(8)
8289 .nr(1)
8290 .m(16)
8291 .n(n)
8292 .k(k)
8293 .sparsity(0.5f)
Frank Barchard9e053402020-10-19 15:29:08 -07008294 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008295 }
8296 }
8297 }
8298
Frank Barchard9e053402020-10-19 15:29:08 -07008299 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM, zero_weights) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008300 for (uint32_t n = 1; n < 10; n += 2) {
8301 for (size_t k = 1; k <= 5; k += 2) {
8302 SpMMMicrokernelTester()
8303 .mr(8)
8304 .nr(1)
8305 .m(16)
8306 .n(n)
8307 .k(k)
8308 .sparsity(1.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008309 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008310 }
8311 }
8312 }
Frank Barchard9e053402020-10-19 15:29:08 -07008313#endif // XNN_ARCH_WASMSIMD
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008314
8315
Frank Barchard9e053402020-10-19 15:29:08 -07008316#if XNN_ARCH_WASMSIMD
8317 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, k_eq_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008318 SpMMMicrokernelTester()
8319 .mr(16)
8320 .nr(1)
8321 .m(16)
8322 .n(1)
8323 .k(1)
8324 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008325 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008326 }
8327
Frank Barchard9e053402020-10-19 15:29:08 -07008328 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, k_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008329 for (size_t k = 2; k < 10; k++) {
8330 SpMMMicrokernelTester()
8331 .mr(16)
8332 .nr(1)
8333 .m(16)
8334 .n(1)
8335 .k(k)
8336 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008337 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008338 }
8339 }
8340
Frank Barchard9e053402020-10-19 15:29:08 -07008341 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, n_gt_1) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008342 for (uint32_t n = 2; n < 10; n++) {
8343 for (size_t k = 1; k <= 5; k += 2) {
8344 SpMMMicrokernelTester()
8345 .mr(16)
8346 .nr(1)
8347 .m(16)
8348 .n(n)
8349 .k(k)
8350 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008351 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008352 }
8353 }
8354 }
8355
Frank Barchard9e053402020-10-19 15:29:08 -07008356 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_lt_16) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008357 for (uint32_t m = 1; m < 16; m++) {
8358 for (uint32_t n = 1; n < 10; n += 2) {
8359 for (size_t k = 1; k <= 5; k += 2) {
8360 SpMMMicrokernelTester()
8361 .mr(16)
8362 .nr(1)
8363 .m(m)
8364 .n(n)
8365 .k(k)
8366 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008367 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008368 }
8369 }
8370 }
8371 }
8372
Frank Barchard9e053402020-10-19 15:29:08 -07008373 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_div_16) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008374 for (uint32_t m = 32; m <= 48; m += 16) {
8375 for (uint32_t n = 1; n < 10; n += 2) {
8376 for (size_t k = 1; k <= 5; k += 2) {
8377 SpMMMicrokernelTester()
8378 .mr(16)
8379 .nr(1)
8380 .m(m)
8381 .n(n)
8382 .k(k)
8383 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008384 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008385 }
8386 }
8387 }
8388 }
8389
Frank Barchard9e053402020-10-19 15:29:08 -07008390 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, m_gt_16) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008391 for (uint32_t m = 17; m < 32; m++) {
8392 for (uint32_t n = 1; n < 10; n += 2) {
8393 for (size_t k = 1; k <= 5; k += 2) {
8394 SpMMMicrokernelTester()
8395 .mr(16)
8396 .nr(1)
8397 .m(m)
8398 .n(n)
8399 .k(k)
8400 .sparsity(0.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008401 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008402 }
8403 }
8404 }
8405 }
8406
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008407 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, output_stride) {
8408 for (uint32_t n = 1; n < 10; n += 2) {
8409 for (size_t k = 1; k <= 5; k += 2) {
8410 SpMMMicrokernelTester()
8411 .mr(16)
8412 .nr(1)
8413 .m(32)
8414 .n(n)
8415 .k(k)
8416 .output_stride(37)
8417 .sparsity(0.0f)
8418 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
8419 }
8420 }
8421 }
8422
Frank Barchard9e053402020-10-19 15:29:08 -07008423 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, qmin) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008424 for (uint32_t n = 1; n < 10; n += 2) {
8425 for (size_t k = 1; k <= 5; k += 2) {
8426 SpMMMicrokernelTester()
8427 .mr(16)
8428 .nr(1)
8429 .m(32)
8430 .n(n)
8431 .k(k)
8432 .sparsity(0.0f)
8433 .qmin(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008434 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008435 }
8436 }
8437 }
8438
Frank Barchard9e053402020-10-19 15:29:08 -07008439 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, qmax) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008440 for (uint32_t n = 1; n < 10; n += 2) {
8441 for (size_t k = 1; k <= 5; k += 2) {
8442 SpMMMicrokernelTester()
8443 .mr(16)
8444 .nr(1)
8445 .m(32)
8446 .n(n)
8447 .k(k)
8448 .sparsity(0.0f)
8449 .qmax(128)
Frank Barchard9e053402020-10-19 15:29:08 -07008450 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008451 }
8452 }
8453 }
8454
Frank Barchard9e053402020-10-19 15:29:08 -07008455 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, half_sparse) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008456 for (uint32_t n = 1; n < 10; n += 2) {
8457 for (size_t k = 1; k <= 5; k += 2) {
8458 SpMMMicrokernelTester()
8459 .mr(16)
8460 .nr(1)
8461 .m(32)
8462 .n(n)
8463 .k(k)
8464 .sparsity(0.5f)
Frank Barchard9e053402020-10-19 15:29:08 -07008465 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008466 }
8467 }
8468 }
8469
Frank Barchard9e053402020-10-19 15:29:08 -07008470 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM, zero_weights) {
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008471 for (uint32_t n = 1; n < 10; n += 2) {
8472 for (size_t k = 1; k <= 5; k += 2) {
8473 SpMMMicrokernelTester()
8474 .mr(16)
8475 .nr(1)
8476 .m(32)
8477 .n(n)
8478 .k(k)
8479 .sparsity(1.0f)
Frank Barchard9e053402020-10-19 15:29:08 -07008480 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm);
Erich Elsen6e80fdc2020-06-09 15:35:37 -07008481 }
8482 }
8483 }
Frank Barchard9e053402020-10-19 15:29:08 -07008484#endif // XNN_ARCH_WASMSIMD
8485
8486
8487#if XNN_ARCH_WASMSIMD
Frank Barchard846c0c62020-10-26 15:01:39 -07008488 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, k_eq_1) {
8489 SpMMMicrokernelTester()
8490 .mr(32)
8491 .nr(1)
8492 .m(32)
8493 .n(1)
8494 .k(1)
8495 .sparsity(0.0f)
8496 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8497 }
8498
8499 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, k_gt_1) {
8500 for (size_t k = 2; k < 10; k++) {
8501 SpMMMicrokernelTester()
8502 .mr(32)
8503 .nr(1)
8504 .m(32)
8505 .n(1)
8506 .k(k)
8507 .sparsity(0.0f)
8508 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8509 }
8510 }
8511
8512 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, n_gt_1) {
8513 for (uint32_t n = 2; n < 10; n++) {
8514 for (size_t k = 1; k <= 5; k += 2) {
8515 SpMMMicrokernelTester()
8516 .mr(32)
8517 .nr(1)
8518 .m(32)
8519 .n(n)
8520 .k(k)
8521 .sparsity(0.0f)
8522 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8523 }
8524 }
8525 }
8526
8527 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_lt_32) {
8528 for (uint32_t m = 1; m < 32; m++) {
8529 for (uint32_t n = 1; n < 10; n += 2) {
8530 for (size_t k = 1; k <= 5; k += 2) {
8531 SpMMMicrokernelTester()
8532 .mr(32)
8533 .nr(1)
8534 .m(m)
8535 .n(n)
8536 .k(k)
8537 .sparsity(0.0f)
8538 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8539 }
8540 }
8541 }
8542 }
8543
8544 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_div_32) {
8545 for (uint32_t m = 64; m <= 96; m += 32) {
8546 for (uint32_t n = 1; n < 10; n += 2) {
8547 for (size_t k = 1; k <= 5; k += 2) {
8548 SpMMMicrokernelTester()
8549 .mr(32)
8550 .nr(1)
8551 .m(m)
8552 .n(n)
8553 .k(k)
8554 .sparsity(0.0f)
8555 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8556 }
8557 }
8558 }
8559 }
8560
8561 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, m_gt_32) {
8562 for (uint32_t m = 33; m < 64; m++) {
8563 for (uint32_t n = 1; n < 10; n += 2) {
8564 for (size_t k = 1; k <= 5; k += 2) {
8565 SpMMMicrokernelTester()
8566 .mr(32)
8567 .nr(1)
8568 .m(m)
8569 .n(n)
8570 .k(k)
8571 .sparsity(0.0f)
8572 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8573 }
8574 }
8575 }
8576 }
8577
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008578 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, output_stride) {
8579 for (uint32_t n = 1; n < 10; n += 2) {
8580 for (size_t k = 1; k <= 5; k += 2) {
8581 SpMMMicrokernelTester()
8582 .mr(32)
8583 .nr(1)
8584 .m(64)
8585 .n(n)
8586 .k(k)
8587 .output_stride(67)
8588 .sparsity(0.0f)
8589 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8590 }
8591 }
8592 }
8593
Frank Barchard846c0c62020-10-26 15:01:39 -07008594 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, qmin) {
8595 for (uint32_t n = 1; n < 10; n += 2) {
8596 for (size_t k = 1; k <= 5; k += 2) {
8597 SpMMMicrokernelTester()
8598 .mr(32)
8599 .nr(1)
8600 .m(64)
8601 .n(n)
8602 .k(k)
8603 .sparsity(0.0f)
8604 .qmin(128)
8605 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8606 }
8607 }
8608 }
8609
8610 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, qmax) {
8611 for (uint32_t n = 1; n < 10; n += 2) {
8612 for (size_t k = 1; k <= 5; k += 2) {
8613 SpMMMicrokernelTester()
8614 .mr(32)
8615 .nr(1)
8616 .m(64)
8617 .n(n)
8618 .k(k)
8619 .sparsity(0.0f)
8620 .qmax(128)
8621 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8622 }
8623 }
8624 }
8625
8626 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, half_sparse) {
8627 for (uint32_t n = 1; n < 10; n += 2) {
8628 for (size_t k = 1; k <= 5; k += 2) {
8629 SpMMMicrokernelTester()
8630 .mr(32)
8631 .nr(1)
8632 .m(64)
8633 .n(n)
8634 .k(k)
8635 .sparsity(0.5f)
8636 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8637 }
8638 }
8639 }
8640
8641 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM, zero_weights) {
8642 for (uint32_t n = 1; n < 10; n += 2) {
8643 for (size_t k = 1; k <= 5; k += 2) {
8644 SpMMMicrokernelTester()
8645 .mr(32)
8646 .nr(1)
8647 .m(64)
8648 .n(n)
8649 .k(k)
8650 .sparsity(1.0f)
8651 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm);
8652 }
8653 }
8654 }
8655#endif // XNN_ARCH_WASMSIMD
8656
8657
8658#if XNN_ARCH_WASMSIMD
Frank Barchard9e053402020-10-19 15:29:08 -07008659 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, k_eq_1) {
8660 SpMMMicrokernelTester()
8661 .mr(4)
8662 .nr(1)
8663 .m(4)
8664 .n(1)
8665 .k(1)
8666 .sparsity(0.0f)
8667 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8668 }
8669
8670 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, k_gt_1) {
8671 for (size_t k = 2; k < 10; k++) {
8672 SpMMMicrokernelTester()
8673 .mr(4)
8674 .nr(1)
8675 .m(4)
8676 .n(1)
8677 .k(k)
8678 .sparsity(0.0f)
8679 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8680 }
8681 }
8682
8683 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, n_gt_1) {
8684 for (uint32_t n = 2; n < 10; n++) {
8685 for (size_t k = 1; k <= 5; k += 2) {
8686 SpMMMicrokernelTester()
8687 .mr(4)
8688 .nr(1)
8689 .m(4)
8690 .n(n)
8691 .k(k)
8692 .sparsity(0.0f)
8693 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8694 }
8695 }
8696 }
8697
8698 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_lt_4) {
8699 for (uint32_t m = 1; m < 4; m++) {
8700 for (uint32_t n = 1; n < 10; n += 2) {
8701 for (size_t k = 1; k <= 5; k += 2) {
8702 SpMMMicrokernelTester()
8703 .mr(4)
8704 .nr(1)
8705 .m(m)
8706 .n(n)
8707 .k(k)
8708 .sparsity(0.0f)
8709 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8710 }
8711 }
8712 }
8713 }
8714
8715 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_div_4) {
8716 for (uint32_t m = 8; m <= 12; m += 4) {
8717 for (uint32_t n = 1; n < 10; n += 2) {
8718 for (size_t k = 1; k <= 5; k += 2) {
8719 SpMMMicrokernelTester()
8720 .mr(4)
8721 .nr(1)
8722 .m(m)
8723 .n(n)
8724 .k(k)
8725 .sparsity(0.0f)
8726 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8727 }
8728 }
8729 }
8730 }
8731
8732 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, m_gt_4) {
8733 for (uint32_t m = 5; m < 8; m++) {
8734 for (uint32_t n = 1; n < 10; n += 2) {
8735 for (size_t k = 1; k <= 5; k += 2) {
8736 SpMMMicrokernelTester()
8737 .mr(4)
8738 .nr(1)
8739 .m(m)
8740 .n(n)
8741 .k(k)
8742 .sparsity(0.0f)
8743 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8744 }
8745 }
8746 }
8747 }
8748
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008749 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, output_stride) {
8750 for (uint32_t n = 1; n < 10; n += 2) {
8751 for (size_t k = 1; k <= 5; k += 2) {
8752 SpMMMicrokernelTester()
8753 .mr(4)
8754 .nr(1)
8755 .m(8)
8756 .n(n)
8757 .k(k)
8758 .output_stride(11)
8759 .sparsity(0.0f)
8760 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8761 }
8762 }
8763 }
8764
Frank Barchard9e053402020-10-19 15:29:08 -07008765 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, qmin) {
8766 for (uint32_t n = 1; n < 10; n += 2) {
8767 for (size_t k = 1; k <= 5; k += 2) {
8768 SpMMMicrokernelTester()
8769 .mr(4)
8770 .nr(1)
8771 .m(8)
8772 .n(n)
8773 .k(k)
8774 .sparsity(0.0f)
8775 .qmin(128)
8776 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8777 }
8778 }
8779 }
8780
8781 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, qmax) {
8782 for (uint32_t n = 1; n < 10; n += 2) {
8783 for (size_t k = 1; k <= 5; k += 2) {
8784 SpMMMicrokernelTester()
8785 .mr(4)
8786 .nr(1)
8787 .m(8)
8788 .n(n)
8789 .k(k)
8790 .sparsity(0.0f)
8791 .qmax(128)
8792 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8793 }
8794 }
8795 }
8796
8797 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, half_sparse) {
8798 for (uint32_t n = 1; n < 10; n += 2) {
8799 for (size_t k = 1; k <= 5; k += 2) {
8800 SpMMMicrokernelTester()
8801 .mr(4)
8802 .nr(1)
8803 .m(8)
8804 .n(n)
8805 .k(k)
8806 .sparsity(0.5f)
8807 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8808 }
8809 }
8810 }
8811
8812 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86, zero_weights) {
8813 for (uint32_t n = 1; n < 10; n += 2) {
8814 for (size_t k = 1; k <= 5; k += 2) {
8815 SpMMMicrokernelTester()
8816 .mr(4)
8817 .nr(1)
8818 .m(8)
8819 .n(n)
8820 .k(k)
8821 .sparsity(1.0f)
8822 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86);
8823 }
8824 }
8825 }
8826#endif // XNN_ARCH_WASMSIMD
8827
8828
8829#if XNN_ARCH_WASMSIMD
8830 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, k_eq_1) {
8831 SpMMMicrokernelTester()
8832 .mr(8)
8833 .nr(1)
8834 .m(8)
8835 .n(1)
8836 .k(1)
8837 .sparsity(0.0f)
8838 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8839 }
8840
8841 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, k_gt_1) {
8842 for (size_t k = 2; k < 10; k++) {
8843 SpMMMicrokernelTester()
8844 .mr(8)
8845 .nr(1)
8846 .m(8)
8847 .n(1)
8848 .k(k)
8849 .sparsity(0.0f)
8850 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8851 }
8852 }
8853
8854 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, n_gt_1) {
8855 for (uint32_t n = 2; n < 10; n++) {
8856 for (size_t k = 1; k <= 5; k += 2) {
8857 SpMMMicrokernelTester()
8858 .mr(8)
8859 .nr(1)
8860 .m(8)
8861 .n(n)
8862 .k(k)
8863 .sparsity(0.0f)
8864 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8865 }
8866 }
8867 }
8868
8869 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_lt_8) {
8870 for (uint32_t m = 1; m < 8; m++) {
8871 for (uint32_t n = 1; n < 10; n += 2) {
8872 for (size_t k = 1; k <= 5; k += 2) {
8873 SpMMMicrokernelTester()
8874 .mr(8)
8875 .nr(1)
8876 .m(m)
8877 .n(n)
8878 .k(k)
8879 .sparsity(0.0f)
8880 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8881 }
8882 }
8883 }
8884 }
8885
8886 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_div_8) {
8887 for (uint32_t m = 16; m <= 24; m += 8) {
8888 for (uint32_t n = 1; n < 10; n += 2) {
8889 for (size_t k = 1; k <= 5; k += 2) {
8890 SpMMMicrokernelTester()
8891 .mr(8)
8892 .nr(1)
8893 .m(m)
8894 .n(n)
8895 .k(k)
8896 .sparsity(0.0f)
8897 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8898 }
8899 }
8900 }
8901 }
8902
8903 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, m_gt_8) {
8904 for (uint32_t m = 9; m < 16; m++) {
8905 for (uint32_t n = 1; n < 10; n += 2) {
8906 for (size_t k = 1; k <= 5; k += 2) {
8907 SpMMMicrokernelTester()
8908 .mr(8)
8909 .nr(1)
8910 .m(m)
8911 .n(n)
8912 .k(k)
8913 .sparsity(0.0f)
8914 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8915 }
8916 }
8917 }
8918 }
8919
Marat Dukhane8bfcc82020-11-16 12:28:13 -08008920 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, output_stride) {
8921 for (uint32_t n = 1; n < 10; n += 2) {
8922 for (size_t k = 1; k <= 5; k += 2) {
8923 SpMMMicrokernelTester()
8924 .mr(8)
8925 .nr(1)
8926 .m(16)
8927 .n(n)
8928 .k(k)
8929 .output_stride(19)
8930 .sparsity(0.0f)
8931 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8932 }
8933 }
8934 }
8935
Frank Barchard9e053402020-10-19 15:29:08 -07008936 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, qmin) {
8937 for (uint32_t n = 1; n < 10; n += 2) {
8938 for (size_t k = 1; k <= 5; k += 2) {
8939 SpMMMicrokernelTester()
8940 .mr(8)
8941 .nr(1)
8942 .m(16)
8943 .n(n)
8944 .k(k)
8945 .sparsity(0.0f)
8946 .qmin(128)
8947 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8948 }
8949 }
8950 }
8951
8952 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, qmax) {
8953 for (uint32_t n = 1; n < 10; n += 2) {
8954 for (size_t k = 1; k <= 5; k += 2) {
8955 SpMMMicrokernelTester()
8956 .mr(8)
8957 .nr(1)
8958 .m(16)
8959 .n(n)
8960 .k(k)
8961 .sparsity(0.0f)
8962 .qmax(128)
8963 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8964 }
8965 }
8966 }
8967
8968 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, half_sparse) {
8969 for (uint32_t n = 1; n < 10; n += 2) {
8970 for (size_t k = 1; k <= 5; k += 2) {
8971 SpMMMicrokernelTester()
8972 .mr(8)
8973 .nr(1)
8974 .m(16)
8975 .n(n)
8976 .k(k)
8977 .sparsity(0.5f)
8978 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8979 }
8980 }
8981 }
8982
8983 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86, zero_weights) {
8984 for (uint32_t n = 1; n < 10; n += 2) {
8985 for (size_t k = 1; k <= 5; k += 2) {
8986 SpMMMicrokernelTester()
8987 .mr(8)
8988 .nr(1)
8989 .m(16)
8990 .n(n)
8991 .k(k)
8992 .sparsity(1.0f)
8993 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86);
8994 }
8995 }
8996 }
8997#endif // XNN_ARCH_WASMSIMD
8998
8999
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86. Every case
  // configures the tester with mr(16) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.

  // Single run: m = 16, n = 1, k = 1, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, k_eq_1) {
    SpMMMicrokernelTester()
      .mr(16)
      .nr(1)
      .m(16)
      .n(1)
      .k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
  }

  // k = 2..9 with m = 16, n = 1.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      SpMMMicrokernelTester()
        .mr(16)
        .nr(1)
        .m(16)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
    }
  }

  // n = 2..9 crossed with k in {1, 3, 5}; m = 16.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }

  // m = 1..15 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_lt_16) {
    for (uint32_t m = 1; m < 16; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
        }
      }
    }
  }

  // m in {32, 48} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_div_16) {
    for (uint32_t m = 32; m <= 48; m += 16) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
        }
      }
    }
  }

  // m = 17..31 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, m_gt_16) {
    for (uint32_t m = 17; m < 32; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
        }
      }
    }
  }

  // m = 32 with output_stride(37), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }

  // m = 32 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }

  // m = 32 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }

  // m = 32 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }

  // m = 32 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
Erich Elsen6e80fdc2020-06-09 15:35:37 -07009169
9170
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86. Every case
  // configures the tester with mr(32) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.

  // Single run: m = 32, n = 1, k = 1, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, k_eq_1) {
    SpMMMicrokernelTester()
      .mr(32)
      .nr(1)
      .m(32)
      .n(1)
      .k(1)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
  }

  // k = 2..9 with m = 32, n = 1.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      SpMMMicrokernelTester()
        .mr(32)
        .nr(1)
        .m(32)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
    }
  }

  // n = 2..9 crossed with k in {1, 3, 5}; m = 32.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }

  // m = 1..31 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_lt_32) {
    for (uint32_t m = 1; m < 32; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
        }
      }
    }
  }

  // m in {64, 96} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_div_32) {
    for (uint32_t m = 64; m <= 96; m += 32) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
        }
      }
    }
  }

  // m = 33..63 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 3, 5}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, m_gt_32) {
    for (uint32_t m = 33; m < 64; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 5; k += 2) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
        }
      }
    }
  }

  // m = 64 with output_stride(67), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .output_stride(67)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }

  // m = 64 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }

  // m = 64 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }

  // m = 64 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }

  // m = 64 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
9340
9341
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2. Every case
  // configures the tester with mr(4) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.
  // NOTE(review): the k_* cases are arranged around k = 2, presumably the
  // kernel's unroll factor ("_x2") -- confirm against the test generator.

  // Single run: m = 4, n = 1, k = 2, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_eq_2) {
    SpMMMicrokernelTester()
      .mr(4)
      .nr(1)
      .m(4)
      .n(1)
      .k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
  }

  // The loop range covers only k = 1; m = 4, n = 1.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_lt_2) {
    for (size_t k = 1; k < 2; k++) {
      SpMMMicrokernelTester()
        .mr(4)
        .nr(1)
        .m(4)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
    }
  }

  // The loop range covers only k = 3; m = 4, n = 1.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_gt_2) {
    for (size_t k = 3; k < 4; k++) {
      SpMMMicrokernelTester()
        .mr(4)
        .nr(1)
        .m(4)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
    }
  }

  // Even k from 4 to 20 inclusive; m = 4, n = 1.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, k_div_2) {
    for (size_t k = 4; k <= 20; k += 2) {
      SpMMMicrokernelTester()
        .mr(4)
        .nr(1)
        .m(4)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
    }
  }

  // n = 2..9 crossed with k in {1, 4, 7, 10}; m = 4.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(4)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 1..3 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_lt_4) {
    for (uint32_t m = 1; m < 4; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(4)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m in {8, 12} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_div_4) {
    for (uint32_t m = 8; m <= 12; m += 4) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(4)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 5..7 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, m_gt_4) {
    for (uint32_t m = 5; m < 8; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(4)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 8 with output_stride(11), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .output_stride(11)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 8 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 8 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 8 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 8 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X2, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(4)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x2);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
9537
9538
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2. Every case
  // configures the tester with mr(8) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.
  // NOTE(review): the k_* cases are arranged around k = 2, presumably the
  // kernel's unroll factor ("_x2") -- confirm against the test generator.

  // Single run: m = 8, n = 1, k = 2, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_eq_2) {
    SpMMMicrokernelTester()
      .mr(8)
      .nr(1)
      .m(8)
      .n(1)
      .k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
  }

  // The loop range covers only k = 1; m = 8, n = 1.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_lt_2) {
    for (size_t k = 1; k < 2; k++) {
      SpMMMicrokernelTester()
        .mr(8)
        .nr(1)
        .m(8)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
    }
  }

  // The loop range covers only k = 3; m = 8, n = 1.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_gt_2) {
    for (size_t k = 3; k < 4; k++) {
      SpMMMicrokernelTester()
        .mr(8)
        .nr(1)
        .m(8)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
    }
  }

  // Even k from 4 to 20 inclusive; m = 8, n = 1.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, k_div_2) {
    for (size_t k = 4; k <= 20; k += 2) {
      SpMMMicrokernelTester()
        .mr(8)
        .nr(1)
        .m(8)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
    }
  }

  // n = 2..9 crossed with k in {1, 4, 7, 10}; m = 8.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(8)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 1..7 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_lt_8) {
    for (uint32_t m = 1; m < 8; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(8)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m in {16, 24} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_div_8) {
    for (uint32_t m = 16; m <= 24; m += 8) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(8)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 9..15 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, m_gt_8) {
    for (uint32_t m = 9; m < 16; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(8)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 16 with output_stride(19), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .output_stride(19)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 16 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 16 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 16 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 16 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X2, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(8)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x2);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
9734
9735
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2. Every case
  // configures the tester with mr(16) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.
  // NOTE(review): the k_* cases are arranged around k = 2, presumably the
  // kernel's unroll factor ("_x2") -- confirm against the test generator.

  // Single run: m = 16, n = 1, k = 2, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_eq_2) {
    SpMMMicrokernelTester()
      .mr(16)
      .nr(1)
      .m(16)
      .n(1)
      .k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
  }

  // The loop range covers only k = 1; m = 16, n = 1.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_lt_2) {
    for (size_t k = 1; k < 2; k++) {
      SpMMMicrokernelTester()
        .mr(16)
        .nr(1)
        .m(16)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
    }
  }

  // The loop range covers only k = 3; m = 16, n = 1.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_gt_2) {
    for (size_t k = 3; k < 4; k++) {
      SpMMMicrokernelTester()
        .mr(16)
        .nr(1)
        .m(16)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
    }
  }

  // Even k from 4 to 20 inclusive; m = 16, n = 1.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, k_div_2) {
    for (size_t k = 4; k <= 20; k += 2) {
      SpMMMicrokernelTester()
        .mr(16)
        .nr(1)
        .m(16)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
    }
  }

  // n = 2..9 crossed with k in {1, 4, 7, 10}; m = 16.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(16)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 1..15 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_lt_16) {
    for (uint32_t m = 1; m < 16; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m in {32, 48} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_div_16) {
    for (uint32_t m = 32; m <= 48; m += 16) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 17..31 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, m_gt_16) {
    for (uint32_t m = 17; m < 32; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(16)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 32 with output_stride(37), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .output_stride(37)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 32 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 32 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 32 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 32 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X2, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(16)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x2);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
9931
9932
#if XNN_ARCH_WASMSIMD
  // Test cases for xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2. Every case
  // configures the tester with mr(32) and nr(1). Built only when
  // XNN_ARCH_WASMSIMD is non-zero.
  // NOTE(review): the k_* cases are arranged around k = 2, presumably the
  // kernel's unroll factor ("_x2") -- confirm against the test generator.

  // Single run: m = 32, n = 1, k = 2, sparsity 0.0.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_eq_2) {
    SpMMMicrokernelTester()
      .mr(32)
      .nr(1)
      .m(32)
      .n(1)
      .k(2)
      .sparsity(0.0f)
      .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
  }

  // The loop range covers only k = 1; m = 32, n = 1.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_lt_2) {
    for (size_t k = 1; k < 2; k++) {
      SpMMMicrokernelTester()
        .mr(32)
        .nr(1)
        .m(32)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
    }
  }

  // The loop range covers only k = 3; m = 32, n = 1.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_gt_2) {
    for (size_t k = 3; k < 4; k++) {
      SpMMMicrokernelTester()
        .mr(32)
        .nr(1)
        .m(32)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
    }
  }

  // Even k from 4 to 20 inclusive; m = 32, n = 1.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, k_div_2) {
    for (size_t k = 4; k <= 20; k += 2) {
      SpMMMicrokernelTester()
        .mr(32)
        .nr(1)
        .m(32)
        .n(1)
        .k(k)
        .sparsity(0.0f)
        .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
    }
  }

  // n = 2..9 crossed with k in {1, 4, 7, 10}; m = 32.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, n_gt_1) {
    for (uint32_t n = 2; n < 10; n++) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(32)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 1..31 (below mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_lt_32) {
    for (uint32_t m = 1; m < 32; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m in {64, 96} (multiples of mr); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_div_32) {
    for (uint32_t m = 64; m <= 96; m += 32) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 33..63 (above mr, never a multiple of it); n in {1, 3, 5, 7, 9}, k in {1, 4, 7, 10}.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, m_gt_32) {
    for (uint32_t m = 33; m < 64; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
        }
      }
    }
  }

  // m = 64 with output_stride(67), a stride value larger than m.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, output_stride) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .output_stride(67)
          .sparsity(0.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 64 with qmin(128); how qmin maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, qmin) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmin(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 64 with qmax(128); how qmax maps to the output clamp is defined by the tester.
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, qmax) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .qmax(128)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 64 with sparsity(0.5f).
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, half_sparse) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(0.5f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }

  // m = 64 with sparsity(1.0f) (the test name indicates an all-zero weight matrix).
  TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X2, zero_weights) {
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .sparsity(1.0f)
          .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
10128
10129
10130#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070010131 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_eq_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010132 SpMMMicrokernelTester()
10133 .mr(4)
10134 .nr(1)
10135 .m(4)
10136 .n(1)
10137 .k(2)
10138 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010139 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010140 }
10141
Frank Barchardbeca6522020-10-30 22:34:35 -070010142 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_lt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010143 for (size_t k = 1; k < 2; k++) {
10144 SpMMMicrokernelTester()
10145 .mr(4)
10146 .nr(1)
10147 .m(4)
10148 .n(1)
10149 .k(k)
10150 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010151 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010152 }
10153 }
10154
Frank Barchardbeca6522020-10-30 22:34:35 -070010155 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_gt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010156 for (size_t k = 3; k < 4; k++) {
10157 SpMMMicrokernelTester()
10158 .mr(4)
10159 .nr(1)
10160 .m(4)
10161 .n(1)
10162 .k(k)
10163 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010164 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010165 }
10166 }
10167
Frank Barchardbeca6522020-10-30 22:34:35 -070010168 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, k_div_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010169 for (size_t k = 4; k <= 20; k += 2) {
10170 SpMMMicrokernelTester()
10171 .mr(4)
10172 .nr(1)
10173 .m(4)
10174 .n(1)
10175 .k(k)
10176 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010177 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010178 }
10179 }
10180
Frank Barchardbeca6522020-10-30 22:34:35 -070010181 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010182 for (uint32_t n = 2; n < 10; n++) {
10183 for (size_t k = 1; k <= 10; k += 3) {
10184 SpMMMicrokernelTester()
10185 .mr(4)
10186 .nr(1)
10187 .m(4)
10188 .n(n)
10189 .k(k)
10190 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010191 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010192 }
10193 }
10194 }
10195
Frank Barchardbeca6522020-10-30 22:34:35 -070010196 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010197 for (uint32_t m = 1; m < 4; m++) {
10198 for (uint32_t n = 1; n < 10; n += 2) {
10199 for (size_t k = 1; k <= 10; k += 3) {
10200 SpMMMicrokernelTester()
10201 .mr(4)
10202 .nr(1)
10203 .m(m)
10204 .n(n)
10205 .k(k)
10206 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010207 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010208 }
10209 }
10210 }
10211 }
10212
Frank Barchardbeca6522020-10-30 22:34:35 -070010213 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010214 for (uint32_t m = 8; m <= 12; m += 4) {
10215 for (uint32_t n = 1; n < 10; n += 2) {
10216 for (size_t k = 1; k <= 10; k += 3) {
10217 SpMMMicrokernelTester()
10218 .mr(4)
10219 .nr(1)
10220 .m(m)
10221 .n(n)
10222 .k(k)
10223 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010224 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010225 }
10226 }
10227 }
10228 }
10229
Frank Barchardbeca6522020-10-30 22:34:35 -070010230 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, m_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010231 for (uint32_t m = 5; m < 8; m++) {
10232 for (uint32_t n = 1; n < 10; n += 2) {
10233 for (size_t k = 1; k <= 10; k += 3) {
10234 SpMMMicrokernelTester()
10235 .mr(4)
10236 .nr(1)
10237 .m(m)
10238 .n(n)
10239 .k(k)
10240 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010241 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010242 }
10243 }
10244 }
10245 }
10246
Marat Dukhane8bfcc82020-11-16 12:28:13 -080010247 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, output_stride) {
10248 for (uint32_t n = 1; n < 10; n += 2) {
10249 for (size_t k = 1; k <= 10; k += 3) {
10250 SpMMMicrokernelTester()
10251 .mr(4)
10252 .nr(1)
10253 .m(8)
10254 .n(n)
10255 .k(k)
10256 .output_stride(11)
10257 .sparsity(0.0f)
10258 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
10259 }
10260 }
10261 }
10262
Frank Barchardbeca6522020-10-30 22:34:35 -070010263 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010264 for (uint32_t n = 1; n < 10; n += 2) {
10265 for (size_t k = 1; k <= 10; k += 3) {
10266 SpMMMicrokernelTester()
10267 .mr(4)
10268 .nr(1)
10269 .m(8)
10270 .n(n)
10271 .k(k)
10272 .sparsity(0.0f)
10273 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010274 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010275 }
10276 }
10277 }
10278
Frank Barchardbeca6522020-10-30 22:34:35 -070010279 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010280 for (uint32_t n = 1; n < 10; n += 2) {
10281 for (size_t k = 1; k <= 10; k += 3) {
10282 SpMMMicrokernelTester()
10283 .mr(4)
10284 .nr(1)
10285 .m(8)
10286 .n(n)
10287 .k(k)
10288 .sparsity(0.0f)
10289 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010290 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010291 }
10292 }
10293 }
10294
Frank Barchardbeca6522020-10-30 22:34:35 -070010295 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010296 for (uint32_t n = 1; n < 10; n += 2) {
10297 for (size_t k = 1; k <= 10; k += 3) {
10298 SpMMMicrokernelTester()
10299 .mr(4)
10300 .nr(1)
10301 .m(8)
10302 .n(n)
10303 .k(k)
10304 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010305 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010306 }
10307 }
10308 }
10309
Frank Barchardbeca6522020-10-30 22:34:35 -070010310 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X2, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010311 for (uint32_t n = 1; n < 10; n += 2) {
10312 for (size_t k = 1; k <= 10; k += 3) {
10313 SpMMMicrokernelTester()
10314 .mr(4)
10315 .nr(1)
10316 .m(8)
10317 .n(n)
10318 .k(k)
10319 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010320 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010321 }
10322 }
10323 }
10324#endif // XNN_ARCH_WASMSIMD
10325
10326
10327#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070010328 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_eq_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010329 SpMMMicrokernelTester()
10330 .mr(8)
10331 .nr(1)
10332 .m(8)
10333 .n(1)
10334 .k(2)
10335 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010336 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010337 }
10338
Frank Barchardbeca6522020-10-30 22:34:35 -070010339 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_lt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010340 for (size_t k = 1; k < 2; k++) {
10341 SpMMMicrokernelTester()
10342 .mr(8)
10343 .nr(1)
10344 .m(8)
10345 .n(1)
10346 .k(k)
10347 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010348 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010349 }
10350 }
10351
Frank Barchardbeca6522020-10-30 22:34:35 -070010352 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_gt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010353 for (size_t k = 3; k < 4; k++) {
10354 SpMMMicrokernelTester()
10355 .mr(8)
10356 .nr(1)
10357 .m(8)
10358 .n(1)
10359 .k(k)
10360 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010361 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010362 }
10363 }
10364
Frank Barchardbeca6522020-10-30 22:34:35 -070010365 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, k_div_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010366 for (size_t k = 4; k <= 20; k += 2) {
10367 SpMMMicrokernelTester()
10368 .mr(8)
10369 .nr(1)
10370 .m(8)
10371 .n(1)
10372 .k(k)
10373 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010374 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010375 }
10376 }
10377
Frank Barchardbeca6522020-10-30 22:34:35 -070010378 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010379 for (uint32_t n = 2; n < 10; n++) {
10380 for (size_t k = 1; k <= 10; k += 3) {
10381 SpMMMicrokernelTester()
10382 .mr(8)
10383 .nr(1)
10384 .m(8)
10385 .n(n)
10386 .k(k)
10387 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010388 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010389 }
10390 }
10391 }
10392
Frank Barchardbeca6522020-10-30 22:34:35 -070010393 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_lt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010394 for (uint32_t m = 1; m < 8; m++) {
10395 for (uint32_t n = 1; n < 10; n += 2) {
10396 for (size_t k = 1; k <= 10; k += 3) {
10397 SpMMMicrokernelTester()
10398 .mr(8)
10399 .nr(1)
10400 .m(m)
10401 .n(n)
10402 .k(k)
10403 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010404 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010405 }
10406 }
10407 }
10408 }
10409
Frank Barchardbeca6522020-10-30 22:34:35 -070010410 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_div_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010411 for (uint32_t m = 16; m <= 24; m += 8) {
10412 for (uint32_t n = 1; n < 10; n += 2) {
10413 for (size_t k = 1; k <= 10; k += 3) {
10414 SpMMMicrokernelTester()
10415 .mr(8)
10416 .nr(1)
10417 .m(m)
10418 .n(n)
10419 .k(k)
10420 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010421 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010422 }
10423 }
10424 }
10425 }
10426
Frank Barchardbeca6522020-10-30 22:34:35 -070010427 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, m_gt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010428 for (uint32_t m = 9; m < 16; m++) {
10429 for (uint32_t n = 1; n < 10; n += 2) {
10430 for (size_t k = 1; k <= 10; k += 3) {
10431 SpMMMicrokernelTester()
10432 .mr(8)
10433 .nr(1)
10434 .m(m)
10435 .n(n)
10436 .k(k)
10437 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010438 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010439 }
10440 }
10441 }
10442 }
10443
Marat Dukhane8bfcc82020-11-16 12:28:13 -080010444 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, output_stride) {
10445 for (uint32_t n = 1; n < 10; n += 2) {
10446 for (size_t k = 1; k <= 10; k += 3) {
10447 SpMMMicrokernelTester()
10448 .mr(8)
10449 .nr(1)
10450 .m(16)
10451 .n(n)
10452 .k(k)
10453 .output_stride(19)
10454 .sparsity(0.0f)
10455 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
10456 }
10457 }
10458 }
10459
Frank Barchardbeca6522020-10-30 22:34:35 -070010460 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010461 for (uint32_t n = 1; n < 10; n += 2) {
10462 for (size_t k = 1; k <= 10; k += 3) {
10463 SpMMMicrokernelTester()
10464 .mr(8)
10465 .nr(1)
10466 .m(16)
10467 .n(n)
10468 .k(k)
10469 .sparsity(0.0f)
10470 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010471 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010472 }
10473 }
10474 }
10475
Frank Barchardbeca6522020-10-30 22:34:35 -070010476 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010477 for (uint32_t n = 1; n < 10; n += 2) {
10478 for (size_t k = 1; k <= 10; k += 3) {
10479 SpMMMicrokernelTester()
10480 .mr(8)
10481 .nr(1)
10482 .m(16)
10483 .n(n)
10484 .k(k)
10485 .sparsity(0.0f)
10486 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010487 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010488 }
10489 }
10490 }
10491
Frank Barchardbeca6522020-10-30 22:34:35 -070010492 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010493 for (uint32_t n = 1; n < 10; n += 2) {
10494 for (size_t k = 1; k <= 10; k += 3) {
10495 SpMMMicrokernelTester()
10496 .mr(8)
10497 .nr(1)
10498 .m(16)
10499 .n(n)
10500 .k(k)
10501 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010502 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010503 }
10504 }
10505 }
10506
Frank Barchardbeca6522020-10-30 22:34:35 -070010507 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X2, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010508 for (uint32_t n = 1; n < 10; n += 2) {
10509 for (size_t k = 1; k <= 10; k += 3) {
10510 SpMMMicrokernelTester()
10511 .mr(8)
10512 .nr(1)
10513 .m(16)
10514 .n(n)
10515 .k(k)
10516 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010517 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010518 }
10519 }
10520 }
10521#endif // XNN_ARCH_WASMSIMD
10522
10523
10524#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070010525 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_eq_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010526 SpMMMicrokernelTester()
10527 .mr(16)
10528 .nr(1)
10529 .m(16)
10530 .n(1)
10531 .k(2)
10532 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010533 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010534 }
10535
Frank Barchardbeca6522020-10-30 22:34:35 -070010536 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_lt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010537 for (size_t k = 1; k < 2; k++) {
10538 SpMMMicrokernelTester()
10539 .mr(16)
10540 .nr(1)
10541 .m(16)
10542 .n(1)
10543 .k(k)
10544 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010545 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010546 }
10547 }
10548
Frank Barchardbeca6522020-10-30 22:34:35 -070010549 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_gt_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010550 for (size_t k = 3; k < 4; k++) {
10551 SpMMMicrokernelTester()
10552 .mr(16)
10553 .nr(1)
10554 .m(16)
10555 .n(1)
10556 .k(k)
10557 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010558 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010559 }
10560 }
10561
Frank Barchardbeca6522020-10-30 22:34:35 -070010562 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, k_div_2) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010563 for (size_t k = 4; k <= 20; k += 2) {
10564 SpMMMicrokernelTester()
10565 .mr(16)
10566 .nr(1)
10567 .m(16)
10568 .n(1)
10569 .k(k)
10570 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010571 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010572 }
10573 }
10574
Frank Barchardbeca6522020-10-30 22:34:35 -070010575 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010576 for (uint32_t n = 2; n < 10; n++) {
10577 for (size_t k = 1; k <= 10; k += 3) {
10578 SpMMMicrokernelTester()
10579 .mr(16)
10580 .nr(1)
10581 .m(16)
10582 .n(n)
10583 .k(k)
10584 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010585 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010586 }
10587 }
10588 }
10589
Frank Barchardbeca6522020-10-30 22:34:35 -070010590 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_lt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010591 for (uint32_t m = 1; m < 16; m++) {
10592 for (uint32_t n = 1; n < 10; n += 2) {
10593 for (size_t k = 1; k <= 10; k += 3) {
10594 SpMMMicrokernelTester()
10595 .mr(16)
10596 .nr(1)
10597 .m(m)
10598 .n(n)
10599 .k(k)
10600 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010601 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010602 }
10603 }
10604 }
10605 }
10606
Frank Barchardbeca6522020-10-30 22:34:35 -070010607 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_div_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010608 for (uint32_t m = 32; m <= 48; m += 16) {
10609 for (uint32_t n = 1; n < 10; n += 2) {
10610 for (size_t k = 1; k <= 10; k += 3) {
10611 SpMMMicrokernelTester()
10612 .mr(16)
10613 .nr(1)
10614 .m(m)
10615 .n(n)
10616 .k(k)
10617 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010618 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010619 }
10620 }
10621 }
10622 }
10623
Frank Barchardbeca6522020-10-30 22:34:35 -070010624 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, m_gt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010625 for (uint32_t m = 17; m < 32; m++) {
10626 for (uint32_t n = 1; n < 10; n += 2) {
10627 for (size_t k = 1; k <= 10; k += 3) {
10628 SpMMMicrokernelTester()
10629 .mr(16)
10630 .nr(1)
10631 .m(m)
10632 .n(n)
10633 .k(k)
10634 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010635 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010636 }
10637 }
10638 }
10639 }
10640
Marat Dukhane8bfcc82020-11-16 12:28:13 -080010641 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, output_stride) {
10642 for (uint32_t n = 1; n < 10; n += 2) {
10643 for (size_t k = 1; k <= 10; k += 3) {
10644 SpMMMicrokernelTester()
10645 .mr(16)
10646 .nr(1)
10647 .m(32)
10648 .n(n)
10649 .k(k)
10650 .output_stride(37)
10651 .sparsity(0.0f)
10652 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
10653 }
10654 }
10655 }
10656
Frank Barchardbeca6522020-10-30 22:34:35 -070010657 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010658 for (uint32_t n = 1; n < 10; n += 2) {
10659 for (size_t k = 1; k <= 10; k += 3) {
10660 SpMMMicrokernelTester()
10661 .mr(16)
10662 .nr(1)
10663 .m(32)
10664 .n(n)
10665 .k(k)
10666 .sparsity(0.0f)
10667 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010668 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010669 }
10670 }
10671 }
10672
Frank Barchardbeca6522020-10-30 22:34:35 -070010673 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010674 for (uint32_t n = 1; n < 10; n += 2) {
10675 for (size_t k = 1; k <= 10; k += 3) {
10676 SpMMMicrokernelTester()
10677 .mr(16)
10678 .nr(1)
10679 .m(32)
10680 .n(n)
10681 .k(k)
10682 .sparsity(0.0f)
10683 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010684 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010685 }
10686 }
10687 }
10688
Frank Barchardbeca6522020-10-30 22:34:35 -070010689 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010690 for (uint32_t n = 1; n < 10; n += 2) {
10691 for (size_t k = 1; k <= 10; k += 3) {
10692 SpMMMicrokernelTester()
10693 .mr(16)
10694 .nr(1)
10695 .m(32)
10696 .n(n)
10697 .k(k)
10698 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010699 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010700 }
10701 }
10702 }
10703
Frank Barchardbeca6522020-10-30 22:34:35 -070010704 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X2, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010705 for (uint32_t n = 1; n < 10; n += 2) {
10706 for (size_t k = 1; k <= 10; k += 3) {
10707 SpMMMicrokernelTester()
10708 .mr(16)
10709 .nr(1)
10710 .m(32)
10711 .n(n)
10712 .k(k)
10713 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010714 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x2);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010715 }
10716 }
10717 }
10718#endif // XNN_ARCH_WASMSIMD
10719
10720
10721#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070010722 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_eq_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010723 SpMMMicrokernelTester()
10724 .mr(32)
10725 .nr(1)
10726 .m(32)
10727 .n(1)
10728 .k(2)
10729 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010730 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010731 }
10732
Frank Barchardbeca6522020-10-30 22:34:35 -070010733 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_lt_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010734 for (size_t k = 1; k < 2; k++) {
10735 SpMMMicrokernelTester()
10736 .mr(32)
10737 .nr(1)
10738 .m(32)
10739 .n(1)
10740 .k(k)
10741 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010742 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010743 }
10744 }
10745
Frank Barchardbeca6522020-10-30 22:34:35 -070010746 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_gt_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010747 for (size_t k = 3; k < 4; k++) {
10748 SpMMMicrokernelTester()
10749 .mr(32)
10750 .nr(1)
10751 .m(32)
10752 .n(1)
10753 .k(k)
10754 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010755 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010756 }
10757 }
10758
Frank Barchardbeca6522020-10-30 22:34:35 -070010759 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, k_div_2) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010760 for (size_t k = 4; k <= 20; k += 2) {
10761 SpMMMicrokernelTester()
10762 .mr(32)
10763 .nr(1)
10764 .m(32)
10765 .n(1)
10766 .k(k)
10767 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010768 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010769 }
10770 }
10771
Frank Barchardbeca6522020-10-30 22:34:35 -070010772 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, n_gt_1) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010773 for (uint32_t n = 2; n < 10; n++) {
10774 for (size_t k = 1; k <= 10; k += 3) {
10775 SpMMMicrokernelTester()
10776 .mr(32)
10777 .nr(1)
10778 .m(32)
10779 .n(n)
10780 .k(k)
10781 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010782 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010783 }
10784 }
10785 }
10786
Frank Barchardbeca6522020-10-30 22:34:35 -070010787 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_lt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010788 for (uint32_t m = 1; m < 32; m++) {
10789 for (uint32_t n = 1; n < 10; n += 2) {
10790 for (size_t k = 1; k <= 10; k += 3) {
10791 SpMMMicrokernelTester()
10792 .mr(32)
10793 .nr(1)
10794 .m(m)
10795 .n(n)
10796 .k(k)
10797 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010798 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010799 }
10800 }
10801 }
10802 }
10803
Frank Barchardbeca6522020-10-30 22:34:35 -070010804 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_div_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010805 for (uint32_t m = 64; m <= 96; m += 32) {
10806 for (uint32_t n = 1; n < 10; n += 2) {
10807 for (size_t k = 1; k <= 10; k += 3) {
10808 SpMMMicrokernelTester()
10809 .mr(32)
10810 .nr(1)
10811 .m(m)
10812 .n(n)
10813 .k(k)
10814 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010815 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010816 }
10817 }
10818 }
10819 }
10820
Frank Barchardbeca6522020-10-30 22:34:35 -070010821 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, m_gt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010822 for (uint32_t m = 33; m < 64; m++) {
10823 for (uint32_t n = 1; n < 10; n += 2) {
10824 for (size_t k = 1; k <= 10; k += 3) {
10825 SpMMMicrokernelTester()
10826 .mr(32)
10827 .nr(1)
10828 .m(m)
10829 .n(n)
10830 .k(k)
10831 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010832 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010833 }
10834 }
10835 }
10836 }
10837
Marat Dukhane8bfcc82020-11-16 12:28:13 -080010838 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, output_stride) {
10839 for (uint32_t n = 1; n < 10; n += 2) {
10840 for (size_t k = 1; k <= 10; k += 3) {
10841 SpMMMicrokernelTester()
10842 .mr(32)
10843 .nr(1)
10844 .m(64)
10845 .n(n)
10846 .k(k)
10847 .output_stride(67)
10848 .sparsity(0.0f)
10849 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
10850 }
10851 }
10852 }
10853
Frank Barchardbeca6522020-10-30 22:34:35 -070010854 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, qmin) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010855 for (uint32_t n = 1; n < 10; n += 2) {
10856 for (size_t k = 1; k <= 10; k += 3) {
10857 SpMMMicrokernelTester()
10858 .mr(32)
10859 .nr(1)
10860 .m(64)
10861 .n(n)
10862 .k(k)
10863 .sparsity(0.0f)
10864 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010865 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010866 }
10867 }
10868 }
10869
Frank Barchardbeca6522020-10-30 22:34:35 -070010870 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, qmax) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010871 for (uint32_t n = 1; n < 10; n += 2) {
10872 for (size_t k = 1; k <= 10; k += 3) {
10873 SpMMMicrokernelTester()
10874 .mr(32)
10875 .nr(1)
10876 .m(64)
10877 .n(n)
10878 .k(k)
10879 .sparsity(0.0f)
10880 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070010881 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010882 }
10883 }
10884 }
10885
Frank Barchardbeca6522020-10-30 22:34:35 -070010886 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, half_sparse) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010887 for (uint32_t n = 1; n < 10; n += 2) {
10888 for (size_t k = 1; k <= 10; k += 3) {
10889 SpMMMicrokernelTester()
10890 .mr(32)
10891 .nr(1)
10892 .m(64)
10893 .n(n)
10894 .k(k)
10895 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010896 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010897 }
10898 }
10899 }
10900
Frank Barchardbeca6522020-10-30 22:34:35 -070010901 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X2, zero_weights) {
Frank Barchard846c0c62020-10-26 15:01:39 -070010902 for (uint32_t n = 1; n < 10; n += 2) {
10903 for (size_t k = 1; k <= 10; k += 3) {
10904 SpMMMicrokernelTester()
10905 .mr(32)
10906 .nr(1)
10907 .m(64)
10908 .n(n)
10909 .k(k)
10910 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010911 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2);
Frank Barchard846c0c62020-10-26 15:01:39 -070010912 }
10913 }
10914 }
10915#endif // XNN_ARCH_WASMSIMD
10916
10917
10918#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070010919 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010920 SpMMMicrokernelTester()
10921 .mr(4)
10922 .nr(1)
10923 .m(4)
10924 .n(1)
10925 .k(4)
10926 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010927 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010928 }
10929
Frank Barchardbeca6522020-10-30 22:34:35 -070010930 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010931 for (size_t k = 1; k < 4; k++) {
10932 SpMMMicrokernelTester()
10933 .mr(4)
10934 .nr(1)
10935 .m(4)
10936 .n(1)
10937 .k(k)
10938 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010939 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010940 }
10941 }
10942
Frank Barchardbeca6522020-10-30 22:34:35 -070010943 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010944 for (size_t k = 5; k < 8; k++) {
10945 SpMMMicrokernelTester()
10946 .mr(4)
10947 .nr(1)
10948 .m(4)
10949 .n(1)
10950 .k(k)
10951 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010952 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010953 }
10954 }
10955
Frank Barchardbeca6522020-10-30 22:34:35 -070010956 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010957 for (size_t k = 8; k <= 40; k += 4) {
10958 SpMMMicrokernelTester()
10959 .mr(4)
10960 .nr(1)
10961 .m(4)
10962 .n(1)
10963 .k(k)
10964 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010965 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010966 }
10967 }
10968
Frank Barchardbeca6522020-10-30 22:34:35 -070010969 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010970 for (uint32_t n = 2; n < 10; n++) {
10971 for (size_t k = 1; k <= 20; k += 5) {
10972 SpMMMicrokernelTester()
10973 .mr(4)
10974 .nr(1)
10975 .m(4)
10976 .n(n)
10977 .k(k)
10978 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010979 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010980 }
10981 }
10982 }
10983
Frank Barchardbeca6522020-10-30 22:34:35 -070010984 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070010985 for (uint32_t m = 1; m < 4; m++) {
10986 for (uint32_t n = 1; n < 10; n += 2) {
10987 for (size_t k = 1; k <= 20; k += 5) {
10988 SpMMMicrokernelTester()
10989 .mr(4)
10990 .nr(1)
10991 .m(m)
10992 .n(n)
10993 .k(k)
10994 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070010995 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070010996 }
10997 }
10998 }
10999 }
11000
Frank Barchardbeca6522020-10-30 22:34:35 -070011001 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011002 for (uint32_t m = 8; m <= 12; m += 4) {
11003 for (uint32_t n = 1; n < 10; n += 2) {
11004 for (size_t k = 1; k <= 20; k += 5) {
11005 SpMMMicrokernelTester()
11006 .mr(4)
11007 .nr(1)
11008 .m(m)
11009 .n(n)
11010 .k(k)
11011 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011012 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011013 }
11014 }
11015 }
11016 }
11017
Frank Barchardbeca6522020-10-30 22:34:35 -070011018 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, m_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011019 for (uint32_t m = 5; m < 8; m++) {
11020 for (uint32_t n = 1; n < 10; n += 2) {
11021 for (size_t k = 1; k <= 20; k += 5) {
11022 SpMMMicrokernelTester()
11023 .mr(4)
11024 .nr(1)
11025 .m(m)
11026 .n(n)
11027 .k(k)
11028 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011029 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011030 }
11031 }
11032 }
11033 }
11034
Marat Dukhane8bfcc82020-11-16 12:28:13 -080011035 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, output_stride) {
11036 for (uint32_t n = 1; n < 10; n += 2) {
11037 for (size_t k = 1; k <= 20; k += 5) {
11038 SpMMMicrokernelTester()
11039 .mr(4)
11040 .nr(1)
11041 .m(8)
11042 .n(n)
11043 .k(k)
11044 .output_stride(11)
11045 .sparsity(0.0f)
11046 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
11047 }
11048 }
11049 }
11050
Frank Barchardbeca6522020-10-30 22:34:35 -070011051 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011052 for (uint32_t n = 1; n < 10; n += 2) {
11053 for (size_t k = 1; k <= 20; k += 5) {
11054 SpMMMicrokernelTester()
11055 .mr(4)
11056 .nr(1)
11057 .m(8)
11058 .n(n)
11059 .k(k)
11060 .sparsity(0.0f)
11061 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011062 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011063 }
11064 }
11065 }
11066
Frank Barchardbeca6522020-10-30 22:34:35 -070011067 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011068 for (uint32_t n = 1; n < 10; n += 2) {
11069 for (size_t k = 1; k <= 20; k += 5) {
11070 SpMMMicrokernelTester()
11071 .mr(4)
11072 .nr(1)
11073 .m(8)
11074 .n(n)
11075 .k(k)
11076 .sparsity(0.0f)
11077 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011078 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011079 }
11080 }
11081 }
11082
Frank Barchardbeca6522020-10-30 22:34:35 -070011083 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011084 for (uint32_t n = 1; n < 10; n += 2) {
11085 for (size_t k = 1; k <= 20; k += 5) {
11086 SpMMMicrokernelTester()
11087 .mr(4)
11088 .nr(1)
11089 .m(8)
11090 .n(n)
11091 .k(k)
11092 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011093 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011094 }
11095 }
11096 }
11097
Frank Barchardbeca6522020-10-30 22:34:35 -070011098 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011099 for (uint32_t n = 1; n < 10; n += 2) {
11100 for (size_t k = 1; k <= 20; k += 5) {
11101 SpMMMicrokernelTester()
11102 .mr(4)
11103 .nr(1)
11104 .m(8)
11105 .n(n)
11106 .k(k)
11107 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011108 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011109 }
11110 }
11111 }
11112#endif // XNN_ARCH_WASMSIMD
11113
11114
11115#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070011116 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011117 SpMMMicrokernelTester()
11118 .mr(8)
11119 .nr(1)
11120 .m(8)
11121 .n(1)
11122 .k(4)
11123 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011124 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011125 }
11126
Frank Barchardbeca6522020-10-30 22:34:35 -070011127 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011128 for (size_t k = 1; k < 4; k++) {
11129 SpMMMicrokernelTester()
11130 .mr(8)
11131 .nr(1)
11132 .m(8)
11133 .n(1)
11134 .k(k)
11135 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011136 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011137 }
11138 }
11139
Frank Barchardbeca6522020-10-30 22:34:35 -070011140 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011141 for (size_t k = 5; k < 8; k++) {
11142 SpMMMicrokernelTester()
11143 .mr(8)
11144 .nr(1)
11145 .m(8)
11146 .n(1)
11147 .k(k)
11148 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011149 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011150 }
11151 }
11152
Frank Barchardbeca6522020-10-30 22:34:35 -070011153 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011154 for (size_t k = 8; k <= 40; k += 4) {
11155 SpMMMicrokernelTester()
11156 .mr(8)
11157 .nr(1)
11158 .m(8)
11159 .n(1)
11160 .k(k)
11161 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011162 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011163 }
11164 }
11165
Frank Barchardbeca6522020-10-30 22:34:35 -070011166 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011167 for (uint32_t n = 2; n < 10; n++) {
11168 for (size_t k = 1; k <= 20; k += 5) {
11169 SpMMMicrokernelTester()
11170 .mr(8)
11171 .nr(1)
11172 .m(8)
11173 .n(n)
11174 .k(k)
11175 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011176 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011177 }
11178 }
11179 }
11180
Frank Barchardbeca6522020-10-30 22:34:35 -070011181 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_lt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011182 for (uint32_t m = 1; m < 8; m++) {
11183 for (uint32_t n = 1; n < 10; n += 2) {
11184 for (size_t k = 1; k <= 20; k += 5) {
11185 SpMMMicrokernelTester()
11186 .mr(8)
11187 .nr(1)
11188 .m(m)
11189 .n(n)
11190 .k(k)
11191 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011192 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011193 }
11194 }
11195 }
11196 }
11197
Frank Barchardbeca6522020-10-30 22:34:35 -070011198 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_div_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011199 for (uint32_t m = 16; m <= 24; m += 8) {
11200 for (uint32_t n = 1; n < 10; n += 2) {
11201 for (size_t k = 1; k <= 20; k += 5) {
11202 SpMMMicrokernelTester()
11203 .mr(8)
11204 .nr(1)
11205 .m(m)
11206 .n(n)
11207 .k(k)
11208 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011209 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011210 }
11211 }
11212 }
11213 }
11214
Frank Barchardbeca6522020-10-30 22:34:35 -070011215 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, m_gt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011216 for (uint32_t m = 9; m < 16; m++) {
11217 for (uint32_t n = 1; n < 10; n += 2) {
11218 for (size_t k = 1; k <= 20; k += 5) {
11219 SpMMMicrokernelTester()
11220 .mr(8)
11221 .nr(1)
11222 .m(m)
11223 .n(n)
11224 .k(k)
11225 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011226 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011227 }
11228 }
11229 }
11230 }
11231
Marat Dukhane8bfcc82020-11-16 12:28:13 -080011232 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, output_stride) {
11233 for (uint32_t n = 1; n < 10; n += 2) {
11234 for (size_t k = 1; k <= 20; k += 5) {
11235 SpMMMicrokernelTester()
11236 .mr(8)
11237 .nr(1)
11238 .m(16)
11239 .n(n)
11240 .k(k)
11241 .output_stride(19)
11242 .sparsity(0.0f)
11243 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
11244 }
11245 }
11246 }
11247
Frank Barchardbeca6522020-10-30 22:34:35 -070011248 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011249 for (uint32_t n = 1; n < 10; n += 2) {
11250 for (size_t k = 1; k <= 20; k += 5) {
11251 SpMMMicrokernelTester()
11252 .mr(8)
11253 .nr(1)
11254 .m(16)
11255 .n(n)
11256 .k(k)
11257 .sparsity(0.0f)
11258 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011259 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011260 }
11261 }
11262 }
11263
Frank Barchardbeca6522020-10-30 22:34:35 -070011264 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011265 for (uint32_t n = 1; n < 10; n += 2) {
11266 for (size_t k = 1; k <= 20; k += 5) {
11267 SpMMMicrokernelTester()
11268 .mr(8)
11269 .nr(1)
11270 .m(16)
11271 .n(n)
11272 .k(k)
11273 .sparsity(0.0f)
11274 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011275 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011276 }
11277 }
11278 }
11279
Frank Barchardbeca6522020-10-30 22:34:35 -070011280 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011281 for (uint32_t n = 1; n < 10; n += 2) {
11282 for (size_t k = 1; k <= 20; k += 5) {
11283 SpMMMicrokernelTester()
11284 .mr(8)
11285 .nr(1)
11286 .m(16)
11287 .n(n)
11288 .k(k)
11289 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011290 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011291 }
11292 }
11293 }
11294
Frank Barchardbeca6522020-10-30 22:34:35 -070011295 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011296 for (uint32_t n = 1; n < 10; n += 2) {
11297 for (size_t k = 1; k <= 20; k += 5) {
11298 SpMMMicrokernelTester()
11299 .mr(8)
11300 .nr(1)
11301 .m(16)
11302 .n(n)
11303 .k(k)
11304 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011305 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011306 }
11307 }
11308 }
11309#endif // XNN_ARCH_WASMSIMD
11310
11311
11312#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070011313 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011314 SpMMMicrokernelTester()
11315 .mr(16)
11316 .nr(1)
11317 .m(16)
11318 .n(1)
11319 .k(4)
11320 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011321 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011322 }
11323
Frank Barchardbeca6522020-10-30 22:34:35 -070011324 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011325 for (size_t k = 1; k < 4; k++) {
11326 SpMMMicrokernelTester()
11327 .mr(16)
11328 .nr(1)
11329 .m(16)
11330 .n(1)
11331 .k(k)
11332 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011333 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011334 }
11335 }
11336
Frank Barchardbeca6522020-10-30 22:34:35 -070011337 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011338 for (size_t k = 5; k < 8; k++) {
11339 SpMMMicrokernelTester()
11340 .mr(16)
11341 .nr(1)
11342 .m(16)
11343 .n(1)
11344 .k(k)
11345 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011346 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011347 }
11348 }
11349
Frank Barchardbeca6522020-10-30 22:34:35 -070011350 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011351 for (size_t k = 8; k <= 40; k += 4) {
11352 SpMMMicrokernelTester()
11353 .mr(16)
11354 .nr(1)
11355 .m(16)
11356 .n(1)
11357 .k(k)
11358 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011359 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011360 }
11361 }
11362
Frank Barchardbeca6522020-10-30 22:34:35 -070011363 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011364 for (uint32_t n = 2; n < 10; n++) {
11365 for (size_t k = 1; k <= 20; k += 5) {
11366 SpMMMicrokernelTester()
11367 .mr(16)
11368 .nr(1)
11369 .m(16)
11370 .n(n)
11371 .k(k)
11372 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011373 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011374 }
11375 }
11376 }
11377
Frank Barchardbeca6522020-10-30 22:34:35 -070011378 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_lt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011379 for (uint32_t m = 1; m < 16; m++) {
11380 for (uint32_t n = 1; n < 10; n += 2) {
11381 for (size_t k = 1; k <= 20; k += 5) {
11382 SpMMMicrokernelTester()
11383 .mr(16)
11384 .nr(1)
11385 .m(m)
11386 .n(n)
11387 .k(k)
11388 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011389 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011390 }
11391 }
11392 }
11393 }
11394
Frank Barchardbeca6522020-10-30 22:34:35 -070011395 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_div_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011396 for (uint32_t m = 32; m <= 48; m += 16) {
11397 for (uint32_t n = 1; n < 10; n += 2) {
11398 for (size_t k = 1; k <= 20; k += 5) {
11399 SpMMMicrokernelTester()
11400 .mr(16)
11401 .nr(1)
11402 .m(m)
11403 .n(n)
11404 .k(k)
11405 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011406 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011407 }
11408 }
11409 }
11410 }
11411
Frank Barchardbeca6522020-10-30 22:34:35 -070011412 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, m_gt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011413 for (uint32_t m = 17; m < 32; m++) {
11414 for (uint32_t n = 1; n < 10; n += 2) {
11415 for (size_t k = 1; k <= 20; k += 5) {
11416 SpMMMicrokernelTester()
11417 .mr(16)
11418 .nr(1)
11419 .m(m)
11420 .n(n)
11421 .k(k)
11422 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011423 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011424 }
11425 }
11426 }
11427 }
11428
Marat Dukhane8bfcc82020-11-16 12:28:13 -080011429 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, output_stride) {
11430 for (uint32_t n = 1; n < 10; n += 2) {
11431 for (size_t k = 1; k <= 20; k += 5) {
11432 SpMMMicrokernelTester()
11433 .mr(16)
11434 .nr(1)
11435 .m(32)
11436 .n(n)
11437 .k(k)
11438 .output_stride(37)
11439 .sparsity(0.0f)
11440 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
11441 }
11442 }
11443 }
11444
Frank Barchardbeca6522020-10-30 22:34:35 -070011445 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011446 for (uint32_t n = 1; n < 10; n += 2) {
11447 for (size_t k = 1; k <= 20; k += 5) {
11448 SpMMMicrokernelTester()
11449 .mr(16)
11450 .nr(1)
11451 .m(32)
11452 .n(n)
11453 .k(k)
11454 .sparsity(0.0f)
11455 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011456 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011457 }
11458 }
11459 }
11460
Frank Barchardbeca6522020-10-30 22:34:35 -070011461 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011462 for (uint32_t n = 1; n < 10; n += 2) {
11463 for (size_t k = 1; k <= 20; k += 5) {
11464 SpMMMicrokernelTester()
11465 .mr(16)
11466 .nr(1)
11467 .m(32)
11468 .n(n)
11469 .k(k)
11470 .sparsity(0.0f)
11471 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011472 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011473 }
11474 }
11475 }
11476
Frank Barchardbeca6522020-10-30 22:34:35 -070011477 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011478 for (uint32_t n = 1; n < 10; n += 2) {
11479 for (size_t k = 1; k <= 20; k += 5) {
11480 SpMMMicrokernelTester()
11481 .mr(16)
11482 .nr(1)
11483 .m(32)
11484 .n(n)
11485 .k(k)
11486 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011487 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011488 }
11489 }
11490 }
11491
Frank Barchardbeca6522020-10-30 22:34:35 -070011492 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011493 for (uint32_t n = 1; n < 10; n += 2) {
11494 for (size_t k = 1; k <= 20; k += 5) {
11495 SpMMMicrokernelTester()
11496 .mr(16)
11497 .nr(1)
11498 .m(32)
11499 .n(n)
11500 .k(k)
11501 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011502 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011503 }
11504 }
11505 }
11506#endif // XNN_ARCH_WASMSIMD
11507
11508
11509#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070011510 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_eq_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011511 SpMMMicrokernelTester()
11512 .mr(32)
11513 .nr(1)
11514 .m(32)
11515 .n(1)
11516 .k(4)
11517 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011518 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011519 }
11520
Frank Barchardbeca6522020-10-30 22:34:35 -070011521 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_lt_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011522 for (size_t k = 1; k < 4; k++) {
11523 SpMMMicrokernelTester()
11524 .mr(32)
11525 .nr(1)
11526 .m(32)
11527 .n(1)
11528 .k(k)
11529 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011530 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011531 }
11532 }
11533
Frank Barchardbeca6522020-10-30 22:34:35 -070011534 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_gt_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011535 for (size_t k = 5; k < 8; k++) {
11536 SpMMMicrokernelTester()
11537 .mr(32)
11538 .nr(1)
11539 .m(32)
11540 .n(1)
11541 .k(k)
11542 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011543 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011544 }
11545 }
11546
Frank Barchardbeca6522020-10-30 22:34:35 -070011547 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, k_div_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011548 for (size_t k = 8; k <= 40; k += 4) {
11549 SpMMMicrokernelTester()
11550 .mr(32)
11551 .nr(1)
11552 .m(32)
11553 .n(1)
11554 .k(k)
11555 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011556 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011557 }
11558 }
11559
Frank Barchardbeca6522020-10-30 22:34:35 -070011560 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, n_gt_1) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011561 for (uint32_t n = 2; n < 10; n++) {
11562 for (size_t k = 1; k <= 20; k += 5) {
11563 SpMMMicrokernelTester()
11564 .mr(32)
11565 .nr(1)
11566 .m(32)
11567 .n(n)
11568 .k(k)
11569 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011570 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011571 }
11572 }
11573 }
11574
Frank Barchardbeca6522020-10-30 22:34:35 -070011575 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_lt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011576 for (uint32_t m = 1; m < 32; m++) {
11577 for (uint32_t n = 1; n < 10; n += 2) {
11578 for (size_t k = 1; k <= 20; k += 5) {
11579 SpMMMicrokernelTester()
11580 .mr(32)
11581 .nr(1)
11582 .m(m)
11583 .n(n)
11584 .k(k)
11585 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011586 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011587 }
11588 }
11589 }
11590 }
11591
Frank Barchardbeca6522020-10-30 22:34:35 -070011592 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_div_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011593 for (uint32_t m = 64; m <= 96; m += 32) {
11594 for (uint32_t n = 1; n < 10; n += 2) {
11595 for (size_t k = 1; k <= 20; k += 5) {
11596 SpMMMicrokernelTester()
11597 .mr(32)
11598 .nr(1)
11599 .m(m)
11600 .n(n)
11601 .k(k)
11602 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011603 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011604 }
11605 }
11606 }
11607 }
11608
Frank Barchardbeca6522020-10-30 22:34:35 -070011609 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, m_gt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011610 for (uint32_t m = 33; m < 64; m++) {
11611 for (uint32_t n = 1; n < 10; n += 2) {
11612 for (size_t k = 1; k <= 20; k += 5) {
11613 SpMMMicrokernelTester()
11614 .mr(32)
11615 .nr(1)
11616 .m(m)
11617 .n(n)
11618 .k(k)
11619 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011620 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011621 }
11622 }
11623 }
11624 }
11625
Marat Dukhane8bfcc82020-11-16 12:28:13 -080011626 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, output_stride) {
11627 for (uint32_t n = 1; n < 10; n += 2) {
11628 for (size_t k = 1; k <= 20; k += 5) {
11629 SpMMMicrokernelTester()
11630 .mr(32)
11631 .nr(1)
11632 .m(64)
11633 .n(n)
11634 .k(k)
11635 .output_stride(67)
11636 .sparsity(0.0f)
11637 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
11638 }
11639 }
11640 }
11641
Frank Barchardbeca6522020-10-30 22:34:35 -070011642 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, qmin) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011643 for (uint32_t n = 1; n < 10; n += 2) {
11644 for (size_t k = 1; k <= 20; k += 5) {
11645 SpMMMicrokernelTester()
11646 .mr(32)
11647 .nr(1)
11648 .m(64)
11649 .n(n)
11650 .k(k)
11651 .sparsity(0.0f)
11652 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011653 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011654 }
11655 }
11656 }
11657
Frank Barchardbeca6522020-10-30 22:34:35 -070011658 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, qmax) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011659 for (uint32_t n = 1; n < 10; n += 2) {
11660 for (size_t k = 1; k <= 20; k += 5) {
11661 SpMMMicrokernelTester()
11662 .mr(32)
11663 .nr(1)
11664 .m(64)
11665 .n(n)
11666 .k(k)
11667 .sparsity(0.0f)
11668 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011669 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011670 }
11671 }
11672 }
11673
Frank Barchardbeca6522020-10-30 22:34:35 -070011674 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, half_sparse) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011675 for (uint32_t n = 1; n < 10; n += 2) {
11676 for (size_t k = 1; k <= 20; k += 5) {
11677 SpMMMicrokernelTester()
11678 .mr(32)
11679 .nr(1)
11680 .m(64)
11681 .n(n)
11682 .k(k)
11683 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011684 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011685 }
11686 }
11687 }
11688
Frank Barchardbeca6522020-10-30 22:34:35 -070011689 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_X4, zero_weights) {
Frank Barchard846c0c62020-10-26 15:01:39 -070011690 for (uint32_t n = 1; n < 10; n += 2) {
11691 for (size_t k = 1; k <= 20; k += 5) {
11692 SpMMMicrokernelTester()
11693 .mr(32)
11694 .nr(1)
11695 .m(64)
11696 .n(n)
11697 .k(k)
11698 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011699 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070011700 }
11701 }
11702 }
11703#endif // XNN_ARCH_WASMSIMD
11704
11705
11706#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070011707 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011708 SpMMMicrokernelTester()
11709 .mr(4)
11710 .nr(1)
11711 .m(4)
11712 .n(1)
11713 .k(4)
11714 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011715 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011716 }
11717
Frank Barchardbeca6522020-10-30 22:34:35 -070011718 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011719 for (size_t k = 1; k < 4; k++) {
11720 SpMMMicrokernelTester()
11721 .mr(4)
11722 .nr(1)
11723 .m(4)
11724 .n(1)
11725 .k(k)
11726 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011727 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011728 }
11729 }
11730
Frank Barchardbeca6522020-10-30 22:34:35 -070011731 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011732 for (size_t k = 5; k < 8; k++) {
11733 SpMMMicrokernelTester()
11734 .mr(4)
11735 .nr(1)
11736 .m(4)
11737 .n(1)
11738 .k(k)
11739 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011740 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011741 }
11742 }
11743
Frank Barchardbeca6522020-10-30 22:34:35 -070011744 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011745 for (size_t k = 8; k <= 40; k += 4) {
11746 SpMMMicrokernelTester()
11747 .mr(4)
11748 .nr(1)
11749 .m(4)
11750 .n(1)
11751 .k(k)
11752 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011753 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011754 }
11755 }
11756
Frank Barchardbeca6522020-10-30 22:34:35 -070011757 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011758 for (uint32_t n = 2; n < 10; n++) {
11759 for (size_t k = 1; k <= 20; k += 5) {
11760 SpMMMicrokernelTester()
11761 .mr(4)
11762 .nr(1)
11763 .m(4)
11764 .n(n)
11765 .k(k)
11766 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011767 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011768 }
11769 }
11770 }
11771
Frank Barchardbeca6522020-10-30 22:34:35 -070011772 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011773 for (uint32_t m = 1; m < 4; m++) {
11774 for (uint32_t n = 1; n < 10; n += 2) {
11775 for (size_t k = 1; k <= 20; k += 5) {
11776 SpMMMicrokernelTester()
11777 .mr(4)
11778 .nr(1)
11779 .m(m)
11780 .n(n)
11781 .k(k)
11782 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011783 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011784 }
11785 }
11786 }
11787 }
11788
Frank Barchardbeca6522020-10-30 22:34:35 -070011789 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011790 for (uint32_t m = 8; m <= 12; m += 4) {
11791 for (uint32_t n = 1; n < 10; n += 2) {
11792 for (size_t k = 1; k <= 20; k += 5) {
11793 SpMMMicrokernelTester()
11794 .mr(4)
11795 .nr(1)
11796 .m(m)
11797 .n(n)
11798 .k(k)
11799 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011800 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011801 }
11802 }
11803 }
11804 }
11805
Frank Barchardbeca6522020-10-30 22:34:35 -070011806 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, m_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011807 for (uint32_t m = 5; m < 8; m++) {
11808 for (uint32_t n = 1; n < 10; n += 2) {
11809 for (size_t k = 1; k <= 20; k += 5) {
11810 SpMMMicrokernelTester()
11811 .mr(4)
11812 .nr(1)
11813 .m(m)
11814 .n(n)
11815 .k(k)
11816 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011817 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011818 }
11819 }
11820 }
11821 }
11822
Marat Dukhane8bfcc82020-11-16 12:28:13 -080011823 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, output_stride) {
11824 for (uint32_t n = 1; n < 10; n += 2) {
11825 for (size_t k = 1; k <= 20; k += 5) {
11826 SpMMMicrokernelTester()
11827 .mr(4)
11828 .nr(1)
11829 .m(8)
11830 .n(n)
11831 .k(k)
11832 .output_stride(11)
11833 .sparsity(0.0f)
11834 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
11835 }
11836 }
11837 }
11838
Frank Barchardbeca6522020-10-30 22:34:35 -070011839 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011840 for (uint32_t n = 1; n < 10; n += 2) {
11841 for (size_t k = 1; k <= 20; k += 5) {
11842 SpMMMicrokernelTester()
11843 .mr(4)
11844 .nr(1)
11845 .m(8)
11846 .n(n)
11847 .k(k)
11848 .sparsity(0.0f)
11849 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011850 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011851 }
11852 }
11853 }
11854
Frank Barchardbeca6522020-10-30 22:34:35 -070011855 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011856 for (uint32_t n = 1; n < 10; n += 2) {
11857 for (size_t k = 1; k <= 20; k += 5) {
11858 SpMMMicrokernelTester()
11859 .mr(4)
11860 .nr(1)
11861 .m(8)
11862 .n(n)
11863 .k(k)
11864 .sparsity(0.0f)
11865 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070011866 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011867 }
11868 }
11869 }
11870
Frank Barchardbeca6522020-10-30 22:34:35 -070011871 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011872 for (uint32_t n = 1; n < 10; n += 2) {
11873 for (size_t k = 1; k <= 20; k += 5) {
11874 SpMMMicrokernelTester()
11875 .mr(4)
11876 .nr(1)
11877 .m(8)
11878 .n(n)
11879 .k(k)
11880 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011881 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011882 }
11883 }
11884 }
11885
Frank Barchardbeca6522020-10-30 22:34:35 -070011886 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011887 for (uint32_t n = 1; n < 10; n += 2) {
11888 for (size_t k = 1; k <= 20; k += 5) {
11889 SpMMMicrokernelTester()
11890 .mr(4)
11891 .nr(1)
11892 .m(8)
11893 .n(n)
11894 .k(k)
11895 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011896 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011897 }
11898 }
11899 }
11900#endif // XNN_ARCH_WASMSIMD
11901
11902
11903#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070011904 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011905 SpMMMicrokernelTester()
11906 .mr(8)
11907 .nr(1)
11908 .m(8)
11909 .n(1)
11910 .k(4)
11911 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011912 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011913 }
11914
Frank Barchardbeca6522020-10-30 22:34:35 -070011915 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011916 for (size_t k = 1; k < 4; k++) {
11917 SpMMMicrokernelTester()
11918 .mr(8)
11919 .nr(1)
11920 .m(8)
11921 .n(1)
11922 .k(k)
11923 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011924 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011925 }
11926 }
11927
Frank Barchardbeca6522020-10-30 22:34:35 -070011928 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011929 for (size_t k = 5; k < 8; k++) {
11930 SpMMMicrokernelTester()
11931 .mr(8)
11932 .nr(1)
11933 .m(8)
11934 .n(1)
11935 .k(k)
11936 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011937 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011938 }
11939 }
11940
Frank Barchardbeca6522020-10-30 22:34:35 -070011941 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011942 for (size_t k = 8; k <= 40; k += 4) {
11943 SpMMMicrokernelTester()
11944 .mr(8)
11945 .nr(1)
11946 .m(8)
11947 .n(1)
11948 .k(k)
11949 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011950 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011951 }
11952 }
11953
Frank Barchardbeca6522020-10-30 22:34:35 -070011954 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011955 for (uint32_t n = 2; n < 10; n++) {
11956 for (size_t k = 1; k <= 20; k += 5) {
11957 SpMMMicrokernelTester()
11958 .mr(8)
11959 .nr(1)
11960 .m(8)
11961 .n(n)
11962 .k(k)
11963 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011964 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011965 }
11966 }
11967 }
11968
Frank Barchardbeca6522020-10-30 22:34:35 -070011969 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_lt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011970 for (uint32_t m = 1; m < 8; m++) {
11971 for (uint32_t n = 1; n < 10; n += 2) {
11972 for (size_t k = 1; k <= 20; k += 5) {
11973 SpMMMicrokernelTester()
11974 .mr(8)
11975 .nr(1)
11976 .m(m)
11977 .n(n)
11978 .k(k)
11979 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011980 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011981 }
11982 }
11983 }
11984 }
11985
Frank Barchardbeca6522020-10-30 22:34:35 -070011986 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_div_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070011987 for (uint32_t m = 16; m <= 24; m += 8) {
11988 for (uint32_t n = 1; n < 10; n += 2) {
11989 for (size_t k = 1; k <= 20; k += 5) {
11990 SpMMMicrokernelTester()
11991 .mr(8)
11992 .nr(1)
11993 .m(m)
11994 .n(n)
11995 .k(k)
11996 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070011997 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070011998 }
11999 }
12000 }
12001 }
12002
Frank Barchardbeca6522020-10-30 22:34:35 -070012003 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, m_gt_8) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012004 for (uint32_t m = 9; m < 16; m++) {
12005 for (uint32_t n = 1; n < 10; n += 2) {
12006 for (size_t k = 1; k <= 20; k += 5) {
12007 SpMMMicrokernelTester()
12008 .mr(8)
12009 .nr(1)
12010 .m(m)
12011 .n(n)
12012 .k(k)
12013 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012014 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012015 }
12016 }
12017 }
12018 }
12019
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012020 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, output_stride) {
12021 for (uint32_t n = 1; n < 10; n += 2) {
12022 for (size_t k = 1; k <= 20; k += 5) {
12023 SpMMMicrokernelTester()
12024 .mr(8)
12025 .nr(1)
12026 .m(16)
12027 .n(n)
12028 .k(k)
12029 .output_stride(19)
12030 .sparsity(0.0f)
12031 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
12032 }
12033 }
12034 }
12035
Frank Barchardbeca6522020-10-30 22:34:35 -070012036 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012037 for (uint32_t n = 1; n < 10; n += 2) {
12038 for (size_t k = 1; k <= 20; k += 5) {
12039 SpMMMicrokernelTester()
12040 .mr(8)
12041 .nr(1)
12042 .m(16)
12043 .n(n)
12044 .k(k)
12045 .sparsity(0.0f)
12046 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012047 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012048 }
12049 }
12050 }
12051
Frank Barchardbeca6522020-10-30 22:34:35 -070012052 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012053 for (uint32_t n = 1; n < 10; n += 2) {
12054 for (size_t k = 1; k <= 20; k += 5) {
12055 SpMMMicrokernelTester()
12056 .mr(8)
12057 .nr(1)
12058 .m(16)
12059 .n(n)
12060 .k(k)
12061 .sparsity(0.0f)
12062 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012063 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012064 }
12065 }
12066 }
12067
Frank Barchardbeca6522020-10-30 22:34:35 -070012068 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012069 for (uint32_t n = 1; n < 10; n += 2) {
12070 for (size_t k = 1; k <= 20; k += 5) {
12071 SpMMMicrokernelTester()
12072 .mr(8)
12073 .nr(1)
12074 .m(16)
12075 .n(n)
12076 .k(k)
12077 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012078 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012079 }
12080 }
12081 }
12082
Frank Barchardbeca6522020-10-30 22:34:35 -070012083 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012084 for (uint32_t n = 1; n < 10; n += 2) {
12085 for (size_t k = 1; k <= 20; k += 5) {
12086 SpMMMicrokernelTester()
12087 .mr(8)
12088 .nr(1)
12089 .m(16)
12090 .n(n)
12091 .k(k)
12092 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012093 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012094 }
12095 }
12096 }
12097#endif // XNN_ARCH_WASMSIMD
12098
12099
12100#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070012101 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_eq_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012102 SpMMMicrokernelTester()
12103 .mr(16)
12104 .nr(1)
12105 .m(16)
12106 .n(1)
12107 .k(4)
12108 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012109 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012110 }
12111
Frank Barchardbeca6522020-10-30 22:34:35 -070012112 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_lt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012113 for (size_t k = 1; k < 4; k++) {
12114 SpMMMicrokernelTester()
12115 .mr(16)
12116 .nr(1)
12117 .m(16)
12118 .n(1)
12119 .k(k)
12120 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012121 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012122 }
12123 }
12124
Frank Barchardbeca6522020-10-30 22:34:35 -070012125 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_gt_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012126 for (size_t k = 5; k < 8; k++) {
12127 SpMMMicrokernelTester()
12128 .mr(16)
12129 .nr(1)
12130 .m(16)
12131 .n(1)
12132 .k(k)
12133 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012134 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012135 }
12136 }
12137
Frank Barchardbeca6522020-10-30 22:34:35 -070012138 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, k_div_4) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012139 for (size_t k = 8; k <= 40; k += 4) {
12140 SpMMMicrokernelTester()
12141 .mr(16)
12142 .nr(1)
12143 .m(16)
12144 .n(1)
12145 .k(k)
12146 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012147 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012148 }
12149 }
12150
Frank Barchardbeca6522020-10-30 22:34:35 -070012151 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, n_gt_1) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012152 for (uint32_t n = 2; n < 10; n++) {
12153 for (size_t k = 1; k <= 20; k += 5) {
12154 SpMMMicrokernelTester()
12155 .mr(16)
12156 .nr(1)
12157 .m(16)
12158 .n(n)
12159 .k(k)
12160 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012161 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012162 }
12163 }
12164 }
12165
Frank Barchardbeca6522020-10-30 22:34:35 -070012166 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_lt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012167 for (uint32_t m = 1; m < 16; m++) {
12168 for (uint32_t n = 1; n < 10; n += 2) {
12169 for (size_t k = 1; k <= 20; k += 5) {
12170 SpMMMicrokernelTester()
12171 .mr(16)
12172 .nr(1)
12173 .m(m)
12174 .n(n)
12175 .k(k)
12176 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012177 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012178 }
12179 }
12180 }
12181 }
12182
Frank Barchardbeca6522020-10-30 22:34:35 -070012183 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_div_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012184 for (uint32_t m = 32; m <= 48; m += 16) {
12185 for (uint32_t n = 1; n < 10; n += 2) {
12186 for (size_t k = 1; k <= 20; k += 5) {
12187 SpMMMicrokernelTester()
12188 .mr(16)
12189 .nr(1)
12190 .m(m)
12191 .n(n)
12192 .k(k)
12193 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012194 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012195 }
12196 }
12197 }
12198 }
12199
Frank Barchardbeca6522020-10-30 22:34:35 -070012200 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, m_gt_16) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012201 for (uint32_t m = 17; m < 32; m++) {
12202 for (uint32_t n = 1; n < 10; n += 2) {
12203 for (size_t k = 1; k <= 20; k += 5) {
12204 SpMMMicrokernelTester()
12205 .mr(16)
12206 .nr(1)
12207 .m(m)
12208 .n(n)
12209 .k(k)
12210 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012211 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012212 }
12213 }
12214 }
12215 }
12216
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012217 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, output_stride) {
12218 for (uint32_t n = 1; n < 10; n += 2) {
12219 for (size_t k = 1; k <= 20; k += 5) {
12220 SpMMMicrokernelTester()
12221 .mr(16)
12222 .nr(1)
12223 .m(32)
12224 .n(n)
12225 .k(k)
12226 .output_stride(37)
12227 .sparsity(0.0f)
12228 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
12229 }
12230 }
12231 }
12232
Frank Barchardbeca6522020-10-30 22:34:35 -070012233 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, qmin) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012234 for (uint32_t n = 1; n < 10; n += 2) {
12235 for (size_t k = 1; k <= 20; k += 5) {
12236 SpMMMicrokernelTester()
12237 .mr(16)
12238 .nr(1)
12239 .m(32)
12240 .n(n)
12241 .k(k)
12242 .sparsity(0.0f)
12243 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012244 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012245 }
12246 }
12247 }
12248
Frank Barchardbeca6522020-10-30 22:34:35 -070012249 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, qmax) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012250 for (uint32_t n = 1; n < 10; n += 2) {
12251 for (size_t k = 1; k <= 20; k += 5) {
12252 SpMMMicrokernelTester()
12253 .mr(16)
12254 .nr(1)
12255 .m(32)
12256 .n(n)
12257 .k(k)
12258 .sparsity(0.0f)
12259 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012260 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012261 }
12262 }
12263 }
12264
Frank Barchardbeca6522020-10-30 22:34:35 -070012265 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, half_sparse) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012266 for (uint32_t n = 1; n < 10; n += 2) {
12267 for (size_t k = 1; k <= 20; k += 5) {
12268 SpMMMicrokernelTester()
12269 .mr(16)
12270 .nr(1)
12271 .m(32)
12272 .n(n)
12273 .k(k)
12274 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012275 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012276 }
12277 }
12278 }
12279
Frank Barchardbeca6522020-10-30 22:34:35 -070012280 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_X4, zero_weights) {
Frank Barchardc451e8a2020-10-21 17:13:12 -070012281 for (uint32_t n = 1; n < 10; n += 2) {
12282 for (size_t k = 1; k <= 20; k += 5) {
12283 SpMMMicrokernelTester()
12284 .mr(16)
12285 .nr(1)
12286 .m(32)
12287 .n(n)
12288 .k(k)
12289 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012290 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4);
Frank Barchardc451e8a2020-10-21 17:13:12 -070012291 }
12292 }
12293 }
12294#endif // XNN_ARCH_WASMSIMD
12295
12296
Frank Barchard846c0c62020-10-26 15:01:39 -070012297#if XNN_ARCH_WASMSIMD
Frank Barchardbeca6522020-10-30 22:34:35 -070012298 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_eq_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012299 SpMMMicrokernelTester()
12300 .mr(32)
12301 .nr(1)
12302 .m(32)
12303 .n(1)
12304 .k(4)
12305 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012306 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012307 }
12308
Frank Barchardbeca6522020-10-30 22:34:35 -070012309 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_lt_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012310 for (size_t k = 1; k < 4; k++) {
12311 SpMMMicrokernelTester()
12312 .mr(32)
12313 .nr(1)
12314 .m(32)
12315 .n(1)
12316 .k(k)
12317 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012318 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012319 }
12320 }
12321
Frank Barchardbeca6522020-10-30 22:34:35 -070012322 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_gt_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012323 for (size_t k = 5; k < 8; k++) {
12324 SpMMMicrokernelTester()
12325 .mr(32)
12326 .nr(1)
12327 .m(32)
12328 .n(1)
12329 .k(k)
12330 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012331 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012332 }
12333 }
12334
Frank Barchardbeca6522020-10-30 22:34:35 -070012335 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, k_div_4) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012336 for (size_t k = 8; k <= 40; k += 4) {
12337 SpMMMicrokernelTester()
12338 .mr(32)
12339 .nr(1)
12340 .m(32)
12341 .n(1)
12342 .k(k)
12343 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012344 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012345 }
12346 }
12347
Frank Barchardbeca6522020-10-30 22:34:35 -070012348 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, n_gt_1) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012349 for (uint32_t n = 2; n < 10; n++) {
12350 for (size_t k = 1; k <= 20; k += 5) {
12351 SpMMMicrokernelTester()
12352 .mr(32)
12353 .nr(1)
12354 .m(32)
12355 .n(n)
12356 .k(k)
12357 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012358 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012359 }
12360 }
12361 }
12362
Frank Barchardbeca6522020-10-30 22:34:35 -070012363 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_lt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012364 for (uint32_t m = 1; m < 32; m++) {
12365 for (uint32_t n = 1; n < 10; n += 2) {
12366 for (size_t k = 1; k <= 20; k += 5) {
12367 SpMMMicrokernelTester()
12368 .mr(32)
12369 .nr(1)
12370 .m(m)
12371 .n(n)
12372 .k(k)
12373 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012374 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012375 }
12376 }
12377 }
12378 }
12379
Frank Barchardbeca6522020-10-30 22:34:35 -070012380 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_div_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012381 for (uint32_t m = 64; m <= 96; m += 32) {
12382 for (uint32_t n = 1; n < 10; n += 2) {
12383 for (size_t k = 1; k <= 20; k += 5) {
12384 SpMMMicrokernelTester()
12385 .mr(32)
12386 .nr(1)
12387 .m(m)
12388 .n(n)
12389 .k(k)
12390 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012391 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012392 }
12393 }
12394 }
12395 }
12396
Frank Barchardbeca6522020-10-30 22:34:35 -070012397 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, m_gt_32) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012398 for (uint32_t m = 33; m < 64; m++) {
12399 for (uint32_t n = 1; n < 10; n += 2) {
12400 for (size_t k = 1; k <= 20; k += 5) {
12401 SpMMMicrokernelTester()
12402 .mr(32)
12403 .nr(1)
12404 .m(m)
12405 .n(n)
12406 .k(k)
12407 .sparsity(0.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012408 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012409 }
12410 }
12411 }
12412 }
12413
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012414 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, output_stride) {
12415 for (uint32_t n = 1; n < 10; n += 2) {
12416 for (size_t k = 1; k <= 20; k += 5) {
12417 SpMMMicrokernelTester()
12418 .mr(32)
12419 .nr(1)
12420 .m(64)
12421 .n(n)
12422 .k(k)
12423 .output_stride(67)
12424 .sparsity(0.0f)
12425 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
12426 }
12427 }
12428 }
12429
Frank Barchardbeca6522020-10-30 22:34:35 -070012430 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, qmin) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012431 for (uint32_t n = 1; n < 10; n += 2) {
12432 for (size_t k = 1; k <= 20; k += 5) {
12433 SpMMMicrokernelTester()
12434 .mr(32)
12435 .nr(1)
12436 .m(64)
12437 .n(n)
12438 .k(k)
12439 .sparsity(0.0f)
12440 .qmin(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012441 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012442 }
12443 }
12444 }
12445
Frank Barchardbeca6522020-10-30 22:34:35 -070012446 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, qmax) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012447 for (uint32_t n = 1; n < 10; n += 2) {
12448 for (size_t k = 1; k <= 20; k += 5) {
12449 SpMMMicrokernelTester()
12450 .mr(32)
12451 .nr(1)
12452 .m(64)
12453 .n(n)
12454 .k(k)
12455 .sparsity(0.0f)
12456 .qmax(128)
Frank Barchardbeca6522020-10-30 22:34:35 -070012457 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012458 }
12459 }
12460 }
12461
Frank Barchardbeca6522020-10-30 22:34:35 -070012462 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, half_sparse) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012463 for (uint32_t n = 1; n < 10; n += 2) {
12464 for (size_t k = 1; k <= 20; k += 5) {
12465 SpMMMicrokernelTester()
12466 .mr(32)
12467 .nr(1)
12468 .m(64)
12469 .n(n)
12470 .k(k)
12471 .sparsity(0.5f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012472 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012473 }
12474 }
12475 }
12476
Frank Barchardbeca6522020-10-30 22:34:35 -070012477 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_X4, zero_weights) {
Frank Barchard846c0c62020-10-26 15:01:39 -070012478 for (uint32_t n = 1; n < 10; n += 2) {
12479 for (size_t k = 1; k <= 20; k += 5) {
12480 SpMMMicrokernelTester()
12481 .mr(32)
12482 .nr(1)
12483 .m(64)
12484 .n(n)
12485 .k(k)
12486 .sparsity(1.0f)
Frank Barchardbeca6522020-10-30 22:34:35 -070012487 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4);
Frank Barchard846c0c62020-10-26 15:01:39 -070012488 }
12489 }
12490 }
12491#endif // XNN_ARCH_WASMSIMD
12492
12493
Frank Barchard8ef44cd2020-11-03 12:30:23 -080012494#if XNN_ARCH_WASMSIMD
12495 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
12496 SpMMMicrokernelTester()
12497 .mr(4)
12498 .nr(1)
12499 .m(4)
12500 .n(1)
12501 .k(1)
12502 .sparsity(0.0f)
12503 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12504 }
12505
12506 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
12507 for (size_t k = 2; k < 10; k++) {
12508 SpMMMicrokernelTester()
12509 .mr(4)
12510 .nr(1)
12511 .m(4)
12512 .n(1)
12513 .k(k)
12514 .sparsity(0.0f)
12515 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12516 }
12517 }
12518
12519 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
12520 for (uint32_t n = 2; n < 10; n++) {
12521 for (size_t k = 1; k <= 5; k += 2) {
12522 SpMMMicrokernelTester()
12523 .mr(4)
12524 .nr(1)
12525 .m(4)
12526 .n(n)
12527 .k(k)
12528 .sparsity(0.0f)
12529 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12530 }
12531 }
12532 }
12533
12534 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_lt_4) {
12535 for (uint32_t m = 1; m < 4; m++) {
12536 for (uint32_t n = 1; n < 10; n += 2) {
12537 for (size_t k = 1; k <= 5; k += 2) {
12538 SpMMMicrokernelTester()
12539 .mr(4)
12540 .nr(1)
12541 .m(m)
12542 .n(n)
12543 .k(k)
12544 .sparsity(0.0f)
12545 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12546 }
12547 }
12548 }
12549 }
12550
12551 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_div_4) {
12552 for (uint32_t m = 8; m <= 12; m += 4) {
12553 for (uint32_t n = 1; n < 10; n += 2) {
12554 for (size_t k = 1; k <= 5; k += 2) {
12555 SpMMMicrokernelTester()
12556 .mr(4)
12557 .nr(1)
12558 .m(m)
12559 .n(n)
12560 .k(k)
12561 .sparsity(0.0f)
12562 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12563 }
12564 }
12565 }
12566 }
12567
12568 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, m_gt_4) {
12569 for (uint32_t m = 5; m < 8; m++) {
12570 for (uint32_t n = 1; n < 10; n += 2) {
12571 for (size_t k = 1; k <= 5; k += 2) {
12572 SpMMMicrokernelTester()
12573 .mr(4)
12574 .nr(1)
12575 .m(m)
12576 .n(n)
12577 .k(k)
12578 .sparsity(0.0f)
12579 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12580 }
12581 }
12582 }
12583 }
12584
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012585 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, output_stride) {
12586 for (uint32_t n = 1; n < 10; n += 2) {
12587 for (size_t k = 1; k <= 5; k += 2) {
12588 SpMMMicrokernelTester()
12589 .mr(4)
12590 .nr(1)
12591 .m(8)
12592 .n(n)
12593 .k(k)
12594 .output_stride(11)
12595 .sparsity(0.0f)
12596 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12597 }
12598 }
12599 }
12600
Frank Barchard8ef44cd2020-11-03 12:30:23 -080012601 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, qmin) {
12602 for (uint32_t n = 1; n < 10; n += 2) {
12603 for (size_t k = 1; k <= 5; k += 2) {
12604 SpMMMicrokernelTester()
12605 .mr(4)
12606 .nr(1)
12607 .m(8)
12608 .n(n)
12609 .k(k)
12610 .sparsity(0.0f)
12611 .qmin(128)
12612 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12613 }
12614 }
12615 }
12616
12617 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, qmax) {
12618 for (uint32_t n = 1; n < 10; n += 2) {
12619 for (size_t k = 1; k <= 5; k += 2) {
12620 SpMMMicrokernelTester()
12621 .mr(4)
12622 .nr(1)
12623 .m(8)
12624 .n(n)
12625 .k(k)
12626 .sparsity(0.0f)
12627 .qmax(128)
12628 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12629 }
12630 }
12631 }
12632
12633 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
12634 for (uint32_t n = 1; n < 10; n += 2) {
12635 for (size_t k = 1; k <= 5; k += 2) {
12636 SpMMMicrokernelTester()
12637 .mr(4)
12638 .nr(1)
12639 .m(8)
12640 .n(n)
12641 .k(k)
12642 .sparsity(0.5f)
12643 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12644 }
12645 }
12646 }
12647
12648 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
12649 for (uint32_t n = 1; n < 10; n += 2) {
12650 for (size_t k = 1; k <= 5; k += 2) {
12651 SpMMMicrokernelTester()
12652 .mr(4)
12653 .nr(1)
12654 .m(8)
12655 .n(n)
12656 .k(k)
12657 .sparsity(1.0f)
12658 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined);
12659 }
12660 }
12661 }
12662#endif // XNN_ARCH_WASMSIMD
12663
12664
12665#if XNN_ARCH_WASMSIMD
12666 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
12667 SpMMMicrokernelTester()
12668 .mr(8)
12669 .nr(1)
12670 .m(8)
12671 .n(1)
12672 .k(1)
12673 .sparsity(0.0f)
12674 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12675 }
12676
12677 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
12678 for (size_t k = 2; k < 10; k++) {
12679 SpMMMicrokernelTester()
12680 .mr(8)
12681 .nr(1)
12682 .m(8)
12683 .n(1)
12684 .k(k)
12685 .sparsity(0.0f)
12686 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12687 }
12688 }
12689
12690 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
12691 for (uint32_t n = 2; n < 10; n++) {
12692 for (size_t k = 1; k <= 5; k += 2) {
12693 SpMMMicrokernelTester()
12694 .mr(8)
12695 .nr(1)
12696 .m(8)
12697 .n(n)
12698 .k(k)
12699 .sparsity(0.0f)
12700 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12701 }
12702 }
12703 }
12704
12705 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_lt_8) {
12706 for (uint32_t m = 1; m < 8; m++) {
12707 for (uint32_t n = 1; n < 10; n += 2) {
12708 for (size_t k = 1; k <= 5; k += 2) {
12709 SpMMMicrokernelTester()
12710 .mr(8)
12711 .nr(1)
12712 .m(m)
12713 .n(n)
12714 .k(k)
12715 .sparsity(0.0f)
12716 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12717 }
12718 }
12719 }
12720 }
12721
12722 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_div_8) {
12723 for (uint32_t m = 16; m <= 24; m += 8) {
12724 for (uint32_t n = 1; n < 10; n += 2) {
12725 for (size_t k = 1; k <= 5; k += 2) {
12726 SpMMMicrokernelTester()
12727 .mr(8)
12728 .nr(1)
12729 .m(m)
12730 .n(n)
12731 .k(k)
12732 .sparsity(0.0f)
12733 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12734 }
12735 }
12736 }
12737 }
12738
12739 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, m_gt_8) {
12740 for (uint32_t m = 9; m < 16; m++) {
12741 for (uint32_t n = 1; n < 10; n += 2) {
12742 for (size_t k = 1; k <= 5; k += 2) {
12743 SpMMMicrokernelTester()
12744 .mr(8)
12745 .nr(1)
12746 .m(m)
12747 .n(n)
12748 .k(k)
12749 .sparsity(0.0f)
12750 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12751 }
12752 }
12753 }
12754 }
12755
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012756 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, output_stride) {
12757 for (uint32_t n = 1; n < 10; n += 2) {
12758 for (size_t k = 1; k <= 5; k += 2) {
12759 SpMMMicrokernelTester()
12760 .mr(8)
12761 .nr(1)
12762 .m(16)
12763 .n(n)
12764 .k(k)
12765 .output_stride(19)
12766 .sparsity(0.0f)
12767 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12768 }
12769 }
12770 }
12771
Frank Barchard8ef44cd2020-11-03 12:30:23 -080012772 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, qmin) {
12773 for (uint32_t n = 1; n < 10; n += 2) {
12774 for (size_t k = 1; k <= 5; k += 2) {
12775 SpMMMicrokernelTester()
12776 .mr(8)
12777 .nr(1)
12778 .m(16)
12779 .n(n)
12780 .k(k)
12781 .sparsity(0.0f)
12782 .qmin(128)
12783 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12784 }
12785 }
12786 }
12787
12788 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, qmax) {
12789 for (uint32_t n = 1; n < 10; n += 2) {
12790 for (size_t k = 1; k <= 5; k += 2) {
12791 SpMMMicrokernelTester()
12792 .mr(8)
12793 .nr(1)
12794 .m(16)
12795 .n(n)
12796 .k(k)
12797 .sparsity(0.0f)
12798 .qmax(128)
12799 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12800 }
12801 }
12802 }
12803
12804 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
12805 for (uint32_t n = 1; n < 10; n += 2) {
12806 for (size_t k = 1; k <= 5; k += 2) {
12807 SpMMMicrokernelTester()
12808 .mr(8)
12809 .nr(1)
12810 .m(16)
12811 .n(n)
12812 .k(k)
12813 .sparsity(0.5f)
12814 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12815 }
12816 }
12817 }
12818
12819 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
12820 for (uint32_t n = 1; n < 10; n += 2) {
12821 for (size_t k = 1; k <= 5; k += 2) {
12822 SpMMMicrokernelTester()
12823 .mr(8)
12824 .nr(1)
12825 .m(16)
12826 .n(n)
12827 .k(k)
12828 .sparsity(1.0f)
12829 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined);
12830 }
12831 }
12832 }
12833#endif // XNN_ARCH_WASMSIMD
12834
12835
12836#if XNN_ARCH_WASMSIMD
12837 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
12838 SpMMMicrokernelTester()
12839 .mr(16)
12840 .nr(1)
12841 .m(16)
12842 .n(1)
12843 .k(1)
12844 .sparsity(0.0f)
12845 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12846 }
12847
12848 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
12849 for (size_t k = 2; k < 10; k++) {
12850 SpMMMicrokernelTester()
12851 .mr(16)
12852 .nr(1)
12853 .m(16)
12854 .n(1)
12855 .k(k)
12856 .sparsity(0.0f)
12857 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12858 }
12859 }
12860
12861 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
12862 for (uint32_t n = 2; n < 10; n++) {
12863 for (size_t k = 1; k <= 5; k += 2) {
12864 SpMMMicrokernelTester()
12865 .mr(16)
12866 .nr(1)
12867 .m(16)
12868 .n(n)
12869 .k(k)
12870 .sparsity(0.0f)
12871 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12872 }
12873 }
12874 }
12875
12876 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_lt_16) {
12877 for (uint32_t m = 1; m < 16; m++) {
12878 for (uint32_t n = 1; n < 10; n += 2) {
12879 for (size_t k = 1; k <= 5; k += 2) {
12880 SpMMMicrokernelTester()
12881 .mr(16)
12882 .nr(1)
12883 .m(m)
12884 .n(n)
12885 .k(k)
12886 .sparsity(0.0f)
12887 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12888 }
12889 }
12890 }
12891 }
12892
12893 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_div_16) {
12894 for (uint32_t m = 32; m <= 48; m += 16) {
12895 for (uint32_t n = 1; n < 10; n += 2) {
12896 for (size_t k = 1; k <= 5; k += 2) {
12897 SpMMMicrokernelTester()
12898 .mr(16)
12899 .nr(1)
12900 .m(m)
12901 .n(n)
12902 .k(k)
12903 .sparsity(0.0f)
12904 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12905 }
12906 }
12907 }
12908 }
12909
12910 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, m_gt_16) {
12911 for (uint32_t m = 17; m < 32; m++) {
12912 for (uint32_t n = 1; n < 10; n += 2) {
12913 for (size_t k = 1; k <= 5; k += 2) {
12914 SpMMMicrokernelTester()
12915 .mr(16)
12916 .nr(1)
12917 .m(m)
12918 .n(n)
12919 .k(k)
12920 .sparsity(0.0f)
12921 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12922 }
12923 }
12924 }
12925 }
12926
Marat Dukhane8bfcc82020-11-16 12:28:13 -080012927 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, output_stride) {
12928 for (uint32_t n = 1; n < 10; n += 2) {
12929 for (size_t k = 1; k <= 5; k += 2) {
12930 SpMMMicrokernelTester()
12931 .mr(16)
12932 .nr(1)
12933 .m(32)
12934 .n(n)
12935 .k(k)
12936 .output_stride(37)
12937 .sparsity(0.0f)
12938 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12939 }
12940 }
12941 }
12942
Frank Barchard8ef44cd2020-11-03 12:30:23 -080012943 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, qmin) {
12944 for (uint32_t n = 1; n < 10; n += 2) {
12945 for (size_t k = 1; k <= 5; k += 2) {
12946 SpMMMicrokernelTester()
12947 .mr(16)
12948 .nr(1)
12949 .m(32)
12950 .n(n)
12951 .k(k)
12952 .sparsity(0.0f)
12953 .qmin(128)
12954 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12955 }
12956 }
12957 }
12958
12959 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, qmax) {
12960 for (uint32_t n = 1; n < 10; n += 2) {
12961 for (size_t k = 1; k <= 5; k += 2) {
12962 SpMMMicrokernelTester()
12963 .mr(16)
12964 .nr(1)
12965 .m(32)
12966 .n(n)
12967 .k(k)
12968 .sparsity(0.0f)
12969 .qmax(128)
12970 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12971 }
12972 }
12973 }
12974
12975 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
12976 for (uint32_t n = 1; n < 10; n += 2) {
12977 for (size_t k = 1; k <= 5; k += 2) {
12978 SpMMMicrokernelTester()
12979 .mr(16)
12980 .nr(1)
12981 .m(32)
12982 .n(n)
12983 .k(k)
12984 .sparsity(0.5f)
12985 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
12986 }
12987 }
12988 }
12989
12990 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
12991 for (uint32_t n = 1; n < 10; n += 2) {
12992 for (size_t k = 1; k <= 5; k += 2) {
12993 SpMMMicrokernelTester()
12994 .mr(16)
12995 .nr(1)
12996 .m(32)
12997 .n(n)
12998 .k(k)
12999 .sparsity(1.0f)
13000 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined);
13001 }
13002 }
13003 }
13004#endif // XNN_ARCH_WASMSIMD
13005
13006
13007#if XNN_ARCH_WASMSIMD
13008 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, k_eq_1) {
13009 SpMMMicrokernelTester()
13010 .mr(32)
13011 .nr(1)
13012 .m(32)
13013 .n(1)
13014 .k(1)
13015 .sparsity(0.0f)
13016 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13017 }
13018
13019 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, k_gt_1) {
13020 for (size_t k = 2; k < 10; k++) {
13021 SpMMMicrokernelTester()
13022 .mr(32)
13023 .nr(1)
13024 .m(32)
13025 .n(1)
13026 .k(k)
13027 .sparsity(0.0f)
13028 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13029 }
13030 }
13031
13032 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, n_gt_1) {
13033 for (uint32_t n = 2; n < 10; n++) {
13034 for (size_t k = 1; k <= 5; k += 2) {
13035 SpMMMicrokernelTester()
13036 .mr(32)
13037 .nr(1)
13038 .m(32)
13039 .n(n)
13040 .k(k)
13041 .sparsity(0.0f)
13042 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13043 }
13044 }
13045 }
13046
13047 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_lt_32) {
13048 for (uint32_t m = 1; m < 32; m++) {
13049 for (uint32_t n = 1; n < 10; n += 2) {
13050 for (size_t k = 1; k <= 5; k += 2) {
13051 SpMMMicrokernelTester()
13052 .mr(32)
13053 .nr(1)
13054 .m(m)
13055 .n(n)
13056 .k(k)
13057 .sparsity(0.0f)
13058 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13059 }
13060 }
13061 }
13062 }
13063
13064 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_div_32) {
13065 for (uint32_t m = 64; m <= 96; m += 32) {
13066 for (uint32_t n = 1; n < 10; n += 2) {
13067 for (size_t k = 1; k <= 5; k += 2) {
13068 SpMMMicrokernelTester()
13069 .mr(32)
13070 .nr(1)
13071 .m(m)
13072 .n(n)
13073 .k(k)
13074 .sparsity(0.0f)
13075 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13076 }
13077 }
13078 }
13079 }
13080
13081 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, m_gt_32) {
13082 for (uint32_t m = 33; m < 64; m++) {
13083 for (uint32_t n = 1; n < 10; n += 2) {
13084 for (size_t k = 1; k <= 5; k += 2) {
13085 SpMMMicrokernelTester()
13086 .mr(32)
13087 .nr(1)
13088 .m(m)
13089 .n(n)
13090 .k(k)
13091 .sparsity(0.0f)
13092 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13093 }
13094 }
13095 }
13096 }
13097
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013098 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, output_stride) {
13099 for (uint32_t n = 1; n < 10; n += 2) {
13100 for (size_t k = 1; k <= 5; k += 2) {
13101 SpMMMicrokernelTester()
13102 .mr(32)
13103 .nr(1)
13104 .m(64)
13105 .n(n)
13106 .k(k)
13107 .output_stride(67)
13108 .sparsity(0.0f)
13109 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13110 }
13111 }
13112 }
13113
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013114 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, qmin) {
13115 for (uint32_t n = 1; n < 10; n += 2) {
13116 for (size_t k = 1; k <= 5; k += 2) {
13117 SpMMMicrokernelTester()
13118 .mr(32)
13119 .nr(1)
13120 .m(64)
13121 .n(n)
13122 .k(k)
13123 .sparsity(0.0f)
13124 .qmin(128)
13125 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13126 }
13127 }
13128 }
13129
13130 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, qmax) {
13131 for (uint32_t n = 1; n < 10; n += 2) {
13132 for (size_t k = 1; k <= 5; k += 2) {
13133 SpMMMicrokernelTester()
13134 .mr(32)
13135 .nr(1)
13136 .m(64)
13137 .n(n)
13138 .k(k)
13139 .sparsity(0.0f)
13140 .qmax(128)
13141 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13142 }
13143 }
13144 }
13145
13146 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, half_sparse) {
13147 for (uint32_t n = 1; n < 10; n += 2) {
13148 for (size_t k = 1; k <= 5; k += 2) {
13149 SpMMMicrokernelTester()
13150 .mr(32)
13151 .nr(1)
13152 .m(64)
13153 .n(n)
13154 .k(k)
13155 .sparsity(0.5f)
13156 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13157 }
13158 }
13159 }
13160
13161 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED, zero_weights) {
13162 for (uint32_t n = 1; n < 10; n += 2) {
13163 for (size_t k = 1; k <= 5; k += 2) {
13164 SpMMMicrokernelTester()
13165 .mr(32)
13166 .nr(1)
13167 .m(64)
13168 .n(n)
13169 .k(k)
13170 .sparsity(1.0f)
13171 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined);
13172 }
13173 }
13174 }
13175#endif // XNN_ARCH_WASMSIMD
13176
13177
13178#if XNN_ARCH_WASMSIMD
13179 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
13180 SpMMMicrokernelTester()
13181 .mr(4)
13182 .nr(1)
13183 .m(4)
13184 .n(1)
13185 .k(1)
13186 .sparsity(0.0f)
13187 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13188 }
13189
13190 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
13191 for (size_t k = 2; k < 10; k++) {
13192 SpMMMicrokernelTester()
13193 .mr(4)
13194 .nr(1)
13195 .m(4)
13196 .n(1)
13197 .k(k)
13198 .sparsity(0.0f)
13199 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13200 }
13201 }
13202
13203 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
13204 for (uint32_t n = 2; n < 10; n++) {
13205 for (size_t k = 1; k <= 5; k += 2) {
13206 SpMMMicrokernelTester()
13207 .mr(4)
13208 .nr(1)
13209 .m(4)
13210 .n(n)
13211 .k(k)
13212 .sparsity(0.0f)
13213 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13214 }
13215 }
13216 }
13217
13218 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_lt_4) {
13219 for (uint32_t m = 1; m < 4; m++) {
13220 for (uint32_t n = 1; n < 10; n += 2) {
13221 for (size_t k = 1; k <= 5; k += 2) {
13222 SpMMMicrokernelTester()
13223 .mr(4)
13224 .nr(1)
13225 .m(m)
13226 .n(n)
13227 .k(k)
13228 .sparsity(0.0f)
13229 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13230 }
13231 }
13232 }
13233 }
13234
13235 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_div_4) {
13236 for (uint32_t m = 8; m <= 12; m += 4) {
13237 for (uint32_t n = 1; n < 10; n += 2) {
13238 for (size_t k = 1; k <= 5; k += 2) {
13239 SpMMMicrokernelTester()
13240 .mr(4)
13241 .nr(1)
13242 .m(m)
13243 .n(n)
13244 .k(k)
13245 .sparsity(0.0f)
13246 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13247 }
13248 }
13249 }
13250 }
13251
13252 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, m_gt_4) {
13253 for (uint32_t m = 5; m < 8; m++) {
13254 for (uint32_t n = 1; n < 10; n += 2) {
13255 for (size_t k = 1; k <= 5; k += 2) {
13256 SpMMMicrokernelTester()
13257 .mr(4)
13258 .nr(1)
13259 .m(m)
13260 .n(n)
13261 .k(k)
13262 .sparsity(0.0f)
13263 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13264 }
13265 }
13266 }
13267 }
13268
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013269 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, output_stride) {
13270 for (uint32_t n = 1; n < 10; n += 2) {
13271 for (size_t k = 1; k <= 5; k += 2) {
13272 SpMMMicrokernelTester()
13273 .mr(4)
13274 .nr(1)
13275 .m(8)
13276 .n(n)
13277 .k(k)
13278 .output_stride(11)
13279 .sparsity(0.0f)
13280 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13281 }
13282 }
13283 }
13284
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013285 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, qmin) {
13286 for (uint32_t n = 1; n < 10; n += 2) {
13287 for (size_t k = 1; k <= 5; k += 2) {
13288 SpMMMicrokernelTester()
13289 .mr(4)
13290 .nr(1)
13291 .m(8)
13292 .n(n)
13293 .k(k)
13294 .sparsity(0.0f)
13295 .qmin(128)
13296 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13297 }
13298 }
13299 }
13300
13301 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, qmax) {
13302 for (uint32_t n = 1; n < 10; n += 2) {
13303 for (size_t k = 1; k <= 5; k += 2) {
13304 SpMMMicrokernelTester()
13305 .mr(4)
13306 .nr(1)
13307 .m(8)
13308 .n(n)
13309 .k(k)
13310 .sparsity(0.0f)
13311 .qmax(128)
13312 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13313 }
13314 }
13315 }
13316
13317 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, half_sparse) {
13318 for (uint32_t n = 1; n < 10; n += 2) {
13319 for (size_t k = 1; k <= 5; k += 2) {
13320 SpMMMicrokernelTester()
13321 .mr(4)
13322 .nr(1)
13323 .m(8)
13324 .n(n)
13325 .k(k)
13326 .sparsity(0.5f)
13327 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13328 }
13329 }
13330 }
13331
13332 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED, zero_weights) {
13333 for (uint32_t n = 1; n < 10; n += 2) {
13334 for (size_t k = 1; k <= 5; k += 2) {
13335 SpMMMicrokernelTester()
13336 .mr(4)
13337 .nr(1)
13338 .m(8)
13339 .n(n)
13340 .k(k)
13341 .sparsity(1.0f)
13342 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined);
13343 }
13344 }
13345 }
13346#endif // XNN_ARCH_WASMSIMD
13347
13348
13349#if XNN_ARCH_WASMSIMD
13350 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
13351 SpMMMicrokernelTester()
13352 .mr(8)
13353 .nr(1)
13354 .m(8)
13355 .n(1)
13356 .k(1)
13357 .sparsity(0.0f)
13358 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13359 }
13360
13361 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
13362 for (size_t k = 2; k < 10; k++) {
13363 SpMMMicrokernelTester()
13364 .mr(8)
13365 .nr(1)
13366 .m(8)
13367 .n(1)
13368 .k(k)
13369 .sparsity(0.0f)
13370 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13371 }
13372 }
13373
13374 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
13375 for (uint32_t n = 2; n < 10; n++) {
13376 for (size_t k = 1; k <= 5; k += 2) {
13377 SpMMMicrokernelTester()
13378 .mr(8)
13379 .nr(1)
13380 .m(8)
13381 .n(n)
13382 .k(k)
13383 .sparsity(0.0f)
13384 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13385 }
13386 }
13387 }
13388
13389 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_lt_8) {
13390 for (uint32_t m = 1; m < 8; m++) {
13391 for (uint32_t n = 1; n < 10; n += 2) {
13392 for (size_t k = 1; k <= 5; k += 2) {
13393 SpMMMicrokernelTester()
13394 .mr(8)
13395 .nr(1)
13396 .m(m)
13397 .n(n)
13398 .k(k)
13399 .sparsity(0.0f)
13400 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13401 }
13402 }
13403 }
13404 }
13405
13406 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_div_8) {
13407 for (uint32_t m = 16; m <= 24; m += 8) {
13408 for (uint32_t n = 1; n < 10; n += 2) {
13409 for (size_t k = 1; k <= 5; k += 2) {
13410 SpMMMicrokernelTester()
13411 .mr(8)
13412 .nr(1)
13413 .m(m)
13414 .n(n)
13415 .k(k)
13416 .sparsity(0.0f)
13417 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13418 }
13419 }
13420 }
13421 }
13422
13423 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, m_gt_8) {
13424 for (uint32_t m = 9; m < 16; m++) {
13425 for (uint32_t n = 1; n < 10; n += 2) {
13426 for (size_t k = 1; k <= 5; k += 2) {
13427 SpMMMicrokernelTester()
13428 .mr(8)
13429 .nr(1)
13430 .m(m)
13431 .n(n)
13432 .k(k)
13433 .sparsity(0.0f)
13434 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13435 }
13436 }
13437 }
13438 }
13439
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013440 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, output_stride) {
13441 for (uint32_t n = 1; n < 10; n += 2) {
13442 for (size_t k = 1; k <= 5; k += 2) {
13443 SpMMMicrokernelTester()
13444 .mr(8)
13445 .nr(1)
13446 .m(16)
13447 .n(n)
13448 .k(k)
13449 .output_stride(19)
13450 .sparsity(0.0f)
13451 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13452 }
13453 }
13454 }
13455
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013456 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, qmin) {
13457 for (uint32_t n = 1; n < 10; n += 2) {
13458 for (size_t k = 1; k <= 5; k += 2) {
13459 SpMMMicrokernelTester()
13460 .mr(8)
13461 .nr(1)
13462 .m(16)
13463 .n(n)
13464 .k(k)
13465 .sparsity(0.0f)
13466 .qmin(128)
13467 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13468 }
13469 }
13470 }
13471
13472 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, qmax) {
13473 for (uint32_t n = 1; n < 10; n += 2) {
13474 for (size_t k = 1; k <= 5; k += 2) {
13475 SpMMMicrokernelTester()
13476 .mr(8)
13477 .nr(1)
13478 .m(16)
13479 .n(n)
13480 .k(k)
13481 .sparsity(0.0f)
13482 .qmax(128)
13483 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13484 }
13485 }
13486 }
13487
13488 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, half_sparse) {
13489 for (uint32_t n = 1; n < 10; n += 2) {
13490 for (size_t k = 1; k <= 5; k += 2) {
13491 SpMMMicrokernelTester()
13492 .mr(8)
13493 .nr(1)
13494 .m(16)
13495 .n(n)
13496 .k(k)
13497 .sparsity(0.5f)
13498 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13499 }
13500 }
13501 }
13502
13503 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED, zero_weights) {
13504 for (uint32_t n = 1; n < 10; n += 2) {
13505 for (size_t k = 1; k <= 5; k += 2) {
13506 SpMMMicrokernelTester()
13507 .mr(8)
13508 .nr(1)
13509 .m(16)
13510 .n(n)
13511 .k(k)
13512 .sparsity(1.0f)
13513 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined);
13514 }
13515 }
13516 }
13517#endif // XNN_ARCH_WASMSIMD
13518
13519
13520#if XNN_ARCH_WASMSIMD
13521 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
13522 SpMMMicrokernelTester()
13523 .mr(16)
13524 .nr(1)
13525 .m(16)
13526 .n(1)
13527 .k(1)
13528 .sparsity(0.0f)
13529 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13530 }
13531
13532 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
13533 for (size_t k = 2; k < 10; k++) {
13534 SpMMMicrokernelTester()
13535 .mr(16)
13536 .nr(1)
13537 .m(16)
13538 .n(1)
13539 .k(k)
13540 .sparsity(0.0f)
13541 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13542 }
13543 }
13544
13545 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
13546 for (uint32_t n = 2; n < 10; n++) {
13547 for (size_t k = 1; k <= 5; k += 2) {
13548 SpMMMicrokernelTester()
13549 .mr(16)
13550 .nr(1)
13551 .m(16)
13552 .n(n)
13553 .k(k)
13554 .sparsity(0.0f)
13555 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13556 }
13557 }
13558 }
13559
13560 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_lt_16) {
13561 for (uint32_t m = 1; m < 16; m++) {
13562 for (uint32_t n = 1; n < 10; n += 2) {
13563 for (size_t k = 1; k <= 5; k += 2) {
13564 SpMMMicrokernelTester()
13565 .mr(16)
13566 .nr(1)
13567 .m(m)
13568 .n(n)
13569 .k(k)
13570 .sparsity(0.0f)
13571 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13572 }
13573 }
13574 }
13575 }
13576
13577 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_div_16) {
13578 for (uint32_t m = 32; m <= 48; m += 16) {
13579 for (uint32_t n = 1; n < 10; n += 2) {
13580 for (size_t k = 1; k <= 5; k += 2) {
13581 SpMMMicrokernelTester()
13582 .mr(16)
13583 .nr(1)
13584 .m(m)
13585 .n(n)
13586 .k(k)
13587 .sparsity(0.0f)
13588 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13589 }
13590 }
13591 }
13592 }
13593
13594 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, m_gt_16) {
13595 for (uint32_t m = 17; m < 32; m++) {
13596 for (uint32_t n = 1; n < 10; n += 2) {
13597 for (size_t k = 1; k <= 5; k += 2) {
13598 SpMMMicrokernelTester()
13599 .mr(16)
13600 .nr(1)
13601 .m(m)
13602 .n(n)
13603 .k(k)
13604 .sparsity(0.0f)
13605 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13606 }
13607 }
13608 }
13609 }
13610
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013611 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, output_stride) {
13612 for (uint32_t n = 1; n < 10; n += 2) {
13613 for (size_t k = 1; k <= 5; k += 2) {
13614 SpMMMicrokernelTester()
13615 .mr(16)
13616 .nr(1)
13617 .m(32)
13618 .n(n)
13619 .k(k)
13620 .output_stride(37)
13621 .sparsity(0.0f)
13622 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13623 }
13624 }
13625 }
13626
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013627 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, qmin) {
13628 for (uint32_t n = 1; n < 10; n += 2) {
13629 for (size_t k = 1; k <= 5; k += 2) {
13630 SpMMMicrokernelTester()
13631 .mr(16)
13632 .nr(1)
13633 .m(32)
13634 .n(n)
13635 .k(k)
13636 .sparsity(0.0f)
13637 .qmin(128)
13638 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13639 }
13640 }
13641 }
13642
13643 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, qmax) {
13644 for (uint32_t n = 1; n < 10; n += 2) {
13645 for (size_t k = 1; k <= 5; k += 2) {
13646 SpMMMicrokernelTester()
13647 .mr(16)
13648 .nr(1)
13649 .m(32)
13650 .n(n)
13651 .k(k)
13652 .sparsity(0.0f)
13653 .qmax(128)
13654 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13655 }
13656 }
13657 }
13658
13659 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, half_sparse) {
13660 for (uint32_t n = 1; n < 10; n += 2) {
13661 for (size_t k = 1; k <= 5; k += 2) {
13662 SpMMMicrokernelTester()
13663 .mr(16)
13664 .nr(1)
13665 .m(32)
13666 .n(n)
13667 .k(k)
13668 .sparsity(0.5f)
13669 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13670 }
13671 }
13672 }
13673
13674 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED, zero_weights) {
13675 for (uint32_t n = 1; n < 10; n += 2) {
13676 for (size_t k = 1; k <= 5; k += 2) {
13677 SpMMMicrokernelTester()
13678 .mr(16)
13679 .nr(1)
13680 .m(32)
13681 .n(n)
13682 .k(k)
13683 .sparsity(1.0f)
13684 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined);
13685 }
13686 }
13687 }
13688#endif // XNN_ARCH_WASMSIMD
13689
13690
13691#if XNN_ARCH_WASMSIMD
13692 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, k_eq_1) {
13693 SpMMMicrokernelTester()
13694 .mr(32)
13695 .nr(1)
13696 .m(32)
13697 .n(1)
13698 .k(1)
13699 .sparsity(0.0f)
13700 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13701 }
13702
13703 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, k_gt_1) {
13704 for (size_t k = 2; k < 10; k++) {
13705 SpMMMicrokernelTester()
13706 .mr(32)
13707 .nr(1)
13708 .m(32)
13709 .n(1)
13710 .k(k)
13711 .sparsity(0.0f)
13712 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13713 }
13714 }
13715
13716 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, n_gt_1) {
13717 for (uint32_t n = 2; n < 10; n++) {
13718 for (size_t k = 1; k <= 5; k += 2) {
13719 SpMMMicrokernelTester()
13720 .mr(32)
13721 .nr(1)
13722 .m(32)
13723 .n(n)
13724 .k(k)
13725 .sparsity(0.0f)
13726 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13727 }
13728 }
13729 }
13730
13731 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_lt_32) {
13732 for (uint32_t m = 1; m < 32; m++) {
13733 for (uint32_t n = 1; n < 10; n += 2) {
13734 for (size_t k = 1; k <= 5; k += 2) {
13735 SpMMMicrokernelTester()
13736 .mr(32)
13737 .nr(1)
13738 .m(m)
13739 .n(n)
13740 .k(k)
13741 .sparsity(0.0f)
13742 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13743 }
13744 }
13745 }
13746 }
13747
13748 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_div_32) {
13749 for (uint32_t m = 64; m <= 96; m += 32) {
13750 for (uint32_t n = 1; n < 10; n += 2) {
13751 for (size_t k = 1; k <= 5; k += 2) {
13752 SpMMMicrokernelTester()
13753 .mr(32)
13754 .nr(1)
13755 .m(m)
13756 .n(n)
13757 .k(k)
13758 .sparsity(0.0f)
13759 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13760 }
13761 }
13762 }
13763 }
13764
13765 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, m_gt_32) {
13766 for (uint32_t m = 33; m < 64; m++) {
13767 for (uint32_t n = 1; n < 10; n += 2) {
13768 for (size_t k = 1; k <= 5; k += 2) {
13769 SpMMMicrokernelTester()
13770 .mr(32)
13771 .nr(1)
13772 .m(m)
13773 .n(n)
13774 .k(k)
13775 .sparsity(0.0f)
13776 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13777 }
13778 }
13779 }
13780 }
13781
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013782 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, output_stride) {
13783 for (uint32_t n = 1; n < 10; n += 2) {
13784 for (size_t k = 1; k <= 5; k += 2) {
13785 SpMMMicrokernelTester()
13786 .mr(32)
13787 .nr(1)
13788 .m(64)
13789 .n(n)
13790 .k(k)
13791 .output_stride(67)
13792 .sparsity(0.0f)
13793 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13794 }
13795 }
13796 }
13797
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013798 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, qmin) {
13799 for (uint32_t n = 1; n < 10; n += 2) {
13800 for (size_t k = 1; k <= 5; k += 2) {
13801 SpMMMicrokernelTester()
13802 .mr(32)
13803 .nr(1)
13804 .m(64)
13805 .n(n)
13806 .k(k)
13807 .sparsity(0.0f)
13808 .qmin(128)
13809 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13810 }
13811 }
13812 }
13813
13814 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, qmax) {
13815 for (uint32_t n = 1; n < 10; n += 2) {
13816 for (size_t k = 1; k <= 5; k += 2) {
13817 SpMMMicrokernelTester()
13818 .mr(32)
13819 .nr(1)
13820 .m(64)
13821 .n(n)
13822 .k(k)
13823 .sparsity(0.0f)
13824 .qmax(128)
13825 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13826 }
13827 }
13828 }
13829
13830 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, half_sparse) {
13831 for (uint32_t n = 1; n < 10; n += 2) {
13832 for (size_t k = 1; k <= 5; k += 2) {
13833 SpMMMicrokernelTester()
13834 .mr(32)
13835 .nr(1)
13836 .m(64)
13837 .n(n)
13838 .k(k)
13839 .sparsity(0.5f)
13840 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13841 }
13842 }
13843 }
13844
13845 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED, zero_weights) {
13846 for (uint32_t n = 1; n < 10; n += 2) {
13847 for (size_t k = 1; k <= 5; k += 2) {
13848 SpMMMicrokernelTester()
13849 .mr(32)
13850 .nr(1)
13851 .m(64)
13852 .n(n)
13853 .k(k)
13854 .sparsity(1.0f)
13855 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined);
13856 }
13857 }
13858 }
13859#endif // XNN_ARCH_WASMSIMD
13860
13861
13862#if XNN_ARCH_WASMSIMD
13863 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
13864 SpMMMicrokernelTester()
13865 .mr(4)
13866 .nr(1)
13867 .m(4)
13868 .n(1)
13869 .k(2)
13870 .sparsity(0.0f)
13871 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13872 }
13873
13874 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
13875 for (size_t k = 1; k < 2; k++) {
13876 SpMMMicrokernelTester()
13877 .mr(4)
13878 .nr(1)
13879 .m(4)
13880 .n(1)
13881 .k(k)
13882 .sparsity(0.0f)
13883 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13884 }
13885 }
13886
13887 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
13888 for (size_t k = 3; k < 4; k++) {
13889 SpMMMicrokernelTester()
13890 .mr(4)
13891 .nr(1)
13892 .m(4)
13893 .n(1)
13894 .k(k)
13895 .sparsity(0.0f)
13896 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13897 }
13898 }
13899
13900 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
13901 for (size_t k = 4; k <= 20; k += 2) {
13902 SpMMMicrokernelTester()
13903 .mr(4)
13904 .nr(1)
13905 .m(4)
13906 .n(1)
13907 .k(k)
13908 .sparsity(0.0f)
13909 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13910 }
13911 }
13912
13913 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
13914 for (uint32_t n = 2; n < 10; n++) {
13915 for (size_t k = 1; k <= 10; k += 3) {
13916 SpMMMicrokernelTester()
13917 .mr(4)
13918 .nr(1)
13919 .m(4)
13920 .n(n)
13921 .k(k)
13922 .sparsity(0.0f)
13923 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13924 }
13925 }
13926 }
13927
13928 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_4) {
13929 for (uint32_t m = 1; m < 4; m++) {
13930 for (uint32_t n = 1; n < 10; n += 2) {
13931 for (size_t k = 1; k <= 10; k += 3) {
13932 SpMMMicrokernelTester()
13933 .mr(4)
13934 .nr(1)
13935 .m(m)
13936 .n(n)
13937 .k(k)
13938 .sparsity(0.0f)
13939 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13940 }
13941 }
13942 }
13943 }
13944
13945 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_div_4) {
13946 for (uint32_t m = 8; m <= 12; m += 4) {
13947 for (uint32_t n = 1; n < 10; n += 2) {
13948 for (size_t k = 1; k <= 10; k += 3) {
13949 SpMMMicrokernelTester()
13950 .mr(4)
13951 .nr(1)
13952 .m(m)
13953 .n(n)
13954 .k(k)
13955 .sparsity(0.0f)
13956 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13957 }
13958 }
13959 }
13960 }
13961
13962 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_4) {
13963 for (uint32_t m = 5; m < 8; m++) {
13964 for (uint32_t n = 1; n < 10; n += 2) {
13965 for (size_t k = 1; k <= 10; k += 3) {
13966 SpMMMicrokernelTester()
13967 .mr(4)
13968 .nr(1)
13969 .m(m)
13970 .n(n)
13971 .k(k)
13972 .sparsity(0.0f)
13973 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13974 }
13975 }
13976 }
13977 }
13978
Marat Dukhane8bfcc82020-11-16 12:28:13 -080013979 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
13980 for (uint32_t n = 1; n < 10; n += 2) {
13981 for (size_t k = 1; k <= 10; k += 3) {
13982 SpMMMicrokernelTester()
13983 .mr(4)
13984 .nr(1)
13985 .m(8)
13986 .n(n)
13987 .k(k)
13988 .output_stride(11)
13989 .sparsity(0.0f)
13990 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
13991 }
13992 }
13993 }
13994
Frank Barchard8ef44cd2020-11-03 12:30:23 -080013995 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
13996 for (uint32_t n = 1; n < 10; n += 2) {
13997 for (size_t k = 1; k <= 10; k += 3) {
13998 SpMMMicrokernelTester()
13999 .mr(4)
14000 .nr(1)
14001 .m(8)
14002 .n(n)
14003 .k(k)
14004 .sparsity(0.0f)
14005 .qmin(128)
14006 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
14007 }
14008 }
14009 }
14010
14011 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
14012 for (uint32_t n = 1; n < 10; n += 2) {
14013 for (size_t k = 1; k <= 10; k += 3) {
14014 SpMMMicrokernelTester()
14015 .mr(4)
14016 .nr(1)
14017 .m(8)
14018 .n(n)
14019 .k(k)
14020 .sparsity(0.0f)
14021 .qmax(128)
14022 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
14023 }
14024 }
14025 }
14026
14027 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
14028 for (uint32_t n = 1; n < 10; n += 2) {
14029 for (size_t k = 1; k <= 10; k += 3) {
14030 SpMMMicrokernelTester()
14031 .mr(4)
14032 .nr(1)
14033 .m(8)
14034 .n(n)
14035 .k(k)
14036 .sparsity(0.5f)
14037 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
14038 }
14039 }
14040 }
14041
14042 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
14043 for (uint32_t n = 1; n < 10; n += 2) {
14044 for (size_t k = 1; k <= 10; k += 3) {
14045 SpMMMicrokernelTester()
14046 .mr(4)
14047 .nr(1)
14048 .m(8)
14049 .n(n)
14050 .k(k)
14051 .sparsity(1.0f)
14052 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_pipelined_x2);
14053 }
14054 }
14055 }
14056#endif // XNN_ARCH_WASMSIMD
14057
14058
14059#if XNN_ARCH_WASMSIMD
14060 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
14061 SpMMMicrokernelTester()
14062 .mr(8)
14063 .nr(1)
14064 .m(8)
14065 .n(1)
14066 .k(2)
14067 .sparsity(0.0f)
14068 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14069 }
14070
14071 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
14072 for (size_t k = 1; k < 2; k++) {
14073 SpMMMicrokernelTester()
14074 .mr(8)
14075 .nr(1)
14076 .m(8)
14077 .n(1)
14078 .k(k)
14079 .sparsity(0.0f)
14080 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14081 }
14082 }
14083
14084 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
14085 for (size_t k = 3; k < 4; k++) {
14086 SpMMMicrokernelTester()
14087 .mr(8)
14088 .nr(1)
14089 .m(8)
14090 .n(1)
14091 .k(k)
14092 .sparsity(0.0f)
14093 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14094 }
14095 }
14096
14097 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
14098 for (size_t k = 4; k <= 20; k += 2) {
14099 SpMMMicrokernelTester()
14100 .mr(8)
14101 .nr(1)
14102 .m(8)
14103 .n(1)
14104 .k(k)
14105 .sparsity(0.0f)
14106 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14107 }
14108 }
14109
14110 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
14111 for (uint32_t n = 2; n < 10; n++) {
14112 for (size_t k = 1; k <= 10; k += 3) {
14113 SpMMMicrokernelTester()
14114 .mr(8)
14115 .nr(1)
14116 .m(8)
14117 .n(n)
14118 .k(k)
14119 .sparsity(0.0f)
14120 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14121 }
14122 }
14123 }
14124
14125 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_8) {
14126 for (uint32_t m = 1; m < 8; m++) {
14127 for (uint32_t n = 1; n < 10; n += 2) {
14128 for (size_t k = 1; k <= 10; k += 3) {
14129 SpMMMicrokernelTester()
14130 .mr(8)
14131 .nr(1)
14132 .m(m)
14133 .n(n)
14134 .k(k)
14135 .sparsity(0.0f)
14136 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14137 }
14138 }
14139 }
14140 }
14141
14142 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_div_8) {
14143 for (uint32_t m = 16; m <= 24; m += 8) {
14144 for (uint32_t n = 1; n < 10; n += 2) {
14145 for (size_t k = 1; k <= 10; k += 3) {
14146 SpMMMicrokernelTester()
14147 .mr(8)
14148 .nr(1)
14149 .m(m)
14150 .n(n)
14151 .k(k)
14152 .sparsity(0.0f)
14153 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14154 }
14155 }
14156 }
14157 }
14158
14159 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_8) {
14160 for (uint32_t m = 9; m < 16; m++) {
14161 for (uint32_t n = 1; n < 10; n += 2) {
14162 for (size_t k = 1; k <= 10; k += 3) {
14163 SpMMMicrokernelTester()
14164 .mr(8)
14165 .nr(1)
14166 .m(m)
14167 .n(n)
14168 .k(k)
14169 .sparsity(0.0f)
14170 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14171 }
14172 }
14173 }
14174 }
14175
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014176 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
14177 for (uint32_t n = 1; n < 10; n += 2) {
14178 for (size_t k = 1; k <= 10; k += 3) {
14179 SpMMMicrokernelTester()
14180 .mr(8)
14181 .nr(1)
14182 .m(16)
14183 .n(n)
14184 .k(k)
14185 .output_stride(19)
14186 .sparsity(0.0f)
14187 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14188 }
14189 }
14190 }
14191
Frank Barchard8ef44cd2020-11-03 12:30:23 -080014192 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
14193 for (uint32_t n = 1; n < 10; n += 2) {
14194 for (size_t k = 1; k <= 10; k += 3) {
14195 SpMMMicrokernelTester()
14196 .mr(8)
14197 .nr(1)
14198 .m(16)
14199 .n(n)
14200 .k(k)
14201 .sparsity(0.0f)
14202 .qmin(128)
14203 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14204 }
14205 }
14206 }
14207
14208 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
14209 for (uint32_t n = 1; n < 10; n += 2) {
14210 for (size_t k = 1; k <= 10; k += 3) {
14211 SpMMMicrokernelTester()
14212 .mr(8)
14213 .nr(1)
14214 .m(16)
14215 .n(n)
14216 .k(k)
14217 .sparsity(0.0f)
14218 .qmax(128)
14219 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14220 }
14221 }
14222 }
14223
14224 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
14225 for (uint32_t n = 1; n < 10; n += 2) {
14226 for (size_t k = 1; k <= 10; k += 3) {
14227 SpMMMicrokernelTester()
14228 .mr(8)
14229 .nr(1)
14230 .m(16)
14231 .n(n)
14232 .k(k)
14233 .sparsity(0.5f)
14234 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14235 }
14236 }
14237 }
14238
14239 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
14240 for (uint32_t n = 1; n < 10; n += 2) {
14241 for (size_t k = 1; k <= 10; k += 3) {
14242 SpMMMicrokernelTester()
14243 .mr(8)
14244 .nr(1)
14245 .m(16)
14246 .n(n)
14247 .k(k)
14248 .sparsity(1.0f)
14249 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined_x2);
14250 }
14251 }
14252 }
14253#endif // XNN_ARCH_WASMSIMD
14254
14255
14256#if XNN_ARCH_WASMSIMD
14257 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
14258 SpMMMicrokernelTester()
14259 .mr(16)
14260 .nr(1)
14261 .m(16)
14262 .n(1)
14263 .k(2)
14264 .sparsity(0.0f)
14265 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14266 }
14267
14268 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
14269 for (size_t k = 1; k < 2; k++) {
14270 SpMMMicrokernelTester()
14271 .mr(16)
14272 .nr(1)
14273 .m(16)
14274 .n(1)
14275 .k(k)
14276 .sparsity(0.0f)
14277 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14278 }
14279 }
14280
14281 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
14282 for (size_t k = 3; k < 4; k++) {
14283 SpMMMicrokernelTester()
14284 .mr(16)
14285 .nr(1)
14286 .m(16)
14287 .n(1)
14288 .k(k)
14289 .sparsity(0.0f)
14290 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14291 }
14292 }
14293
14294 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
14295 for (size_t k = 4; k <= 20; k += 2) {
14296 SpMMMicrokernelTester()
14297 .mr(16)
14298 .nr(1)
14299 .m(16)
14300 .n(1)
14301 .k(k)
14302 .sparsity(0.0f)
14303 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14304 }
14305 }
14306
14307 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
14308 for (uint32_t n = 2; n < 10; n++) {
14309 for (size_t k = 1; k <= 10; k += 3) {
14310 SpMMMicrokernelTester()
14311 .mr(16)
14312 .nr(1)
14313 .m(16)
14314 .n(n)
14315 .k(k)
14316 .sparsity(0.0f)
14317 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14318 }
14319 }
14320 }
14321
14322 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_16) {
14323 for (uint32_t m = 1; m < 16; m++) {
14324 for (uint32_t n = 1; n < 10; n += 2) {
14325 for (size_t k = 1; k <= 10; k += 3) {
14326 SpMMMicrokernelTester()
14327 .mr(16)
14328 .nr(1)
14329 .m(m)
14330 .n(n)
14331 .k(k)
14332 .sparsity(0.0f)
14333 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14334 }
14335 }
14336 }
14337 }
14338
14339 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_div_16) {
14340 for (uint32_t m = 32; m <= 48; m += 16) {
14341 for (uint32_t n = 1; n < 10; n += 2) {
14342 for (size_t k = 1; k <= 10; k += 3) {
14343 SpMMMicrokernelTester()
14344 .mr(16)
14345 .nr(1)
14346 .m(m)
14347 .n(n)
14348 .k(k)
14349 .sparsity(0.0f)
14350 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14351 }
14352 }
14353 }
14354 }
14355
14356 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_16) {
14357 for (uint32_t m = 17; m < 32; m++) {
14358 for (uint32_t n = 1; n < 10; n += 2) {
14359 for (size_t k = 1; k <= 10; k += 3) {
14360 SpMMMicrokernelTester()
14361 .mr(16)
14362 .nr(1)
14363 .m(m)
14364 .n(n)
14365 .k(k)
14366 .sparsity(0.0f)
14367 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14368 }
14369 }
14370 }
14371 }
14372
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014373 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
14374 for (uint32_t n = 1; n < 10; n += 2) {
14375 for (size_t k = 1; k <= 10; k += 3) {
14376 SpMMMicrokernelTester()
14377 .mr(16)
14378 .nr(1)
14379 .m(32)
14380 .n(n)
14381 .k(k)
14382 .output_stride(37)
14383 .sparsity(0.0f)
14384 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14385 }
14386 }
14387 }
14388
Frank Barchard8ef44cd2020-11-03 12:30:23 -080014389 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
14390 for (uint32_t n = 1; n < 10; n += 2) {
14391 for (size_t k = 1; k <= 10; k += 3) {
14392 SpMMMicrokernelTester()
14393 .mr(16)
14394 .nr(1)
14395 .m(32)
14396 .n(n)
14397 .k(k)
14398 .sparsity(0.0f)
14399 .qmin(128)
14400 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14401 }
14402 }
14403 }
14404
14405 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
14406 for (uint32_t n = 1; n < 10; n += 2) {
14407 for (size_t k = 1; k <= 10; k += 3) {
14408 SpMMMicrokernelTester()
14409 .mr(16)
14410 .nr(1)
14411 .m(32)
14412 .n(n)
14413 .k(k)
14414 .sparsity(0.0f)
14415 .qmax(128)
14416 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14417 }
14418 }
14419 }
14420
14421 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
14422 for (uint32_t n = 1; n < 10; n += 2) {
14423 for (size_t k = 1; k <= 10; k += 3) {
14424 SpMMMicrokernelTester()
14425 .mr(16)
14426 .nr(1)
14427 .m(32)
14428 .n(n)
14429 .k(k)
14430 .sparsity(0.5f)
14431 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14432 }
14433 }
14434 }
14435
14436 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
14437 for (uint32_t n = 1; n < 10; n += 2) {
14438 for (size_t k = 1; k <= 10; k += 3) {
14439 SpMMMicrokernelTester()
14440 .mr(16)
14441 .nr(1)
14442 .m(32)
14443 .n(n)
14444 .k(k)
14445 .sparsity(1.0f)
14446 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2);
14447 }
14448 }
14449 }
14450#endif // XNN_ARCH_WASMSIMD
14451
14452
14453#if XNN_ARCH_WASMSIMD
14454 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_eq_2) {
14455 SpMMMicrokernelTester()
14456 .mr(32)
14457 .nr(1)
14458 .m(32)
14459 .n(1)
14460 .k(2)
14461 .sparsity(0.0f)
14462 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14463 }
14464
14465 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_lt_2) {
14466 for (size_t k = 1; k < 2; k++) {
14467 SpMMMicrokernelTester()
14468 .mr(32)
14469 .nr(1)
14470 .m(32)
14471 .n(1)
14472 .k(k)
14473 .sparsity(0.0f)
14474 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14475 }
14476 }
14477
14478 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_gt_2) {
14479 for (size_t k = 3; k < 4; k++) {
14480 SpMMMicrokernelTester()
14481 .mr(32)
14482 .nr(1)
14483 .m(32)
14484 .n(1)
14485 .k(k)
14486 .sparsity(0.0f)
14487 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14488 }
14489 }
14490
14491 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, k_div_2) {
14492 for (size_t k = 4; k <= 20; k += 2) {
14493 SpMMMicrokernelTester()
14494 .mr(32)
14495 .nr(1)
14496 .m(32)
14497 .n(1)
14498 .k(k)
14499 .sparsity(0.0f)
14500 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14501 }
14502 }
14503
14504 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, n_gt_1) {
14505 for (uint32_t n = 2; n < 10; n++) {
14506 for (size_t k = 1; k <= 10; k += 3) {
14507 SpMMMicrokernelTester()
14508 .mr(32)
14509 .nr(1)
14510 .m(32)
14511 .n(n)
14512 .k(k)
14513 .sparsity(0.0f)
14514 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14515 }
14516 }
14517 }
14518
14519 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_lt_32) {
14520 for (uint32_t m = 1; m < 32; m++) {
14521 for (uint32_t n = 1; n < 10; n += 2) {
14522 for (size_t k = 1; k <= 10; k += 3) {
14523 SpMMMicrokernelTester()
14524 .mr(32)
14525 .nr(1)
14526 .m(m)
14527 .n(n)
14528 .k(k)
14529 .sparsity(0.0f)
14530 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14531 }
14532 }
14533 }
14534 }
14535
14536 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_div_32) {
14537 for (uint32_t m = 64; m <= 96; m += 32) {
14538 for (uint32_t n = 1; n < 10; n += 2) {
14539 for (size_t k = 1; k <= 10; k += 3) {
14540 SpMMMicrokernelTester()
14541 .mr(32)
14542 .nr(1)
14543 .m(m)
14544 .n(n)
14545 .k(k)
14546 .sparsity(0.0f)
14547 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14548 }
14549 }
14550 }
14551 }
14552
14553 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, m_gt_32) {
14554 for (uint32_t m = 33; m < 64; m++) {
14555 for (uint32_t n = 1; n < 10; n += 2) {
14556 for (size_t k = 1; k <= 10; k += 3) {
14557 SpMMMicrokernelTester()
14558 .mr(32)
14559 .nr(1)
14560 .m(m)
14561 .n(n)
14562 .k(k)
14563 .sparsity(0.0f)
14564 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14565 }
14566 }
14567 }
14568 }
14569
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014570 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, output_stride) {
14571 for (uint32_t n = 1; n < 10; n += 2) {
14572 for (size_t k = 1; k <= 10; k += 3) {
14573 SpMMMicrokernelTester()
14574 .mr(32)
14575 .nr(1)
14576 .m(64)
14577 .n(n)
14578 .k(k)
14579 .output_stride(67)
14580 .sparsity(0.0f)
14581 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14582 }
14583 }
14584 }
14585
Frank Barchard8ef44cd2020-11-03 12:30:23 -080014586 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, qmin) {
14587 for (uint32_t n = 1; n < 10; n += 2) {
14588 for (size_t k = 1; k <= 10; k += 3) {
14589 SpMMMicrokernelTester()
14590 .mr(32)
14591 .nr(1)
14592 .m(64)
14593 .n(n)
14594 .k(k)
14595 .sparsity(0.0f)
14596 .qmin(128)
14597 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14598 }
14599 }
14600 }
14601
14602 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, qmax) {
14603 for (uint32_t n = 1; n < 10; n += 2) {
14604 for (size_t k = 1; k <= 10; k += 3) {
14605 SpMMMicrokernelTester()
14606 .mr(32)
14607 .nr(1)
14608 .m(64)
14609 .n(n)
14610 .k(k)
14611 .sparsity(0.0f)
14612 .qmax(128)
14613 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14614 }
14615 }
14616 }
14617
14618 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, half_sparse) {
14619 for (uint32_t n = 1; n < 10; n += 2) {
14620 for (size_t k = 1; k <= 10; k += 3) {
14621 SpMMMicrokernelTester()
14622 .mr(32)
14623 .nr(1)
14624 .m(64)
14625 .n(n)
14626 .k(k)
14627 .sparsity(0.5f)
14628 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14629 }
14630 }
14631 }
14632
14633 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_ARM_PIPELINED_X2, zero_weights) {
14634 for (uint32_t n = 1; n < 10; n += 2) {
14635 for (size_t k = 1; k <= 10; k += 3) {
14636 SpMMMicrokernelTester()
14637 .mr(32)
14638 .nr(1)
14639 .m(64)
14640 .n(n)
14641 .k(k)
14642 .sparsity(1.0f)
14643 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2);
14644 }
14645 }
14646 }
14647#endif // XNN_ARCH_WASMSIMD
14648
14649
14650#if XNN_ARCH_WASMSIMD
14651 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
14652 SpMMMicrokernelTester()
14653 .mr(4)
14654 .nr(1)
14655 .m(4)
14656 .n(1)
14657 .k(2)
14658 .sparsity(0.0f)
14659 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14660 }
14661
14662 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
14663 for (size_t k = 1; k < 2; k++) {
14664 SpMMMicrokernelTester()
14665 .mr(4)
14666 .nr(1)
14667 .m(4)
14668 .n(1)
14669 .k(k)
14670 .sparsity(0.0f)
14671 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14672 }
14673 }
14674
14675 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
14676 for (size_t k = 3; k < 4; k++) {
14677 SpMMMicrokernelTester()
14678 .mr(4)
14679 .nr(1)
14680 .m(4)
14681 .n(1)
14682 .k(k)
14683 .sparsity(0.0f)
14684 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14685 }
14686 }
14687
14688 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
14689 for (size_t k = 4; k <= 20; k += 2) {
14690 SpMMMicrokernelTester()
14691 .mr(4)
14692 .nr(1)
14693 .m(4)
14694 .n(1)
14695 .k(k)
14696 .sparsity(0.0f)
14697 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14698 }
14699 }
14700
14701 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
14702 for (uint32_t n = 2; n < 10; n++) {
14703 for (size_t k = 1; k <= 10; k += 3) {
14704 SpMMMicrokernelTester()
14705 .mr(4)
14706 .nr(1)
14707 .m(4)
14708 .n(n)
14709 .k(k)
14710 .sparsity(0.0f)
14711 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14712 }
14713 }
14714 }
14715
14716 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_lt_4) {
14717 for (uint32_t m = 1; m < 4; m++) {
14718 for (uint32_t n = 1; n < 10; n += 2) {
14719 for (size_t k = 1; k <= 10; k += 3) {
14720 SpMMMicrokernelTester()
14721 .mr(4)
14722 .nr(1)
14723 .m(m)
14724 .n(n)
14725 .k(k)
14726 .sparsity(0.0f)
14727 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14728 }
14729 }
14730 }
14731 }
14732
14733 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_div_4) {
14734 for (uint32_t m = 8; m <= 12; m += 4) {
14735 for (uint32_t n = 1; n < 10; n += 2) {
14736 for (size_t k = 1; k <= 10; k += 3) {
14737 SpMMMicrokernelTester()
14738 .mr(4)
14739 .nr(1)
14740 .m(m)
14741 .n(n)
14742 .k(k)
14743 .sparsity(0.0f)
14744 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14745 }
14746 }
14747 }
14748 }
14749
14750 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, m_gt_4) {
14751 for (uint32_t m = 5; m < 8; m++) {
14752 for (uint32_t n = 1; n < 10; n += 2) {
14753 for (size_t k = 1; k <= 10; k += 3) {
14754 SpMMMicrokernelTester()
14755 .mr(4)
14756 .nr(1)
14757 .m(m)
14758 .n(n)
14759 .k(k)
14760 .sparsity(0.0f)
14761 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14762 }
14763 }
14764 }
14765 }
14766
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014767 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
14768 for (uint32_t n = 1; n < 10; n += 2) {
14769 for (size_t k = 1; k <= 10; k += 3) {
14770 SpMMMicrokernelTester()
14771 .mr(4)
14772 .nr(1)
14773 .m(8)
14774 .n(n)
14775 .k(k)
14776 .output_stride(11)
14777 .sparsity(0.0f)
14778 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14779 }
14780 }
14781 }
14782
Frank Barchard8ef44cd2020-11-03 12:30:23 -080014783 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
14784 for (uint32_t n = 1; n < 10; n += 2) {
14785 for (size_t k = 1; k <= 10; k += 3) {
14786 SpMMMicrokernelTester()
14787 .mr(4)
14788 .nr(1)
14789 .m(8)
14790 .n(n)
14791 .k(k)
14792 .sparsity(0.0f)
14793 .qmin(128)
14794 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14795 }
14796 }
14797 }
14798
14799 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
14800 for (uint32_t n = 1; n < 10; n += 2) {
14801 for (size_t k = 1; k <= 10; k += 3) {
14802 SpMMMicrokernelTester()
14803 .mr(4)
14804 .nr(1)
14805 .m(8)
14806 .n(n)
14807 .k(k)
14808 .sparsity(0.0f)
14809 .qmax(128)
14810 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14811 }
14812 }
14813 }
14814
14815 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
14816 for (uint32_t n = 1; n < 10; n += 2) {
14817 for (size_t k = 1; k <= 10; k += 3) {
14818 SpMMMicrokernelTester()
14819 .mr(4)
14820 .nr(1)
14821 .m(8)
14822 .n(n)
14823 .k(k)
14824 .sparsity(0.5f)
14825 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14826 }
14827 }
14828 }
14829
14830 TEST(F32_SPMM_MINMAX_4X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
14831 for (uint32_t n = 1; n < 10; n += 2) {
14832 for (size_t k = 1; k <= 10; k += 3) {
14833 SpMMMicrokernelTester()
14834 .mr(4)
14835 .nr(1)
14836 .m(8)
14837 .n(n)
14838 .k(k)
14839 .sparsity(1.0f)
14840 .Test(xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_pipelined_x2);
14841 }
14842 }
14843 }
14844#endif // XNN_ARCH_WASMSIMD
14845
14846
14847#if XNN_ARCH_WASMSIMD
14848 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
14849 SpMMMicrokernelTester()
14850 .mr(8)
14851 .nr(1)
14852 .m(8)
14853 .n(1)
14854 .k(2)
14855 .sparsity(0.0f)
14856 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14857 }
14858
14859 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
14860 for (size_t k = 1; k < 2; k++) {
14861 SpMMMicrokernelTester()
14862 .mr(8)
14863 .nr(1)
14864 .m(8)
14865 .n(1)
14866 .k(k)
14867 .sparsity(0.0f)
14868 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14869 }
14870 }
14871
14872 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
14873 for (size_t k = 3; k < 4; k++) {
14874 SpMMMicrokernelTester()
14875 .mr(8)
14876 .nr(1)
14877 .m(8)
14878 .n(1)
14879 .k(k)
14880 .sparsity(0.0f)
14881 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14882 }
14883 }
14884
14885 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
14886 for (size_t k = 4; k <= 20; k += 2) {
14887 SpMMMicrokernelTester()
14888 .mr(8)
14889 .nr(1)
14890 .m(8)
14891 .n(1)
14892 .k(k)
14893 .sparsity(0.0f)
14894 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14895 }
14896 }
14897
14898 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
14899 for (uint32_t n = 2; n < 10; n++) {
14900 for (size_t k = 1; k <= 10; k += 3) {
14901 SpMMMicrokernelTester()
14902 .mr(8)
14903 .nr(1)
14904 .m(8)
14905 .n(n)
14906 .k(k)
14907 .sparsity(0.0f)
14908 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14909 }
14910 }
14911 }
14912
14913 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_lt_8) {
14914 for (uint32_t m = 1; m < 8; m++) {
14915 for (uint32_t n = 1; n < 10; n += 2) {
14916 for (size_t k = 1; k <= 10; k += 3) {
14917 SpMMMicrokernelTester()
14918 .mr(8)
14919 .nr(1)
14920 .m(m)
14921 .n(n)
14922 .k(k)
14923 .sparsity(0.0f)
14924 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14925 }
14926 }
14927 }
14928 }
14929
14930 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_div_8) {
14931 for (uint32_t m = 16; m <= 24; m += 8) {
14932 for (uint32_t n = 1; n < 10; n += 2) {
14933 for (size_t k = 1; k <= 10; k += 3) {
14934 SpMMMicrokernelTester()
14935 .mr(8)
14936 .nr(1)
14937 .m(m)
14938 .n(n)
14939 .k(k)
14940 .sparsity(0.0f)
14941 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14942 }
14943 }
14944 }
14945 }
14946
14947 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, m_gt_8) {
14948 for (uint32_t m = 9; m < 16; m++) {
14949 for (uint32_t n = 1; n < 10; n += 2) {
14950 for (size_t k = 1; k <= 10; k += 3) {
14951 SpMMMicrokernelTester()
14952 .mr(8)
14953 .nr(1)
14954 .m(m)
14955 .n(n)
14956 .k(k)
14957 .sparsity(0.0f)
14958 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14959 }
14960 }
14961 }
14962 }
14963
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014964 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
14965 for (uint32_t n = 1; n < 10; n += 2) {
14966 for (size_t k = 1; k <= 10; k += 3) {
14967 SpMMMicrokernelTester()
14968 .mr(8)
14969 .nr(1)
14970 .m(16)
14971 .n(n)
14972 .k(k)
14973 .output_stride(19)
14974 .sparsity(0.0f)
14975 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14976 }
14977 }
14978 }
14979
Frank Barchard8ef44cd2020-11-03 12:30:23 -080014980 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
14981 for (uint32_t n = 1; n < 10; n += 2) {
14982 for (size_t k = 1; k <= 10; k += 3) {
14983 SpMMMicrokernelTester()
14984 .mr(8)
14985 .nr(1)
14986 .m(16)
14987 .n(n)
14988 .k(k)
14989 .sparsity(0.0f)
14990 .qmin(128)
14991 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
14992 }
14993 }
14994 }
14995
14996 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
14997 for (uint32_t n = 1; n < 10; n += 2) {
14998 for (size_t k = 1; k <= 10; k += 3) {
14999 SpMMMicrokernelTester()
15000 .mr(8)
15001 .nr(1)
15002 .m(16)
15003 .n(n)
15004 .k(k)
15005 .sparsity(0.0f)
15006 .qmax(128)
15007 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
15008 }
15009 }
15010 }
15011
15012 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
15013 for (uint32_t n = 1; n < 10; n += 2) {
15014 for (size_t k = 1; k <= 10; k += 3) {
15015 SpMMMicrokernelTester()
15016 .mr(8)
15017 .nr(1)
15018 .m(16)
15019 .n(n)
15020 .k(k)
15021 .sparsity(0.5f)
15022 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
15023 }
15024 }
15025 }
15026
15027 TEST(F32_SPMM_MINMAX_8X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
15028 for (uint32_t n = 1; n < 10; n += 2) {
15029 for (size_t k = 1; k <= 10; k += 3) {
15030 SpMMMicrokernelTester()
15031 .mr(8)
15032 .nr(1)
15033 .m(16)
15034 .n(n)
15035 .k(k)
15036 .sparsity(1.0f)
15037 .Test(xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined_x2);
15038 }
15039 }
15040 }
15041#endif // XNN_ARCH_WASMSIMD
15042
15043
15044#if XNN_ARCH_WASMSIMD
15045 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
15046 SpMMMicrokernelTester()
15047 .mr(16)
15048 .nr(1)
15049 .m(16)
15050 .n(1)
15051 .k(2)
15052 .sparsity(0.0f)
15053 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15054 }
15055
15056 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
15057 for (size_t k = 1; k < 2; k++) {
15058 SpMMMicrokernelTester()
15059 .mr(16)
15060 .nr(1)
15061 .m(16)
15062 .n(1)
15063 .k(k)
15064 .sparsity(0.0f)
15065 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15066 }
15067 }
15068
15069 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
15070 for (size_t k = 3; k < 4; k++) {
15071 SpMMMicrokernelTester()
15072 .mr(16)
15073 .nr(1)
15074 .m(16)
15075 .n(1)
15076 .k(k)
15077 .sparsity(0.0f)
15078 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15079 }
15080 }
15081
15082 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
15083 for (size_t k = 4; k <= 20; k += 2) {
15084 SpMMMicrokernelTester()
15085 .mr(16)
15086 .nr(1)
15087 .m(16)
15088 .n(1)
15089 .k(k)
15090 .sparsity(0.0f)
15091 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15092 }
15093 }
15094
15095 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
15096 for (uint32_t n = 2; n < 10; n++) {
15097 for (size_t k = 1; k <= 10; k += 3) {
15098 SpMMMicrokernelTester()
15099 .mr(16)
15100 .nr(1)
15101 .m(16)
15102 .n(n)
15103 .k(k)
15104 .sparsity(0.0f)
15105 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15106 }
15107 }
15108 }
15109
15110 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_lt_16) {
15111 for (uint32_t m = 1; m < 16; m++) {
15112 for (uint32_t n = 1; n < 10; n += 2) {
15113 for (size_t k = 1; k <= 10; k += 3) {
15114 SpMMMicrokernelTester()
15115 .mr(16)
15116 .nr(1)
15117 .m(m)
15118 .n(n)
15119 .k(k)
15120 .sparsity(0.0f)
15121 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15122 }
15123 }
15124 }
15125 }
15126
15127 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_div_16) {
15128 for (uint32_t m = 32; m <= 48; m += 16) {
15129 for (uint32_t n = 1; n < 10; n += 2) {
15130 for (size_t k = 1; k <= 10; k += 3) {
15131 SpMMMicrokernelTester()
15132 .mr(16)
15133 .nr(1)
15134 .m(m)
15135 .n(n)
15136 .k(k)
15137 .sparsity(0.0f)
15138 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15139 }
15140 }
15141 }
15142 }
15143
15144 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, m_gt_16) {
15145 for (uint32_t m = 17; m < 32; m++) {
15146 for (uint32_t n = 1; n < 10; n += 2) {
15147 for (size_t k = 1; k <= 10; k += 3) {
15148 SpMMMicrokernelTester()
15149 .mr(16)
15150 .nr(1)
15151 .m(m)
15152 .n(n)
15153 .k(k)
15154 .sparsity(0.0f)
15155 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15156 }
15157 }
15158 }
15159 }
15160
Marat Dukhane8bfcc82020-11-16 12:28:13 -080015161 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
15162 for (uint32_t n = 1; n < 10; n += 2) {
15163 for (size_t k = 1; k <= 10; k += 3) {
15164 SpMMMicrokernelTester()
15165 .mr(16)
15166 .nr(1)
15167 .m(32)
15168 .n(n)
15169 .k(k)
15170 .output_stride(37)
15171 .sparsity(0.0f)
15172 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15173 }
15174 }
15175 }
15176
Frank Barchard8ef44cd2020-11-03 12:30:23 -080015177 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
15178 for (uint32_t n = 1; n < 10; n += 2) {
15179 for (size_t k = 1; k <= 10; k += 3) {
15180 SpMMMicrokernelTester()
15181 .mr(16)
15182 .nr(1)
15183 .m(32)
15184 .n(n)
15185 .k(k)
15186 .sparsity(0.0f)
15187 .qmin(128)
15188 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15189 }
15190 }
15191 }
15192
15193 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
15194 for (uint32_t n = 1; n < 10; n += 2) {
15195 for (size_t k = 1; k <= 10; k += 3) {
15196 SpMMMicrokernelTester()
15197 .mr(16)
15198 .nr(1)
15199 .m(32)
15200 .n(n)
15201 .k(k)
15202 .sparsity(0.0f)
15203 .qmax(128)
15204 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15205 }
15206 }
15207 }
15208
15209 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
15210 for (uint32_t n = 1; n < 10; n += 2) {
15211 for (size_t k = 1; k <= 10; k += 3) {
15212 SpMMMicrokernelTester()
15213 .mr(16)
15214 .nr(1)
15215 .m(32)
15216 .n(n)
15217 .k(k)
15218 .sparsity(0.5f)
15219 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15220 }
15221 }
15222 }
15223
15224 TEST(F32_SPMM_MINMAX_16X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
15225 for (uint32_t n = 1; n < 10; n += 2) {
15226 for (size_t k = 1; k <= 10; k += 3) {
15227 SpMMMicrokernelTester()
15228 .mr(16)
15229 .nr(1)
15230 .m(32)
15231 .n(n)
15232 .k(k)
15233 .sparsity(1.0f)
15234 .Test(xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2);
15235 }
15236 }
15237 }
15238#endif // XNN_ARCH_WASMSIMD
15239
15240
15241#if XNN_ARCH_WASMSIMD
15242 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_eq_2) {
15243 SpMMMicrokernelTester()
15244 .mr(32)
15245 .nr(1)
15246 .m(32)
15247 .n(1)
15248 .k(2)
15249 .sparsity(0.0f)
15250 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15251 }
15252
15253 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_lt_2) {
15254 for (size_t k = 1; k < 2; k++) {
15255 SpMMMicrokernelTester()
15256 .mr(32)
15257 .nr(1)
15258 .m(32)
15259 .n(1)
15260 .k(k)
15261 .sparsity(0.0f)
15262 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15263 }
15264 }
15265
15266 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_gt_2) {
15267 for (size_t k = 3; k < 4; k++) {
15268 SpMMMicrokernelTester()
15269 .mr(32)
15270 .nr(1)
15271 .m(32)
15272 .n(1)
15273 .k(k)
15274 .sparsity(0.0f)
15275 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15276 }
15277 }
15278
15279 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, k_div_2) {
15280 for (size_t k = 4; k <= 20; k += 2) {
15281 SpMMMicrokernelTester()
15282 .mr(32)
15283 .nr(1)
15284 .m(32)
15285 .n(1)
15286 .k(k)
15287 .sparsity(0.0f)
15288 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15289 }
15290 }
15291
15292 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, n_gt_1) {
15293 for (uint32_t n = 2; n < 10; n++) {
15294 for (size_t k = 1; k <= 10; k += 3) {
15295 SpMMMicrokernelTester()
15296 .mr(32)
15297 .nr(1)
15298 .m(32)
15299 .n(n)
15300 .k(k)
15301 .sparsity(0.0f)
15302 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15303 }
15304 }
15305 }
15306
15307 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_lt_32) {
15308 for (uint32_t m = 1; m < 32; m++) {
15309 for (uint32_t n = 1; n < 10; n += 2) {
15310 for (size_t k = 1; k <= 10; k += 3) {
15311 SpMMMicrokernelTester()
15312 .mr(32)
15313 .nr(1)
15314 .m(m)
15315 .n(n)
15316 .k(k)
15317 .sparsity(0.0f)
15318 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15319 }
15320 }
15321 }
15322 }
15323
15324 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_div_32) {
15325 for (uint32_t m = 64; m <= 96; m += 32) {
15326 for (uint32_t n = 1; n < 10; n += 2) {
15327 for (size_t k = 1; k <= 10; k += 3) {
15328 SpMMMicrokernelTester()
15329 .mr(32)
15330 .nr(1)
15331 .m(m)
15332 .n(n)
15333 .k(k)
15334 .sparsity(0.0f)
15335 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15336 }
15337 }
15338 }
15339 }
15340
15341 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, m_gt_32) {
15342 for (uint32_t m = 33; m < 64; m++) {
15343 for (uint32_t n = 1; n < 10; n += 2) {
15344 for (size_t k = 1; k <= 10; k += 3) {
15345 SpMMMicrokernelTester()
15346 .mr(32)
15347 .nr(1)
15348 .m(m)
15349 .n(n)
15350 .k(k)
15351 .sparsity(0.0f)
15352 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15353 }
15354 }
15355 }
15356 }
15357
Marat Dukhane8bfcc82020-11-16 12:28:13 -080015358 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, output_stride) {
15359 for (uint32_t n = 1; n < 10; n += 2) {
15360 for (size_t k = 1; k <= 10; k += 3) {
15361 SpMMMicrokernelTester()
15362 .mr(32)
15363 .nr(1)
15364 .m(64)
15365 .n(n)
15366 .k(k)
15367 .output_stride(67)
15368 .sparsity(0.0f)
15369 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15370 }
15371 }
15372 }
15373
Frank Barchard8ef44cd2020-11-03 12:30:23 -080015374 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, qmin) {
15375 for (uint32_t n = 1; n < 10; n += 2) {
15376 for (size_t k = 1; k <= 10; k += 3) {
15377 SpMMMicrokernelTester()
15378 .mr(32)
15379 .nr(1)
15380 .m(64)
15381 .n(n)
15382 .k(k)
15383 .sparsity(0.0f)
15384 .qmin(128)
15385 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15386 }
15387 }
15388 }
15389
15390 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, qmax) {
15391 for (uint32_t n = 1; n < 10; n += 2) {
15392 for (size_t k = 1; k <= 10; k += 3) {
15393 SpMMMicrokernelTester()
15394 .mr(32)
15395 .nr(1)
15396 .m(64)
15397 .n(n)
15398 .k(k)
15399 .sparsity(0.0f)
15400 .qmax(128)
15401 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15402 }
15403 }
15404 }
15405
15406 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, half_sparse) {
15407 for (uint32_t n = 1; n < 10; n += 2) {
15408 for (size_t k = 1; k <= 10; k += 3) {
15409 SpMMMicrokernelTester()
15410 .mr(32)
15411 .nr(1)
15412 .m(64)
15413 .n(n)
15414 .k(k)
15415 .sparsity(0.5f)
15416 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15417 }
15418 }
15419 }
15420
15421 TEST(F32_SPMM_MINMAX_32X1__WASMSIMD_X86_PIPELINED_X2, zero_weights) {
15422 for (uint32_t n = 1; n < 10; n += 2) {
15423 for (size_t k = 1; k <= 10; k += 3) {
15424 SpMMMicrokernelTester()
15425 .mr(32)
15426 .nr(1)
15427 .m(64)
15428 .n(n)
15429 .k(k)
15430 .sparsity(1.0f)
15431 .Test(xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2);
15432 }
15433 }
15434 }
15435#endif // XNN_ARCH_WASMSIMD
15436
15437
Marat Dukhan355ab432020-04-09 19:01:52 -070015438TEST(F32_SPMM_MINMAX_1X1__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015439 SpMMMicrokernelTester()
15440 .mr(1)
15441 .nr(1)
15442 .m(1)
15443 .n(1)
15444 .k(1)
15445 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015446 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015447}
15448
Marat Dukhan355ab432020-04-09 19:01:52 -070015449TEST(F32_SPMM_MINMAX_1X1__SCALAR, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015450 for (size_t k = 2; k < 10; k++) {
15451 SpMMMicrokernelTester()
15452 .mr(1)
15453 .nr(1)
15454 .m(1)
15455 .n(1)
15456 .k(k)
15457 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015458 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015459 }
15460}
15461
Marat Dukhan355ab432020-04-09 19:01:52 -070015462TEST(F32_SPMM_MINMAX_1X1__SCALAR, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015463 for (uint32_t n = 2; n < 10; n++) {
15464 for (size_t k = 1; k <= 5; k += 2) {
15465 SpMMMicrokernelTester()
15466 .mr(1)
15467 .nr(1)
15468 .m(1)
15469 .n(n)
15470 .k(k)
15471 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015472 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015473 }
15474 }
15475}
15476
Marat Dukhan355ab432020-04-09 19:01:52 -070015477TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_lt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015478 for (uint32_t m = 1; m < 1; m++) {
15479 for (uint32_t n = 1; n < 10; n += 2) {
15480 for (size_t k = 1; k <= 5; k += 2) {
15481 SpMMMicrokernelTester()
15482 .mr(1)
15483 .nr(1)
15484 .m(m)
15485 .n(n)
15486 .k(k)
15487 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015488 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015489 }
15490 }
15491 }
15492}
15493
Marat Dukhan355ab432020-04-09 19:01:52 -070015494TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_div_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015495 for (uint32_t m = 2; m <= 3; m += 1) {
15496 for (uint32_t n = 1; n < 10; n += 2) {
15497 for (size_t k = 1; k <= 5; k += 2) {
15498 SpMMMicrokernelTester()
15499 .mr(1)
15500 .nr(1)
15501 .m(m)
15502 .n(n)
15503 .k(k)
15504 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015505 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015506 }
15507 }
15508 }
15509}
15510
Marat Dukhan355ab432020-04-09 19:01:52 -070015511TEST(F32_SPMM_MINMAX_1X1__SCALAR, m_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015512 for (uint32_t m = 2; m < 2; m++) {
15513 for (uint32_t n = 1; n < 10; n += 2) {
15514 for (size_t k = 1; k <= 5; k += 2) {
15515 SpMMMicrokernelTester()
15516 .mr(1)
15517 .nr(1)
15518 .m(m)
15519 .n(n)
15520 .k(k)
15521 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015522 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015523 }
15524 }
15525 }
15526}
15527
Marat Dukhane8bfcc82020-11-16 12:28:13 -080015528TEST(F32_SPMM_MINMAX_1X1__SCALAR, output_stride) {
15529 for (uint32_t n = 1; n < 10; n += 2) {
15530 for (size_t k = 1; k <= 5; k += 2) {
15531 SpMMMicrokernelTester()
15532 .mr(1)
15533 .nr(1)
15534 .m(2)
15535 .n(n)
15536 .k(k)
15537 .output_stride(5)
15538 .sparsity(0.0f)
15539 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
15540 }
15541 }
15542}
15543
Marat Dukhan355ab432020-04-09 19:01:52 -070015544TEST(F32_SPMM_MINMAX_1X1__SCALAR, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015545 for (uint32_t n = 1; n < 10; n += 2) {
15546 for (size_t k = 1; k <= 5; k += 2) {
15547 SpMMMicrokernelTester()
15548 .mr(1)
15549 .nr(1)
15550 .m(2)
15551 .n(n)
15552 .k(k)
15553 .sparsity(0.0f)
15554 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015555 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015556 }
15557 }
15558}
15559
Marat Dukhan355ab432020-04-09 19:01:52 -070015560TEST(F32_SPMM_MINMAX_1X1__SCALAR, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015561 for (uint32_t n = 1; n < 10; n += 2) {
15562 for (size_t k = 1; k <= 5; k += 2) {
15563 SpMMMicrokernelTester()
15564 .mr(1)
15565 .nr(1)
15566 .m(2)
15567 .n(n)
15568 .k(k)
15569 .sparsity(0.0f)
15570 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015571 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015572 }
15573 }
15574}
15575
Marat Dukhan355ab432020-04-09 19:01:52 -070015576TEST(F32_SPMM_MINMAX_1X1__SCALAR, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015577 for (uint32_t n = 1; n < 10; n += 2) {
15578 for (size_t k = 1; k <= 5; k += 2) {
15579 SpMMMicrokernelTester()
15580 .mr(1)
15581 .nr(1)
15582 .m(2)
15583 .n(n)
15584 .k(k)
15585 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015586 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015587 }
15588 }
15589}
15590
Marat Dukhan355ab432020-04-09 19:01:52 -070015591TEST(F32_SPMM_MINMAX_1X1__SCALAR, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015592 for (uint32_t n = 1; n < 10; n += 2) {
15593 for (size_t k = 1; k <= 5; k += 2) {
15594 SpMMMicrokernelTester()
15595 .mr(1)
15596 .nr(1)
15597 .m(2)
15598 .n(n)
15599 .k(k)
15600 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015601 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015602 }
15603 }
15604}
15605
Marat Dukhan355ab432020-04-09 19:01:52 -070015606TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015607 SpMMMicrokernelTester()
15608 .mr(1)
15609 .nr(1)
15610 .m(1)
15611 .n(1)
15612 .k(1)
15613 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015614 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015615}
15616
Marat Dukhan355ab432020-04-09 19:01:52 -070015617TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015618 for (size_t k = 2; k < 10; k++) {
15619 SpMMMicrokernelTester()
15620 .mr(1)
15621 .nr(1)
15622 .m(1)
15623 .n(1)
15624 .k(k)
15625 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015626 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015627 }
15628}
15629
Marat Dukhan355ab432020-04-09 19:01:52 -070015630TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015631 for (uint32_t n = 2; n < 10; n++) {
15632 for (size_t k = 1; k <= 5; k += 2) {
15633 SpMMMicrokernelTester()
15634 .mr(1)
15635 .nr(1)
15636 .m(1)
15637 .n(n)
15638 .k(k)
15639 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015640 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015641 }
15642 }
15643}
15644
Marat Dukhan355ab432020-04-09 19:01:52 -070015645TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_lt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015646 for (uint32_t m = 1; m < 1; m++) {
15647 for (uint32_t n = 1; n < 10; n += 2) {
15648 for (size_t k = 1; k <= 5; k += 2) {
15649 SpMMMicrokernelTester()
15650 .mr(1)
15651 .nr(1)
15652 .m(m)
15653 .n(n)
15654 .k(k)
15655 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015656 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015657 }
15658 }
15659 }
15660}
15661
Marat Dukhan355ab432020-04-09 19:01:52 -070015662TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_div_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015663 for (uint32_t m = 2; m <= 3; m += 1) {
15664 for (uint32_t n = 1; n < 10; n += 2) {
15665 for (size_t k = 1; k <= 5; k += 2) {
15666 SpMMMicrokernelTester()
15667 .mr(1)
15668 .nr(1)
15669 .m(m)
15670 .n(n)
15671 .k(k)
15672 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015673 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015674 }
15675 }
15676 }
15677}
15678
Marat Dukhan355ab432020-04-09 19:01:52 -070015679TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, m_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015680 for (uint32_t m = 2; m < 2; m++) {
15681 for (uint32_t n = 1; n < 10; n += 2) {
15682 for (size_t k = 1; k <= 5; k += 2) {
15683 SpMMMicrokernelTester()
15684 .mr(1)
15685 .nr(1)
15686 .m(m)
15687 .n(n)
15688 .k(k)
15689 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015690 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015691 }
15692 }
15693 }
15694}
15695
Marat Dukhane8bfcc82020-11-16 12:28:13 -080015696TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, output_stride) {
15697 for (uint32_t n = 1; n < 10; n += 2) {
15698 for (size_t k = 1; k <= 5; k += 2) {
15699 SpMMMicrokernelTester()
15700 .mr(1)
15701 .nr(1)
15702 .m(2)
15703 .n(n)
15704 .k(k)
15705 .output_stride(5)
15706 .sparsity(0.0f)
15707 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
15708 }
15709 }
15710}
15711
Marat Dukhan355ab432020-04-09 19:01:52 -070015712TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015713 for (uint32_t n = 1; n < 10; n += 2) {
15714 for (size_t k = 1; k <= 5; k += 2) {
15715 SpMMMicrokernelTester()
15716 .mr(1)
15717 .nr(1)
15718 .m(2)
15719 .n(n)
15720 .k(k)
15721 .sparsity(0.0f)
15722 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015723 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015724 }
15725 }
15726}
15727
Marat Dukhan355ab432020-04-09 19:01:52 -070015728TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015729 for (uint32_t n = 1; n < 10; n += 2) {
15730 for (size_t k = 1; k <= 5; k += 2) {
15731 SpMMMicrokernelTester()
15732 .mr(1)
15733 .nr(1)
15734 .m(2)
15735 .n(n)
15736 .k(k)
15737 .sparsity(0.0f)
15738 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015739 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015740 }
15741 }
15742}
15743
Marat Dukhan355ab432020-04-09 19:01:52 -070015744TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015745 for (uint32_t n = 1; n < 10; n += 2) {
15746 for (size_t k = 1; k <= 5; k += 2) {
15747 SpMMMicrokernelTester()
15748 .mr(1)
15749 .nr(1)
15750 .m(2)
15751 .n(n)
15752 .k(k)
15753 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015754 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015755 }
15756 }
15757}
15758
Marat Dukhan355ab432020-04-09 19:01:52 -070015759TEST(F32_SPMM_MINMAX_1X1__SCALAR_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015760 for (uint32_t n = 1; n < 10; n += 2) {
15761 for (size_t k = 1; k <= 5; k += 2) {
15762 SpMMMicrokernelTester()
15763 .mr(1)
15764 .nr(1)
15765 .m(2)
15766 .n(n)
15767 .k(k)
15768 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015769 .Test(xnn_f32_spmm_minmax_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015770 }
15771 }
15772}
15773
Marat Dukhan355ab432020-04-09 19:01:52 -070015774TEST(F32_SPMM_MINMAX_2X1__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015775 SpMMMicrokernelTester()
15776 .mr(2)
15777 .nr(1)
15778 .m(2)
15779 .n(1)
15780 .k(1)
15781 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015782 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015783}
15784
Marat Dukhan355ab432020-04-09 19:01:52 -070015785TEST(F32_SPMM_MINMAX_2X1__SCALAR, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015786 for (size_t k = 2; k < 10; k++) {
15787 SpMMMicrokernelTester()
15788 .mr(2)
15789 .nr(1)
15790 .m(2)
15791 .n(1)
15792 .k(k)
15793 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015794 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015795 }
15796}
15797
Marat Dukhan355ab432020-04-09 19:01:52 -070015798TEST(F32_SPMM_MINMAX_2X1__SCALAR, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015799 for (uint32_t n = 2; n < 10; n++) {
15800 for (size_t k = 1; k <= 5; k += 2) {
15801 SpMMMicrokernelTester()
15802 .mr(2)
15803 .nr(1)
15804 .m(2)
15805 .n(n)
15806 .k(k)
15807 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015808 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015809 }
15810 }
15811}
15812
Marat Dukhan355ab432020-04-09 19:01:52 -070015813TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_lt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015814 for (uint32_t m = 1; m < 2; m++) {
15815 for (uint32_t n = 1; n < 10; n += 2) {
15816 for (size_t k = 1; k <= 5; k += 2) {
15817 SpMMMicrokernelTester()
15818 .mr(2)
15819 .nr(1)
15820 .m(m)
15821 .n(n)
15822 .k(k)
15823 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015824 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015825 }
15826 }
15827 }
15828}
15829
Marat Dukhan355ab432020-04-09 19:01:52 -070015830TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015831 for (uint32_t m = 4; m <= 6; m += 2) {
15832 for (uint32_t n = 1; n < 10; n += 2) {
15833 for (size_t k = 1; k <= 5; k += 2) {
15834 SpMMMicrokernelTester()
15835 .mr(2)
15836 .nr(1)
15837 .m(m)
15838 .n(n)
15839 .k(k)
15840 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015841 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015842 }
15843 }
15844 }
15845}
15846
Marat Dukhan355ab432020-04-09 19:01:52 -070015847TEST(F32_SPMM_MINMAX_2X1__SCALAR, m_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015848 for (uint32_t m = 3; m < 4; m++) {
15849 for (uint32_t n = 1; n < 10; n += 2) {
15850 for (size_t k = 1; k <= 5; k += 2) {
15851 SpMMMicrokernelTester()
15852 .mr(2)
15853 .nr(1)
15854 .m(m)
15855 .n(n)
15856 .k(k)
15857 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015858 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015859 }
15860 }
15861 }
15862}
15863
Marat Dukhane8bfcc82020-11-16 12:28:13 -080015864TEST(F32_SPMM_MINMAX_2X1__SCALAR, output_stride) {
15865 for (uint32_t n = 1; n < 10; n += 2) {
15866 for (size_t k = 1; k <= 5; k += 2) {
15867 SpMMMicrokernelTester()
15868 .mr(2)
15869 .nr(1)
15870 .m(4)
15871 .n(n)
15872 .k(k)
15873 .output_stride(7)
15874 .sparsity(0.0f)
15875 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
15876 }
15877 }
15878}
15879
Marat Dukhan355ab432020-04-09 19:01:52 -070015880TEST(F32_SPMM_MINMAX_2X1__SCALAR, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015881 for (uint32_t n = 1; n < 10; n += 2) {
15882 for (size_t k = 1; k <= 5; k += 2) {
15883 SpMMMicrokernelTester()
15884 .mr(2)
15885 .nr(1)
15886 .m(4)
15887 .n(n)
15888 .k(k)
15889 .sparsity(0.0f)
15890 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015891 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015892 }
15893 }
15894}
15895
Marat Dukhan355ab432020-04-09 19:01:52 -070015896TEST(F32_SPMM_MINMAX_2X1__SCALAR, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015897 for (uint32_t n = 1; n < 10; n += 2) {
15898 for (size_t k = 1; k <= 5; k += 2) {
15899 SpMMMicrokernelTester()
15900 .mr(2)
15901 .nr(1)
15902 .m(4)
15903 .n(n)
15904 .k(k)
15905 .sparsity(0.0f)
15906 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070015907 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015908 }
15909 }
15910}
15911
Marat Dukhan355ab432020-04-09 19:01:52 -070015912TEST(F32_SPMM_MINMAX_2X1__SCALAR, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015913 for (uint32_t n = 1; n < 10; n += 2) {
15914 for (size_t k = 1; k <= 5; k += 2) {
15915 SpMMMicrokernelTester()
15916 .mr(2)
15917 .nr(1)
15918 .m(4)
15919 .n(n)
15920 .k(k)
15921 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015922 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015923 }
15924 }
15925}
15926
Marat Dukhan355ab432020-04-09 19:01:52 -070015927TEST(F32_SPMM_MINMAX_2X1__SCALAR, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015928 for (uint32_t n = 1; n < 10; n += 2) {
15929 for (size_t k = 1; k <= 5; k += 2) {
15930 SpMMMicrokernelTester()
15931 .mr(2)
15932 .nr(1)
15933 .m(4)
15934 .n(n)
15935 .k(k)
15936 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015937 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015938 }
15939 }
15940}
15941
Marat Dukhan355ab432020-04-09 19:01:52 -070015942TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015943 SpMMMicrokernelTester()
15944 .mr(2)
15945 .nr(1)
15946 .m(2)
15947 .n(1)
15948 .k(1)
15949 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015950 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015951}
15952
Marat Dukhan355ab432020-04-09 19:01:52 -070015953TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015954 for (size_t k = 2; k < 10; k++) {
15955 SpMMMicrokernelTester()
15956 .mr(2)
15957 .nr(1)
15958 .m(2)
15959 .n(1)
15960 .k(k)
15961 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015962 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015963 }
15964}
15965
Marat Dukhan355ab432020-04-09 19:01:52 -070015966TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015967 for (uint32_t n = 2; n < 10; n++) {
15968 for (size_t k = 1; k <= 5; k += 2) {
15969 SpMMMicrokernelTester()
15970 .mr(2)
15971 .nr(1)
15972 .m(2)
15973 .n(n)
15974 .k(k)
15975 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015976 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015977 }
15978 }
15979}
15980
Marat Dukhan355ab432020-04-09 19:01:52 -070015981TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_lt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015982 for (uint32_t m = 1; m < 2; m++) {
15983 for (uint32_t n = 1; n < 10; n += 2) {
15984 for (size_t k = 1; k <= 5; k += 2) {
15985 SpMMMicrokernelTester()
15986 .mr(2)
15987 .nr(1)
15988 .m(m)
15989 .n(n)
15990 .k(k)
15991 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070015992 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070015993 }
15994 }
15995 }
15996}
15997
Marat Dukhan355ab432020-04-09 19:01:52 -070015998TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_div_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070015999 for (uint32_t m = 4; m <= 6; m += 2) {
16000 for (uint32_t n = 1; n < 10; n += 2) {
16001 for (size_t k = 1; k <= 5; k += 2) {
16002 SpMMMicrokernelTester()
16003 .mr(2)
16004 .nr(1)
16005 .m(m)
16006 .n(n)
16007 .k(k)
16008 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016009 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016010 }
16011 }
16012 }
16013}
16014
Marat Dukhan355ab432020-04-09 19:01:52 -070016015TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, m_gt_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016016 for (uint32_t m = 3; m < 4; m++) {
16017 for (uint32_t n = 1; n < 10; n += 2) {
16018 for (size_t k = 1; k <= 5; k += 2) {
16019 SpMMMicrokernelTester()
16020 .mr(2)
16021 .nr(1)
16022 .m(m)
16023 .n(n)
16024 .k(k)
16025 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016026 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016027 }
16028 }
16029 }
16030}
16031
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016032TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, output_stride) {
16033 for (uint32_t n = 1; n < 10; n += 2) {
16034 for (size_t k = 1; k <= 5; k += 2) {
16035 SpMMMicrokernelTester()
16036 .mr(2)
16037 .nr(1)
16038 .m(4)
16039 .n(n)
16040 .k(k)
16041 .output_stride(7)
16042 .sparsity(0.0f)
16043 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
16044 }
16045 }
16046}
16047
Marat Dukhan355ab432020-04-09 19:01:52 -070016048TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016049 for (uint32_t n = 1; n < 10; n += 2) {
16050 for (size_t k = 1; k <= 5; k += 2) {
16051 SpMMMicrokernelTester()
16052 .mr(2)
16053 .nr(1)
16054 .m(4)
16055 .n(n)
16056 .k(k)
16057 .sparsity(0.0f)
16058 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016059 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016060 }
16061 }
16062}
16063
Marat Dukhan355ab432020-04-09 19:01:52 -070016064TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016065 for (uint32_t n = 1; n < 10; n += 2) {
16066 for (size_t k = 1; k <= 5; k += 2) {
16067 SpMMMicrokernelTester()
16068 .mr(2)
16069 .nr(1)
16070 .m(4)
16071 .n(n)
16072 .k(k)
16073 .sparsity(0.0f)
16074 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016075 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016076 }
16077 }
16078}
16079
Marat Dukhan355ab432020-04-09 19:01:52 -070016080TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016081 for (uint32_t n = 1; n < 10; n += 2) {
16082 for (size_t k = 1; k <= 5; k += 2) {
16083 SpMMMicrokernelTester()
16084 .mr(2)
16085 .nr(1)
16086 .m(4)
16087 .n(n)
16088 .k(k)
16089 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016090 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016091 }
16092 }
16093}
16094
Marat Dukhan355ab432020-04-09 19:01:52 -070016095TEST(F32_SPMM_MINMAX_2X1__SCALAR_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016096 for (uint32_t n = 1; n < 10; n += 2) {
16097 for (size_t k = 1; k <= 5; k += 2) {
16098 SpMMMicrokernelTester()
16099 .mr(2)
16100 .nr(1)
16101 .m(4)
16102 .n(n)
16103 .k(k)
16104 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016105 .Test(xnn_f32_spmm_minmax_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016106 }
16107 }
16108}
16109
Marat Dukhan355ab432020-04-09 19:01:52 -070016110TEST(F32_SPMM_MINMAX_4X1__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016111 SpMMMicrokernelTester()
16112 .mr(4)
16113 .nr(1)
16114 .m(4)
16115 .n(1)
16116 .k(1)
16117 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016118 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016119}
16120
Marat Dukhan355ab432020-04-09 19:01:52 -070016121TEST(F32_SPMM_MINMAX_4X1__SCALAR, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016122 for (size_t k = 2; k < 10; k++) {
16123 SpMMMicrokernelTester()
16124 .mr(4)
16125 .nr(1)
16126 .m(4)
16127 .n(1)
16128 .k(k)
16129 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016130 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016131 }
16132}
16133
Marat Dukhan355ab432020-04-09 19:01:52 -070016134TEST(F32_SPMM_MINMAX_4X1__SCALAR, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016135 for (uint32_t n = 2; n < 10; n++) {
16136 for (size_t k = 1; k <= 5; k += 2) {
16137 SpMMMicrokernelTester()
16138 .mr(4)
16139 .nr(1)
16140 .m(4)
16141 .n(n)
16142 .k(k)
16143 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016144 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016145 }
16146 }
16147}
16148
Marat Dukhan355ab432020-04-09 19:01:52 -070016149TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016150 for (uint32_t m = 1; m < 4; m++) {
16151 for (uint32_t n = 1; n < 10; n += 2) {
16152 for (size_t k = 1; k <= 5; k += 2) {
16153 SpMMMicrokernelTester()
16154 .mr(4)
16155 .nr(1)
16156 .m(m)
16157 .n(n)
16158 .k(k)
16159 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016160 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016161 }
16162 }
16163 }
16164}
16165
Marat Dukhan355ab432020-04-09 19:01:52 -070016166TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016167 for (uint32_t m = 8; m <= 12; m += 4) {
16168 for (uint32_t n = 1; n < 10; n += 2) {
16169 for (size_t k = 1; k <= 5; k += 2) {
16170 SpMMMicrokernelTester()
16171 .mr(4)
16172 .nr(1)
16173 .m(m)
16174 .n(n)
16175 .k(k)
16176 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016177 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016178 }
16179 }
16180 }
16181}
16182
Marat Dukhan355ab432020-04-09 19:01:52 -070016183TEST(F32_SPMM_MINMAX_4X1__SCALAR, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016184 for (uint32_t m = 5; m < 8; m++) {
16185 for (uint32_t n = 1; n < 10; n += 2) {
16186 for (size_t k = 1; k <= 5; k += 2) {
16187 SpMMMicrokernelTester()
16188 .mr(4)
16189 .nr(1)
16190 .m(m)
16191 .n(n)
16192 .k(k)
16193 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016194 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016195 }
16196 }
16197 }
16198}
16199
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016200TEST(F32_SPMM_MINMAX_4X1__SCALAR, output_stride) {
16201 for (uint32_t n = 1; n < 10; n += 2) {
16202 for (size_t k = 1; k <= 5; k += 2) {
16203 SpMMMicrokernelTester()
16204 .mr(4)
16205 .nr(1)
16206 .m(8)
16207 .n(n)
16208 .k(k)
16209 .output_stride(11)
16210 .sparsity(0.0f)
16211 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
16212 }
16213 }
16214}
16215
Marat Dukhan355ab432020-04-09 19:01:52 -070016216TEST(F32_SPMM_MINMAX_4X1__SCALAR, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016217 for (uint32_t n = 1; n < 10; n += 2) {
16218 for (size_t k = 1; k <= 5; k += 2) {
16219 SpMMMicrokernelTester()
16220 .mr(4)
16221 .nr(1)
16222 .m(8)
16223 .n(n)
16224 .k(k)
16225 .sparsity(0.0f)
16226 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016227 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016228 }
16229 }
16230}
16231
Marat Dukhan355ab432020-04-09 19:01:52 -070016232TEST(F32_SPMM_MINMAX_4X1__SCALAR, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016233 for (uint32_t n = 1; n < 10; n += 2) {
16234 for (size_t k = 1; k <= 5; k += 2) {
16235 SpMMMicrokernelTester()
16236 .mr(4)
16237 .nr(1)
16238 .m(8)
16239 .n(n)
16240 .k(k)
16241 .sparsity(0.0f)
16242 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016243 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016244 }
16245 }
16246}
16247
Marat Dukhan355ab432020-04-09 19:01:52 -070016248TEST(F32_SPMM_MINMAX_4X1__SCALAR, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016249 for (uint32_t n = 1; n < 10; n += 2) {
16250 for (size_t k = 1; k <= 5; k += 2) {
16251 SpMMMicrokernelTester()
16252 .mr(4)
16253 .nr(1)
16254 .m(8)
16255 .n(n)
16256 .k(k)
16257 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016258 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016259 }
16260 }
16261}
16262
Marat Dukhan355ab432020-04-09 19:01:52 -070016263TEST(F32_SPMM_MINMAX_4X1__SCALAR, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016264 for (uint32_t n = 1; n < 10; n += 2) {
16265 for (size_t k = 1; k <= 5; k += 2) {
16266 SpMMMicrokernelTester()
16267 .mr(4)
16268 .nr(1)
16269 .m(8)
16270 .n(n)
16271 .k(k)
16272 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016273 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016274 }
16275 }
16276}
16277
Marat Dukhan355ab432020-04-09 19:01:52 -070016278TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016279 SpMMMicrokernelTester()
16280 .mr(4)
16281 .nr(1)
16282 .m(4)
16283 .n(1)
16284 .k(1)
16285 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016286 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016287}
16288
Marat Dukhan355ab432020-04-09 19:01:52 -070016289TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016290 for (size_t k = 2; k < 10; k++) {
16291 SpMMMicrokernelTester()
16292 .mr(4)
16293 .nr(1)
16294 .m(4)
16295 .n(1)
16296 .k(k)
16297 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016298 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016299 }
16300}
16301
Marat Dukhan355ab432020-04-09 19:01:52 -070016302TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016303 for (uint32_t n = 2; n < 10; n++) {
16304 for (size_t k = 1; k <= 5; k += 2) {
16305 SpMMMicrokernelTester()
16306 .mr(4)
16307 .nr(1)
16308 .m(4)
16309 .n(n)
16310 .k(k)
16311 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016312 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016313 }
16314 }
16315}
16316
Marat Dukhan355ab432020-04-09 19:01:52 -070016317TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016318 for (uint32_t m = 1; m < 4; m++) {
16319 for (uint32_t n = 1; n < 10; n += 2) {
16320 for (size_t k = 1; k <= 5; k += 2) {
16321 SpMMMicrokernelTester()
16322 .mr(4)
16323 .nr(1)
16324 .m(m)
16325 .n(n)
16326 .k(k)
16327 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016328 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016329 }
16330 }
16331 }
16332}
16333
Marat Dukhan355ab432020-04-09 19:01:52 -070016334TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016335 for (uint32_t m = 8; m <= 12; m += 4) {
16336 for (uint32_t n = 1; n < 10; n += 2) {
16337 for (size_t k = 1; k <= 5; k += 2) {
16338 SpMMMicrokernelTester()
16339 .mr(4)
16340 .nr(1)
16341 .m(m)
16342 .n(n)
16343 .k(k)
16344 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016345 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016346 }
16347 }
16348 }
16349}
16350
Marat Dukhan355ab432020-04-09 19:01:52 -070016351TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, m_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016352 for (uint32_t m = 5; m < 8; m++) {
16353 for (uint32_t n = 1; n < 10; n += 2) {
16354 for (size_t k = 1; k <= 5; k += 2) {
16355 SpMMMicrokernelTester()
16356 .mr(4)
16357 .nr(1)
16358 .m(m)
16359 .n(n)
16360 .k(k)
16361 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016362 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016363 }
16364 }
16365 }
16366}
16367
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016368TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, output_stride) {
16369 for (uint32_t n = 1; n < 10; n += 2) {
16370 for (size_t k = 1; k <= 5; k += 2) {
16371 SpMMMicrokernelTester()
16372 .mr(4)
16373 .nr(1)
16374 .m(8)
16375 .n(n)
16376 .k(k)
16377 .output_stride(11)
16378 .sparsity(0.0f)
16379 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
16380 }
16381 }
16382}
16383
Marat Dukhan355ab432020-04-09 19:01:52 -070016384TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016385 for (uint32_t n = 1; n < 10; n += 2) {
16386 for (size_t k = 1; k <= 5; k += 2) {
16387 SpMMMicrokernelTester()
16388 .mr(4)
16389 .nr(1)
16390 .m(8)
16391 .n(n)
16392 .k(k)
16393 .sparsity(0.0f)
16394 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016395 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016396 }
16397 }
16398}
16399
Marat Dukhan355ab432020-04-09 19:01:52 -070016400TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016401 for (uint32_t n = 1; n < 10; n += 2) {
16402 for (size_t k = 1; k <= 5; k += 2) {
16403 SpMMMicrokernelTester()
16404 .mr(4)
16405 .nr(1)
16406 .m(8)
16407 .n(n)
16408 .k(k)
16409 .sparsity(0.0f)
16410 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016411 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016412 }
16413 }
16414}
16415
Marat Dukhan355ab432020-04-09 19:01:52 -070016416TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016417 for (uint32_t n = 1; n < 10; n += 2) {
16418 for (size_t k = 1; k <= 5; k += 2) {
16419 SpMMMicrokernelTester()
16420 .mr(4)
16421 .nr(1)
16422 .m(8)
16423 .n(n)
16424 .k(k)
16425 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016426 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016427 }
16428 }
16429}
16430
Marat Dukhan355ab432020-04-09 19:01:52 -070016431TEST(F32_SPMM_MINMAX_4X1__SCALAR_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016432 for (uint32_t n = 1; n < 10; n += 2) {
16433 for (size_t k = 1; k <= 5; k += 2) {
16434 SpMMMicrokernelTester()
16435 .mr(4)
16436 .nr(1)
16437 .m(8)
16438 .n(n)
16439 .k(k)
16440 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016441 .Test(xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016442 }
16443 }
16444}
16445
Marat Dukhan355ab432020-04-09 19:01:52 -070016446TEST(F32_SPMM_MINMAX_8X1__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016447 SpMMMicrokernelTester()
16448 .mr(8)
16449 .nr(1)
16450 .m(8)
16451 .n(1)
16452 .k(1)
16453 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016454 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016455}
16456
Marat Dukhan355ab432020-04-09 19:01:52 -070016457TEST(F32_SPMM_MINMAX_8X1__SCALAR, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016458 for (size_t k = 2; k < 10; k++) {
16459 SpMMMicrokernelTester()
16460 .mr(8)
16461 .nr(1)
16462 .m(8)
16463 .n(1)
16464 .k(k)
16465 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016466 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016467 }
16468}
16469
Marat Dukhan355ab432020-04-09 19:01:52 -070016470TEST(F32_SPMM_MINMAX_8X1__SCALAR, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016471 for (uint32_t n = 2; n < 10; n++) {
16472 for (size_t k = 1; k <= 5; k += 2) {
16473 SpMMMicrokernelTester()
16474 .mr(8)
16475 .nr(1)
16476 .m(8)
16477 .n(n)
16478 .k(k)
16479 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016480 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016481 }
16482 }
16483}
16484
Marat Dukhan355ab432020-04-09 19:01:52 -070016485TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016486 for (uint32_t m = 1; m < 8; m++) {
16487 for (uint32_t n = 1; n < 10; n += 2) {
16488 for (size_t k = 1; k <= 5; k += 2) {
16489 SpMMMicrokernelTester()
16490 .mr(8)
16491 .nr(1)
16492 .m(m)
16493 .n(n)
16494 .k(k)
16495 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016496 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016497 }
16498 }
16499 }
16500}
16501
Marat Dukhan355ab432020-04-09 19:01:52 -070016502TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016503 for (uint32_t m = 16; m <= 24; m += 8) {
16504 for (uint32_t n = 1; n < 10; n += 2) {
16505 for (size_t k = 1; k <= 5; k += 2) {
16506 SpMMMicrokernelTester()
16507 .mr(8)
16508 .nr(1)
16509 .m(m)
16510 .n(n)
16511 .k(k)
16512 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016513 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016514 }
16515 }
16516 }
16517}
16518
Marat Dukhan355ab432020-04-09 19:01:52 -070016519TEST(F32_SPMM_MINMAX_8X1__SCALAR, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016520 for (uint32_t m = 9; m < 16; m++) {
16521 for (uint32_t n = 1; n < 10; n += 2) {
16522 for (size_t k = 1; k <= 5; k += 2) {
16523 SpMMMicrokernelTester()
16524 .mr(8)
16525 .nr(1)
16526 .m(m)
16527 .n(n)
16528 .k(k)
16529 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016530 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016531 }
16532 }
16533 }
16534}
16535
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016536TEST(F32_SPMM_MINMAX_8X1__SCALAR, output_stride) {
16537 for (uint32_t n = 1; n < 10; n += 2) {
16538 for (size_t k = 1; k <= 5; k += 2) {
16539 SpMMMicrokernelTester()
16540 .mr(8)
16541 .nr(1)
16542 .m(16)
16543 .n(n)
16544 .k(k)
16545 .output_stride(19)
16546 .sparsity(0.0f)
16547 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
16548 }
16549 }
16550}
16551
Marat Dukhan355ab432020-04-09 19:01:52 -070016552TEST(F32_SPMM_MINMAX_8X1__SCALAR, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016553 for (uint32_t n = 1; n < 10; n += 2) {
16554 for (size_t k = 1; k <= 5; k += 2) {
16555 SpMMMicrokernelTester()
16556 .mr(8)
16557 .nr(1)
16558 .m(16)
16559 .n(n)
16560 .k(k)
16561 .sparsity(0.0f)
16562 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016563 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016564 }
16565 }
16566}
16567
Marat Dukhan355ab432020-04-09 19:01:52 -070016568TEST(F32_SPMM_MINMAX_8X1__SCALAR, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016569 for (uint32_t n = 1; n < 10; n += 2) {
16570 for (size_t k = 1; k <= 5; k += 2) {
16571 SpMMMicrokernelTester()
16572 .mr(8)
16573 .nr(1)
16574 .m(16)
16575 .n(n)
16576 .k(k)
16577 .sparsity(0.0f)
16578 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016579 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016580 }
16581 }
16582}
16583
Marat Dukhan355ab432020-04-09 19:01:52 -070016584TEST(F32_SPMM_MINMAX_8X1__SCALAR, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016585 for (uint32_t n = 1; n < 10; n += 2) {
16586 for (size_t k = 1; k <= 5; k += 2) {
16587 SpMMMicrokernelTester()
16588 .mr(8)
16589 .nr(1)
16590 .m(16)
16591 .n(n)
16592 .k(k)
16593 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016594 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016595 }
16596 }
16597}
16598
Marat Dukhan355ab432020-04-09 19:01:52 -070016599TEST(F32_SPMM_MINMAX_8X1__SCALAR, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016600 for (uint32_t n = 1; n < 10; n += 2) {
16601 for (size_t k = 1; k <= 5; k += 2) {
16602 SpMMMicrokernelTester()
16603 .mr(8)
16604 .nr(1)
16605 .m(16)
16606 .n(n)
16607 .k(k)
16608 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016609 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016610 }
16611 }
16612}
16613
Marat Dukhan355ab432020-04-09 19:01:52 -070016614TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016615 SpMMMicrokernelTester()
16616 .mr(8)
16617 .nr(1)
16618 .m(8)
16619 .n(1)
16620 .k(1)
16621 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016622 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016623}
16624
Marat Dukhan355ab432020-04-09 19:01:52 -070016625TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, k_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016626 for (size_t k = 2; k < 10; k++) {
16627 SpMMMicrokernelTester()
16628 .mr(8)
16629 .nr(1)
16630 .m(8)
16631 .n(1)
16632 .k(k)
16633 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016634 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016635 }
16636}
16637
Marat Dukhan355ab432020-04-09 19:01:52 -070016638TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, n_gt_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016639 for (uint32_t n = 2; n < 10; n++) {
16640 for (size_t k = 1; k <= 5; k += 2) {
16641 SpMMMicrokernelTester()
16642 .mr(8)
16643 .nr(1)
16644 .m(8)
16645 .n(n)
16646 .k(k)
16647 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016648 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016649 }
16650 }
16651}
16652
Marat Dukhan355ab432020-04-09 19:01:52 -070016653TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_lt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016654 for (uint32_t m = 1; m < 8; m++) {
16655 for (uint32_t n = 1; n < 10; n += 2) {
16656 for (size_t k = 1; k <= 5; k += 2) {
16657 SpMMMicrokernelTester()
16658 .mr(8)
16659 .nr(1)
16660 .m(m)
16661 .n(n)
16662 .k(k)
16663 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016664 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016665 }
16666 }
16667 }
16668}
16669
Marat Dukhan355ab432020-04-09 19:01:52 -070016670TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016671 for (uint32_t m = 16; m <= 24; m += 8) {
16672 for (uint32_t n = 1; n < 10; n += 2) {
16673 for (size_t k = 1; k <= 5; k += 2) {
16674 SpMMMicrokernelTester()
16675 .mr(8)
16676 .nr(1)
16677 .m(m)
16678 .n(n)
16679 .k(k)
16680 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016681 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016682 }
16683 }
16684 }
16685}
16686
Marat Dukhan355ab432020-04-09 19:01:52 -070016687TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, m_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016688 for (uint32_t m = 9; m < 16; m++) {
16689 for (uint32_t n = 1; n < 10; n += 2) {
16690 for (size_t k = 1; k <= 5; k += 2) {
16691 SpMMMicrokernelTester()
16692 .mr(8)
16693 .nr(1)
16694 .m(m)
16695 .n(n)
16696 .k(k)
16697 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016698 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016699 }
16700 }
16701 }
16702}
16703
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016704TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, output_stride) {
16705 for (uint32_t n = 1; n < 10; n += 2) {
16706 for (size_t k = 1; k <= 5; k += 2) {
16707 SpMMMicrokernelTester()
16708 .mr(8)
16709 .nr(1)
16710 .m(16)
16711 .n(n)
16712 .k(k)
16713 .output_stride(19)
16714 .sparsity(0.0f)
16715 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
16716 }
16717 }
16718}
16719
Marat Dukhan355ab432020-04-09 19:01:52 -070016720TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016721 for (uint32_t n = 1; n < 10; n += 2) {
16722 for (size_t k = 1; k <= 5; k += 2) {
16723 SpMMMicrokernelTester()
16724 .mr(8)
16725 .nr(1)
16726 .m(16)
16727 .n(n)
16728 .k(k)
16729 .sparsity(0.0f)
16730 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016731 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016732 }
16733 }
16734}
16735
Marat Dukhan355ab432020-04-09 19:01:52 -070016736TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016737 for (uint32_t n = 1; n < 10; n += 2) {
16738 for (size_t k = 1; k <= 5; k += 2) {
16739 SpMMMicrokernelTester()
16740 .mr(8)
16741 .nr(1)
16742 .m(16)
16743 .n(n)
16744 .k(k)
16745 .sparsity(0.0f)
16746 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016747 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016748 }
16749 }
16750}
16751
Marat Dukhan355ab432020-04-09 19:01:52 -070016752TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, half_sparse) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016753 for (uint32_t n = 1; n < 10; n += 2) {
16754 for (size_t k = 1; k <= 5; k += 2) {
16755 SpMMMicrokernelTester()
16756 .mr(8)
16757 .nr(1)
16758 .m(16)
16759 .n(n)
16760 .k(k)
16761 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016762 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016763 }
16764 }
16765}
16766
Marat Dukhan355ab432020-04-09 19:01:52 -070016767TEST(F32_SPMM_MINMAX_8X1__SCALAR_PIPELINED, zero_weights) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016768 for (uint32_t n = 1; n < 10; n += 2) {
16769 for (size_t k = 1; k <= 5; k += 2) {
16770 SpMMMicrokernelTester()
16771 .mr(8)
16772 .nr(1)
16773 .m(16)
16774 .n(n)
16775 .k(k)
16776 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016777 .Test(xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016778 }
16779 }
16780}
16781
Marat Dukhan355ab432020-04-09 19:01:52 -070016782TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016783 SpMMMicrokernelTester()
16784 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016785 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016786 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016787 .n(2)
16788 .k(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016789 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016790 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016791}
16792
Marat Dukhan355ab432020-04-09 19:01:52 -070016793TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_eq_1_subtile) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016794 for (uint32_t n = 1; n <= 2; n++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016795 SpMMMicrokernelTester()
16796 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016797 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016798 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016799 .n(n)
16800 .k(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016801 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016802 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016803 }
16804}
16805
Marat Dukhan355ab432020-04-09 19:01:52 -070016806TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_gt_1) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016807 for (size_t k = 2; k < 10; k++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016808 SpMMMicrokernelTester()
16809 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016810 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016811 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016812 .n(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016813 .k(k)
16814 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016815 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016816 }
16817}
16818
Marat Dukhan355ab432020-04-09 19:01:52 -070016819TEST(F32_SPMM_MINMAX_8X2__SCALAR, k_gt_1_subtile) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016820 for (size_t k = 2; k < 10; k++) {
16821 for (uint32_t n = 1; n <= 2; n++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016822 SpMMMicrokernelTester()
16823 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016824 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016825 .m(8)
16826 .n(n)
16827 .k(k)
16828 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016829 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016830 }
16831 }
16832}
16833
Marat Dukhan355ab432020-04-09 19:01:52 -070016834TEST(F32_SPMM_MINMAX_8X2__SCALAR, n_gt_2) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016835 for (uint32_t n = 3; n < 10; n++) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016836 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016837 SpMMMicrokernelTester()
16838 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016839 .nr(2)
16840 .m(8)
16841 .n(n)
16842 .k(k)
16843 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016844 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016845 }
16846 }
16847}
16848
Marat Dukhan355ab432020-04-09 19:01:52 -070016849TEST(F32_SPMM_MINMAX_8X2__SCALAR, n_div_2) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016850 for (uint32_t n = 4; n <= 6; n += 2) {
16851 for (size_t k = 1; k <= 5; k += 2) {
16852 SpMMMicrokernelTester()
16853 .mr(8)
16854 .nr(2)
16855 .m(8)
16856 .n(n)
16857 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -070016858 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
Marat Dukhanc452eb12019-10-25 19:19:34 -070016859 }
16860 }
16861}
16862
Marat Dukhan355ab432020-04-09 19:01:52 -070016863TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_lt_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016864 for (uint32_t m = 1; m < 8; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016865 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016866 for (size_t k = 1; k <= 5; k += 2) {
16867 SpMMMicrokernelTester()
16868 .mr(8)
16869 .nr(2)
16870 .m(m)
16871 .n(n)
16872 .k(k)
16873 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016874 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016875 }
16876 }
16877 }
16878}
16879
Marat Dukhan355ab432020-04-09 19:01:52 -070016880TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_div_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016881 for (uint32_t m = 16; m <= 24; m += 8) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016882 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016883 for (size_t k = 1; k <= 5; k += 2) {
16884 SpMMMicrokernelTester()
16885 .mr(8)
16886 .nr(2)
16887 .m(m)
16888 .n(n)
16889 .k(k)
16890 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016891 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016892 }
16893 }
16894 }
16895}
16896
Marat Dukhan355ab432020-04-09 19:01:52 -070016897TEST(F32_SPMM_MINMAX_8X2__SCALAR, m_gt_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016898 for (uint32_t m = 9; m < 16; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016899 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016900 for (size_t k = 1; k <= 5; k += 2) {
16901 SpMMMicrokernelTester()
16902 .mr(8)
16903 .nr(2)
16904 .m(m)
16905 .n(n)
16906 .k(k)
16907 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016908 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016909 }
16910 }
16911 }
16912}
16913
Marat Dukhane8bfcc82020-11-16 12:28:13 -080016914TEST(F32_SPMM_MINMAX_8X2__SCALAR, output_stride) {
16915 for (uint32_t n = 1; n < 10; n += 3) {
16916 for (size_t k = 1; k <= 5; k += 2) {
16917 SpMMMicrokernelTester()
16918 .mr(8)
16919 .nr(2)
16920 .m(16)
16921 .n(n)
16922 .k(k)
16923 .output_stride(19)
16924 .sparsity(0.0f)
16925 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
16926 }
16927 }
16928}
16929
Marat Dukhan355ab432020-04-09 19:01:52 -070016930TEST(F32_SPMM_MINMAX_8X2__SCALAR, qmin) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016931 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016932 for (size_t k = 1; k <= 5; k += 2) {
16933 SpMMMicrokernelTester()
16934 .mr(8)
16935 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016936 .m(16)
16937 .n(n)
16938 .k(k)
16939 .sparsity(0.0f)
16940 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016941 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016942 }
16943 }
16944}
16945
Marat Dukhan355ab432020-04-09 19:01:52 -070016946TEST(F32_SPMM_MINMAX_8X2__SCALAR, qmax) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016947 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016948 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016949 SpMMMicrokernelTester()
16950 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016951 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016952 .m(16)
16953 .n(n)
16954 .k(k)
16955 .sparsity(0.0f)
16956 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070016957 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016958 }
16959 }
16960}
16961
Marat Dukhan355ab432020-04-09 19:01:52 -070016962TEST(F32_SPMM_MINMAX_8X2__SCALAR, half_sparse) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016963 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016964 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016965 SpMMMicrokernelTester()
16966 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016967 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016968 .m(16)
16969 .n(n)
16970 .k(k)
16971 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016972 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016973 }
16974 }
16975}
16976
Marat Dukhan355ab432020-04-09 19:01:52 -070016977TEST(F32_SPMM_MINMAX_8X2__SCALAR, zero_weights) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070016978 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016979 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070016980 SpMMMicrokernelTester()
16981 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016982 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -070016983 .m(16)
16984 .n(n)
16985 .k(k)
16986 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070016987 .Test(xnn_f32_spmm_minmax_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -070016988 }
16989 }
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016990}
16991
Marat Dukhan355ab432020-04-09 19:01:52 -070016992TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_eq_1) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016993 SpMMMicrokernelTester()
16994 .mr(8)
16995 .nr(4)
16996 .m(8)
Marat Dukhanc452eb12019-10-25 19:19:34 -070016997 .n(4)
Erich Elsenc6afd9b2019-10-24 16:10:53 -070016998 .k(1)
16999 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017000 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017001}
17002
Marat Dukhan355ab432020-04-09 19:01:52 -070017003TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_eq_1_subtile) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017004 for (uint32_t n = 1; n <= 4; n++) {
17005 SpMMMicrokernelTester()
17006 .mr(8)
17007 .nr(4)
17008 .m(8)
17009 .n(n)
17010 .k(1)
17011 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017012 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017013 }
17014}
17015
Marat Dukhan355ab432020-04-09 19:01:52 -070017016TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_gt_1) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017017 for (size_t k = 2; k < 10; k++) {
17018 SpMMMicrokernelTester()
17019 .mr(8)
17020 .nr(4)
17021 .m(8)
17022 .n(4)
17023 .k(k)
17024 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017025 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017026 }
17027}
17028
Marat Dukhan355ab432020-04-09 19:01:52 -070017029TEST(F32_SPMM_MINMAX_8X4__SCALAR, k_gt_1_subtile) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017030 for (size_t k = 2; k < 10; k++) {
17031 for (uint32_t n = 1; n <= 4; n++) {
17032 SpMMMicrokernelTester()
17033 .mr(8)
17034 .nr(4)
17035 .m(8)
17036 .n(n)
17037 .k(k)
17038 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017039 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017040 }
17041 }
17042}
17043
Marat Dukhan355ab432020-04-09 19:01:52 -070017044TEST(F32_SPMM_MINMAX_8X4__SCALAR, n_gt_4) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017045 for (uint32_t n = 5; n < 10; n++) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017046 for (size_t k = 1; k <= 5; k += 2) {
17047 SpMMMicrokernelTester()
17048 .mr(8)
17049 .nr(4)
17050 .m(8)
17051 .n(n)
17052 .k(k)
17053 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017054 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017055 }
17056 }
17057}
17058
Marat Dukhan355ab432020-04-09 19:01:52 -070017059TEST(F32_SPMM_MINMAX_8X4__SCALAR, n_div_4) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017060 for (uint32_t n = 8; n <= 12; n += 4) {
17061 for (size_t k = 1; k <= 5; k += 2) {
17062 SpMMMicrokernelTester()
17063 .mr(8)
17064 .nr(4)
17065 .m(8)
17066 .n(n)
17067 .k(k)
Marat Dukhan355ab432020-04-09 19:01:52 -070017068 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Marat Dukhanc452eb12019-10-25 19:19:34 -070017069 }
17070 }
17071}
17072
Marat Dukhan355ab432020-04-09 19:01:52 -070017073TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_lt_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017074 for (uint32_t m = 1; m < 8; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017075 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017076 for (size_t k = 1; k <= 5; k += 2) {
17077 SpMMMicrokernelTester()
17078 .mr(8)
17079 .nr(4)
17080 .m(m)
17081 .n(n)
17082 .k(k)
17083 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017084 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017085 }
17086 }
17087 }
17088}
17089
Marat Dukhan355ab432020-04-09 19:01:52 -070017090TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_div_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017091 for (uint32_t m = 16; m <= 24; m += 8) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017092 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017093 for (size_t k = 1; k <= 5; k += 2) {
17094 SpMMMicrokernelTester()
17095 .mr(8)
17096 .nr(4)
17097 .m(m)
17098 .n(n)
17099 .k(k)
17100 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017101 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017102 }
17103 }
17104 }
17105}
17106
Marat Dukhan355ab432020-04-09 19:01:52 -070017107TEST(F32_SPMM_MINMAX_8X4__SCALAR, m_gt_8) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017108 for (uint32_t m = 9; m < 16; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017109 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017110 for (size_t k = 1; k <= 5; k += 2) {
17111 SpMMMicrokernelTester()
17112 .mr(8)
17113 .nr(4)
17114 .m(m)
17115 .n(n)
17116 .k(k)
17117 .sparsity(0.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017118 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017119 }
17120 }
17121 }
17122}
17123
Marat Dukhane8bfcc82020-11-16 12:28:13 -080017124TEST(F32_SPMM_MINMAX_8X4__SCALAR, output_stride) {
17125 for (uint32_t n = 1; n < 20; n += 5) {
17126 for (size_t k = 1; k <= 5; k += 2) {
17127 SpMMMicrokernelTester()
17128 .mr(8)
17129 .nr(4)
17130 .m(16)
17131 .n(n)
17132 .k(k)
17133 .output_stride(19)
17134 .sparsity(0.0f)
17135 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
17136 }
17137 }
17138}
17139
Marat Dukhan355ab432020-04-09 19:01:52 -070017140TEST(F32_SPMM_MINMAX_8X4__SCALAR, qmin) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017141 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017142 for (size_t k = 1; k <= 5; k += 2) {
17143 SpMMMicrokernelTester()
17144 .mr(8)
17145 .nr(4)
17146 .m(16)
17147 .n(n)
17148 .k(k)
17149 .sparsity(0.0f)
17150 .qmin(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070017151 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017152 }
17153 }
17154}
17155
Marat Dukhan355ab432020-04-09 19:01:52 -070017156TEST(F32_SPMM_MINMAX_8X4__SCALAR, qmax) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017157 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017158 for (size_t k = 1; k <= 5; k += 2) {
17159 SpMMMicrokernelTester()
17160 .mr(8)
17161 .nr(4)
17162 .m(16)
17163 .n(n)
17164 .k(k)
17165 .sparsity(0.0f)
17166 .qmax(128)
Marat Dukhan355ab432020-04-09 19:01:52 -070017167 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017168 }
17169 }
17170}
17171
Marat Dukhan355ab432020-04-09 19:01:52 -070017172TEST(F32_SPMM_MINMAX_8X4__SCALAR, half_sparse) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017173 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017174 for (size_t k = 1; k <= 5; k += 2) {
17175 SpMMMicrokernelTester()
17176 .mr(8)
17177 .nr(4)
17178 .m(16)
17179 .n(n)
17180 .k(k)
17181 .sparsity(0.5f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017182 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017183 }
17184 }
17185}
17186
Marat Dukhan355ab432020-04-09 19:01:52 -070017187TEST(F32_SPMM_MINMAX_8X4__SCALAR, zero_weights) {
Marat Dukhanc452eb12019-10-25 19:19:34 -070017188 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017189 for (size_t k = 1; k <= 5; k += 2) {
17190 SpMMMicrokernelTester()
17191 .mr(8)
17192 .nr(4)
17193 .m(16)
17194 .n(n)
17195 .k(k)
17196 .sparsity(1.0f)
Marat Dukhan355ab432020-04-09 19:01:52 -070017197 .Test(xnn_f32_spmm_minmax_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
Erich Elsenc6afd9b2019-10-24 16:10:53 -070017198 }
17199 }
Marat Dukhanc452eb12019-10-25 19:19:34 -070017200}