blob: 49d397f126125e16eab2c9caff1dda0cdaab1f4e [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-spmm.yaml
//   Generator: tools/generate-spmm-test.py
9
10
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/spmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "spmm-microkernel-tester.h"
18
19
Marat Dukhan1dadbf72019-10-01 10:46:20 -070020#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070021 TEST(F32_SPMM_4X1__NEONFMA, k_eq_1) {
22 TEST_REQUIRES_ARM_NEON_FMA;
23 SpMMMicrokernelTester()
24 .mr(4)
25 .nr(1)
26 .m(4)
27 .n(1)
28 .k(1)
29 .sparsity(0.0f)
30 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
31 }
32
33 TEST(F32_SPMM_4X1__NEONFMA, k_gt_1) {
34 TEST_REQUIRES_ARM_NEON_FMA;
35 for (size_t k = 2; k < 10; k++) {
36 SpMMMicrokernelTester()
37 .mr(4)
38 .nr(1)
39 .m(4)
40 .n(1)
41 .k(k)
42 .sparsity(0.0f)
43 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
44 }
45 }
46
47 TEST(F32_SPMM_4X1__NEONFMA, n_gt_1) {
48 TEST_REQUIRES_ARM_NEON_FMA;
49 for (uint32_t n = 2; n < 10; n++) {
50 for (size_t k = 1; k <= 5; k += 2) {
51 SpMMMicrokernelTester()
52 .mr(4)
53 .nr(1)
54 .m(4)
55 .n(n)
56 .k(k)
57 .sparsity(0.0f)
58 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
59 }
60 }
61 }
62
63 TEST(F32_SPMM_4X1__NEONFMA, m_lt_4) {
64 TEST_REQUIRES_ARM_NEON_FMA;
65 for (uint32_t m = 1; m < 4; m++) {
66 for (uint32_t n = 1; n < 10; n += 2) {
67 for (size_t k = 1; k <= 5; k += 2) {
68 SpMMMicrokernelTester()
69 .mr(4)
70 .nr(1)
71 .m(m)
72 .n(n)
73 .k(k)
74 .sparsity(0.0f)
75 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
76 }
77 }
78 }
79 }
80
81 TEST(F32_SPMM_4X1__NEONFMA, m_div_4) {
82 TEST_REQUIRES_ARM_NEON_FMA;
83 for (uint32_t m = 8; m <= 12; m += 4) {
84 for (uint32_t n = 1; n < 10; n += 2) {
85 for (size_t k = 1; k <= 5; k += 2) {
86 SpMMMicrokernelTester()
87 .mr(4)
88 .nr(1)
89 .m(m)
90 .n(n)
91 .k(k)
92 .sparsity(0.0f)
93 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
94 }
95 }
96 }
97 }
98
99 TEST(F32_SPMM_4X1__NEONFMA, m_gt_4) {
100 TEST_REQUIRES_ARM_NEON_FMA;
101 for (uint32_t m = 5; m < 8; m++) {
102 for (uint32_t n = 1; n < 10; n += 2) {
103 for (size_t k = 1; k <= 5; k += 2) {
104 SpMMMicrokernelTester()
105 .mr(4)
106 .nr(1)
107 .m(m)
108 .n(n)
109 .k(k)
110 .sparsity(0.0f)
111 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
112 }
113 }
114 }
115 }
116
117 TEST(F32_SPMM_4X1__NEONFMA, qmin) {
118 TEST_REQUIRES_ARM_NEON_FMA;
119 for (uint32_t n = 1; n < 10; n += 2) {
120 for (size_t k = 1; k <= 5; k += 2) {
121 SpMMMicrokernelTester()
122 .mr(4)
123 .nr(1)
124 .m(8)
125 .n(n)
126 .k(k)
127 .sparsity(0.0f)
128 .qmin(128)
129 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
130 }
131 }
132 }
133
134 TEST(F32_SPMM_4X1__NEONFMA, qmax) {
135 TEST_REQUIRES_ARM_NEON_FMA;
136 for (uint32_t n = 1; n < 10; n += 2) {
137 for (size_t k = 1; k <= 5; k += 2) {
138 SpMMMicrokernelTester()
139 .mr(4)
140 .nr(1)
141 .m(8)
142 .n(n)
143 .k(k)
144 .sparsity(0.0f)
145 .qmax(128)
146 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
147 }
148 }
149 }
150
151 TEST(F32_SPMM_4X1__NEONFMA, half_sparse) {
152 TEST_REQUIRES_ARM_NEON_FMA;
153 for (uint32_t n = 1; n < 10; n += 2) {
154 for (size_t k = 1; k <= 5; k += 2) {
155 SpMMMicrokernelTester()
156 .mr(4)
157 .nr(1)
158 .m(8)
159 .n(n)
160 .k(k)
161 .sparsity(0.5f)
162 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
163 }
164 }
165 }
166
167 TEST(F32_SPMM_4X1__NEONFMA, zero_weights) {
168 TEST_REQUIRES_ARM_NEON_FMA;
169 for (uint32_t n = 1; n < 10; n += 2) {
170 for (size_t k = 1; k <= 5; k += 2) {
171 SpMMMicrokernelTester()
172 .mr(4)
173 .nr(1)
174 .m(8)
175 .n(n)
176 .k(k)
177 .sparsity(1.0f)
178 .Test(xnn_f32_spmm_ukernel_4x1__neonfma);
179 }
180 }
181 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700182#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700183
184
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700185#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700186 TEST(F32_SPMM_4X2__NEONFMA, k_eq_1) {
187 TEST_REQUIRES_ARM_NEON_FMA;
188 SpMMMicrokernelTester()
189 .mr(4)
190 .nr(2)
191 .m(4)
192 .n(2)
193 .k(1)
194 .sparsity(0.0f)
195 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
196 }
197
198 TEST(F32_SPMM_4X2__NEONFMA, k_eq_1_subtile) {
199 TEST_REQUIRES_ARM_NEON_FMA;
200 for (uint32_t n = 1; n <= 2; n++) {
201 SpMMMicrokernelTester()
202 .mr(4)
203 .nr(2)
204 .m(4)
205 .n(n)
206 .k(1)
207 .sparsity(0.0f)
208 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
209 }
210 }
211
212 TEST(F32_SPMM_4X2__NEONFMA, k_gt_1) {
213 TEST_REQUIRES_ARM_NEON_FMA;
214 for (size_t k = 2; k < 10; k++) {
215 SpMMMicrokernelTester()
216 .mr(4)
217 .nr(2)
218 .m(4)
219 .n(2)
220 .k(k)
221 .sparsity(0.0f)
222 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
223 }
224 }
225
226 TEST(F32_SPMM_4X2__NEONFMA, k_gt_1_subtile) {
227 TEST_REQUIRES_ARM_NEON_FMA;
228 for (size_t k = 2; k < 10; k++) {
229 for (uint32_t n = 1; n <= 2; n++) {
230 SpMMMicrokernelTester()
231 .mr(4)
232 .nr(2)
233 .m(4)
234 .n(n)
235 .k(k)
236 .sparsity(0.0f)
237 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
238 }
239 }
240 }
241
242 TEST(F32_SPMM_4X2__NEONFMA, n_gt_2) {
243 TEST_REQUIRES_ARM_NEON_FMA;
244 for (uint32_t n = 3; n < 10; n++) {
245 for (size_t k = 1; k <= 5; k += 2) {
246 SpMMMicrokernelTester()
247 .mr(4)
248 .nr(2)
249 .m(4)
250 .n(n)
251 .k(k)
252 .sparsity(0.0f)
253 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
254 }
255 }
256 }
257
258 TEST(F32_SPMM_4X2__NEONFMA, n_div_2) {
259 TEST_REQUIRES_ARM_NEON_FMA;
260 for (uint32_t n = 4; n <= 6; n += 2) {
261 for (size_t k = 1; k <= 5; k += 2) {
262 SpMMMicrokernelTester()
263 .mr(4)
264 .nr(2)
265 .m(4)
266 .n(n)
267 .k(k)
268 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
269 }
270 }
271 }
272
273 TEST(F32_SPMM_4X2__NEONFMA, m_lt_4) {
274 TEST_REQUIRES_ARM_NEON_FMA;
275 for (uint32_t m = 1; m < 4; m++) {
276 for (uint32_t n = 1; n < 10; n += 3) {
277 for (size_t k = 1; k <= 5; k += 2) {
278 SpMMMicrokernelTester()
279 .mr(4)
280 .nr(2)
281 .m(m)
282 .n(n)
283 .k(k)
284 .sparsity(0.0f)
285 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
286 }
287 }
288 }
289 }
290
291 TEST(F32_SPMM_4X2__NEONFMA, m_div_4) {
292 TEST_REQUIRES_ARM_NEON_FMA;
293 for (uint32_t m = 8; m <= 12; m += 4) {
294 for (uint32_t n = 1; n < 10; n += 3) {
295 for (size_t k = 1; k <= 5; k += 2) {
296 SpMMMicrokernelTester()
297 .mr(4)
298 .nr(2)
299 .m(m)
300 .n(n)
301 .k(k)
302 .sparsity(0.0f)
303 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
304 }
305 }
306 }
307 }
308
309 TEST(F32_SPMM_4X2__NEONFMA, m_gt_4) {
310 TEST_REQUIRES_ARM_NEON_FMA;
311 for (uint32_t m = 5; m < 8; m++) {
312 for (uint32_t n = 1; n < 10; n += 3) {
313 for (size_t k = 1; k <= 5; k += 2) {
314 SpMMMicrokernelTester()
315 .mr(4)
316 .nr(2)
317 .m(m)
318 .n(n)
319 .k(k)
320 .sparsity(0.0f)
321 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
322 }
323 }
324 }
325 }
326
327 TEST(F32_SPMM_4X2__NEONFMA, qmin) {
328 TEST_REQUIRES_ARM_NEON_FMA;
329 for (uint32_t n = 1; n < 10; n += 3) {
330 for (size_t k = 1; k <= 5; k += 2) {
331 SpMMMicrokernelTester()
332 .mr(4)
333 .nr(2)
334 .m(8)
335 .n(n)
336 .k(k)
337 .sparsity(0.0f)
338 .qmin(128)
339 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
340 }
341 }
342 }
343
344 TEST(F32_SPMM_4X2__NEONFMA, qmax) {
345 TEST_REQUIRES_ARM_NEON_FMA;
346 for (uint32_t n = 1; n < 10; n += 3) {
347 for (size_t k = 1; k <= 5; k += 2) {
348 SpMMMicrokernelTester()
349 .mr(4)
350 .nr(2)
351 .m(8)
352 .n(n)
353 .k(k)
354 .sparsity(0.0f)
355 .qmax(128)
356 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
357 }
358 }
359 }
360
361 TEST(F32_SPMM_4X2__NEONFMA, half_sparse) {
362 TEST_REQUIRES_ARM_NEON_FMA;
363 for (uint32_t n = 1; n < 10; n += 3) {
364 for (size_t k = 1; k <= 5; k += 2) {
365 SpMMMicrokernelTester()
366 .mr(4)
367 .nr(2)
368 .m(8)
369 .n(n)
370 .k(k)
371 .sparsity(0.5f)
372 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
373 }
374 }
375 }
376
377 TEST(F32_SPMM_4X2__NEONFMA, zero_weights) {
378 TEST_REQUIRES_ARM_NEON_FMA;
379 for (uint32_t n = 1; n < 10; n += 3) {
380 for (size_t k = 1; k <= 5; k += 2) {
381 SpMMMicrokernelTester()
382 .mr(4)
383 .nr(2)
384 .m(8)
385 .n(n)
386 .k(k)
387 .sparsity(1.0f)
388 .Test(xnn_f32_spmm_ukernel_4x2__neonfma);
389 }
390 }
391 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700392#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700393
394
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700395#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700396 TEST(F32_SPMM_4X4__NEONFMA, k_eq_1) {
397 TEST_REQUIRES_ARM_NEON_FMA;
398 SpMMMicrokernelTester()
399 .mr(4)
400 .nr(4)
401 .m(4)
402 .n(4)
403 .k(1)
404 .sparsity(0.0f)
405 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
406 }
407
408 TEST(F32_SPMM_4X4__NEONFMA, k_eq_1_subtile) {
409 TEST_REQUIRES_ARM_NEON_FMA;
410 for (uint32_t n = 1; n <= 4; n++) {
411 SpMMMicrokernelTester()
412 .mr(4)
413 .nr(4)
414 .m(4)
415 .n(n)
416 .k(1)
417 .sparsity(0.0f)
418 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
419 }
420 }
421
422 TEST(F32_SPMM_4X4__NEONFMA, k_gt_1) {
423 TEST_REQUIRES_ARM_NEON_FMA;
424 for (size_t k = 2; k < 10; k++) {
425 SpMMMicrokernelTester()
426 .mr(4)
427 .nr(4)
428 .m(4)
429 .n(4)
430 .k(k)
431 .sparsity(0.0f)
432 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
433 }
434 }
435
436 TEST(F32_SPMM_4X4__NEONFMA, k_gt_1_subtile) {
437 TEST_REQUIRES_ARM_NEON_FMA;
438 for (size_t k = 2; k < 10; k++) {
439 for (uint32_t n = 1; n <= 4; n++) {
440 SpMMMicrokernelTester()
441 .mr(4)
442 .nr(4)
443 .m(4)
444 .n(n)
445 .k(k)
446 .sparsity(0.0f)
447 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
448 }
449 }
450 }
451
452 TEST(F32_SPMM_4X4__NEONFMA, n_gt_4) {
453 TEST_REQUIRES_ARM_NEON_FMA;
454 for (uint32_t n = 5; n < 10; n++) {
455 for (size_t k = 1; k <= 5; k += 2) {
456 SpMMMicrokernelTester()
457 .mr(4)
458 .nr(4)
459 .m(4)
460 .n(n)
461 .k(k)
462 .sparsity(0.0f)
463 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
464 }
465 }
466 }
467
468 TEST(F32_SPMM_4X4__NEONFMA, n_div_4) {
469 TEST_REQUIRES_ARM_NEON_FMA;
470 for (uint32_t n = 8; n <= 12; n += 4) {
471 for (size_t k = 1; k <= 5; k += 2) {
472 SpMMMicrokernelTester()
473 .mr(4)
474 .nr(4)
475 .m(4)
476 .n(n)
477 .k(k)
478 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
479 }
480 }
481 }
482
483 TEST(F32_SPMM_4X4__NEONFMA, m_lt_4) {
484 TEST_REQUIRES_ARM_NEON_FMA;
485 for (uint32_t m = 1; m < 4; m++) {
486 for (uint32_t n = 1; n < 20; n += 5) {
487 for (size_t k = 1; k <= 5; k += 2) {
488 SpMMMicrokernelTester()
489 .mr(4)
490 .nr(4)
491 .m(m)
492 .n(n)
493 .k(k)
494 .sparsity(0.0f)
495 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
496 }
497 }
498 }
499 }
500
501 TEST(F32_SPMM_4X4__NEONFMA, m_div_4) {
502 TEST_REQUIRES_ARM_NEON_FMA;
503 for (uint32_t m = 8; m <= 12; m += 4) {
504 for (uint32_t n = 1; n < 20; n += 5) {
505 for (size_t k = 1; k <= 5; k += 2) {
506 SpMMMicrokernelTester()
507 .mr(4)
508 .nr(4)
509 .m(m)
510 .n(n)
511 .k(k)
512 .sparsity(0.0f)
513 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
514 }
515 }
516 }
517 }
518
519 TEST(F32_SPMM_4X4__NEONFMA, m_gt_4) {
520 TEST_REQUIRES_ARM_NEON_FMA;
521 for (uint32_t m = 5; m < 8; m++) {
522 for (uint32_t n = 1; n < 20; n += 5) {
523 for (size_t k = 1; k <= 5; k += 2) {
524 SpMMMicrokernelTester()
525 .mr(4)
526 .nr(4)
527 .m(m)
528 .n(n)
529 .k(k)
530 .sparsity(0.0f)
531 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
532 }
533 }
534 }
535 }
536
537 TEST(F32_SPMM_4X4__NEONFMA, qmin) {
538 TEST_REQUIRES_ARM_NEON_FMA;
539 for (uint32_t n = 1; n < 20; n += 5) {
540 for (size_t k = 1; k <= 5; k += 2) {
541 SpMMMicrokernelTester()
542 .mr(4)
543 .nr(4)
544 .m(8)
545 .n(n)
546 .k(k)
547 .sparsity(0.0f)
548 .qmin(128)
549 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
550 }
551 }
552 }
553
554 TEST(F32_SPMM_4X4__NEONFMA, qmax) {
555 TEST_REQUIRES_ARM_NEON_FMA;
556 for (uint32_t n = 1; n < 20; n += 5) {
557 for (size_t k = 1; k <= 5; k += 2) {
558 SpMMMicrokernelTester()
559 .mr(4)
560 .nr(4)
561 .m(8)
562 .n(n)
563 .k(k)
564 .sparsity(0.0f)
565 .qmax(128)
566 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
567 }
568 }
569 }
570
571 TEST(F32_SPMM_4X4__NEONFMA, half_sparse) {
572 TEST_REQUIRES_ARM_NEON_FMA;
573 for (uint32_t n = 1; n < 20; n += 5) {
574 for (size_t k = 1; k <= 5; k += 2) {
575 SpMMMicrokernelTester()
576 .mr(4)
577 .nr(4)
578 .m(8)
579 .n(n)
580 .k(k)
581 .sparsity(0.5f)
582 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
583 }
584 }
585 }
586
587 TEST(F32_SPMM_4X4__NEONFMA, zero_weights) {
588 TEST_REQUIRES_ARM_NEON_FMA;
589 for (uint32_t n = 1; n < 20; n += 5) {
590 for (size_t k = 1; k <= 5; k += 2) {
591 SpMMMicrokernelTester()
592 .mr(4)
593 .nr(4)
594 .m(8)
595 .n(n)
596 .k(k)
597 .sparsity(1.0f)
598 .Test(xnn_f32_spmm_ukernel_4x4__neonfma);
599 }
600 }
601 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700602#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700603
604
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700605#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700606 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, k_eq_1) {
607 TEST_REQUIRES_ARM_NEON_FMA;
608 SpMMMicrokernelTester()
609 .mr(4)
610 .nr(1)
611 .m(4)
612 .n(1)
613 .k(1)
614 .sparsity(0.0f)
615 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
616 }
617
618 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, k_gt_1) {
619 TEST_REQUIRES_ARM_NEON_FMA;
620 for (size_t k = 2; k < 10; k++) {
621 SpMMMicrokernelTester()
622 .mr(4)
623 .nr(1)
624 .m(4)
625 .n(1)
626 .k(k)
627 .sparsity(0.0f)
628 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
629 }
630 }
631
632 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, n_gt_1) {
633 TEST_REQUIRES_ARM_NEON_FMA;
634 for (uint32_t n = 2; n < 10; n++) {
635 for (size_t k = 1; k <= 5; k += 2) {
636 SpMMMicrokernelTester()
637 .mr(4)
638 .nr(1)
639 .m(4)
640 .n(n)
641 .k(k)
642 .sparsity(0.0f)
643 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
644 }
645 }
646 }
647
648 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, m_lt_4) {
649 TEST_REQUIRES_ARM_NEON_FMA;
650 for (uint32_t m = 1; m < 4; m++) {
651 for (uint32_t n = 1; n < 10; n += 2) {
652 for (size_t k = 1; k <= 5; k += 2) {
653 SpMMMicrokernelTester()
654 .mr(4)
655 .nr(1)
656 .m(m)
657 .n(n)
658 .k(k)
659 .sparsity(0.0f)
660 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
661 }
662 }
663 }
664 }
665
666 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, m_div_4) {
667 TEST_REQUIRES_ARM_NEON_FMA;
668 for (uint32_t m = 8; m <= 12; m += 4) {
669 for (uint32_t n = 1; n < 10; n += 2) {
670 for (size_t k = 1; k <= 5; k += 2) {
671 SpMMMicrokernelTester()
672 .mr(4)
673 .nr(1)
674 .m(m)
675 .n(n)
676 .k(k)
677 .sparsity(0.0f)
678 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
679 }
680 }
681 }
682 }
683
684 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, m_gt_4) {
685 TEST_REQUIRES_ARM_NEON_FMA;
686 for (uint32_t m = 5; m < 8; m++) {
687 for (uint32_t n = 1; n < 10; n += 2) {
688 for (size_t k = 1; k <= 5; k += 2) {
689 SpMMMicrokernelTester()
690 .mr(4)
691 .nr(1)
692 .m(m)
693 .n(n)
694 .k(k)
695 .sparsity(0.0f)
696 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
697 }
698 }
699 }
700 }
701
702 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, qmin) {
703 TEST_REQUIRES_ARM_NEON_FMA;
704 for (uint32_t n = 1; n < 10; n += 2) {
705 for (size_t k = 1; k <= 5; k += 2) {
706 SpMMMicrokernelTester()
707 .mr(4)
708 .nr(1)
709 .m(8)
710 .n(n)
711 .k(k)
712 .sparsity(0.0f)
713 .qmin(128)
714 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
715 }
716 }
717 }
718
719 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, qmax) {
720 TEST_REQUIRES_ARM_NEON_FMA;
721 for (uint32_t n = 1; n < 10; n += 2) {
722 for (size_t k = 1; k <= 5; k += 2) {
723 SpMMMicrokernelTester()
724 .mr(4)
725 .nr(1)
726 .m(8)
727 .n(n)
728 .k(k)
729 .sparsity(0.0f)
730 .qmax(128)
731 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
732 }
733 }
734 }
735
736 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, half_sparse) {
737 TEST_REQUIRES_ARM_NEON_FMA;
738 for (uint32_t n = 1; n < 10; n += 2) {
739 for (size_t k = 1; k <= 5; k += 2) {
740 SpMMMicrokernelTester()
741 .mr(4)
742 .nr(1)
743 .m(8)
744 .n(n)
745 .k(k)
746 .sparsity(0.5f)
747 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
748 }
749 }
750 }
751
752 TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, zero_weights) {
753 TEST_REQUIRES_ARM_NEON_FMA;
754 for (uint32_t n = 1; n < 10; n += 2) {
755 for (size_t k = 1; k <= 5; k += 2) {
756 SpMMMicrokernelTester()
757 .mr(4)
758 .nr(1)
759 .m(8)
760 .n(n)
761 .k(k)
762 .sparsity(1.0f)
763 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_pipelined);
764 }
765 }
766 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700767#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700768
769
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700770#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700771 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, k_eq_2) {
772 TEST_REQUIRES_ARM_NEON_FMA;
773 SpMMMicrokernelTester()
774 .mr(4)
775 .nr(1)
776 .m(4)
777 .n(1)
778 .k(2)
779 .sparsity(0.0f)
780 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
781 }
782
783 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, k_lt_2) {
784 TEST_REQUIRES_ARM_NEON_FMA;
785 for (size_t k = 1; k < 2; k++) {
786 SpMMMicrokernelTester()
787 .mr(4)
788 .nr(1)
789 .m(4)
790 .n(1)
791 .k(k)
792 .sparsity(0.0f)
793 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
794 }
795 }
796
797 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, k_gt_2) {
798 TEST_REQUIRES_ARM_NEON_FMA;
799 for (size_t k = 3; k < 4; k++) {
800 SpMMMicrokernelTester()
801 .mr(4)
802 .nr(1)
803 .m(4)
804 .n(1)
805 .k(k)
806 .sparsity(0.0f)
807 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
808 }
809 }
810
811 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, k_div_2) {
812 TEST_REQUIRES_ARM_NEON_FMA;
813 for (size_t k = 4; k <= 20; k += 2) {
814 SpMMMicrokernelTester()
815 .mr(4)
816 .nr(1)
817 .m(4)
818 .n(1)
819 .k(k)
820 .sparsity(0.0f)
821 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
822 }
823 }
824
825 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, n_gt_1) {
826 TEST_REQUIRES_ARM_NEON_FMA;
827 for (uint32_t n = 2; n < 10; n++) {
828 for (size_t k = 1; k <= 10; k += 3) {
829 SpMMMicrokernelTester()
830 .mr(4)
831 .nr(1)
832 .m(4)
833 .n(n)
834 .k(k)
835 .sparsity(0.0f)
836 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
837 }
838 }
839 }
840
841 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, m_lt_4) {
842 TEST_REQUIRES_ARM_NEON_FMA;
843 for (uint32_t m = 1; m < 4; m++) {
844 for (uint32_t n = 1; n < 10; n += 2) {
845 for (size_t k = 1; k <= 10; k += 3) {
846 SpMMMicrokernelTester()
847 .mr(4)
848 .nr(1)
849 .m(m)
850 .n(n)
851 .k(k)
852 .sparsity(0.0f)
853 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
854 }
855 }
856 }
857 }
858
859 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, m_div_4) {
860 TEST_REQUIRES_ARM_NEON_FMA;
861 for (uint32_t m = 8; m <= 12; m += 4) {
862 for (uint32_t n = 1; n < 10; n += 2) {
863 for (size_t k = 1; k <= 10; k += 3) {
864 SpMMMicrokernelTester()
865 .mr(4)
866 .nr(1)
867 .m(m)
868 .n(n)
869 .k(k)
870 .sparsity(0.0f)
871 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
872 }
873 }
874 }
875 }
876
877 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, m_gt_4) {
878 TEST_REQUIRES_ARM_NEON_FMA;
879 for (uint32_t m = 5; m < 8; m++) {
880 for (uint32_t n = 1; n < 10; n += 2) {
881 for (size_t k = 1; k <= 10; k += 3) {
882 SpMMMicrokernelTester()
883 .mr(4)
884 .nr(1)
885 .m(m)
886 .n(n)
887 .k(k)
888 .sparsity(0.0f)
889 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
890 }
891 }
892 }
893 }
894
895 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, qmin) {
896 TEST_REQUIRES_ARM_NEON_FMA;
897 for (uint32_t n = 1; n < 10; n += 2) {
898 for (size_t k = 1; k <= 10; k += 3) {
899 SpMMMicrokernelTester()
900 .mr(4)
901 .nr(1)
902 .m(8)
903 .n(n)
904 .k(k)
905 .sparsity(0.0f)
906 .qmin(128)
907 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
908 }
909 }
910 }
911
912 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, qmax) {
913 TEST_REQUIRES_ARM_NEON_FMA;
914 for (uint32_t n = 1; n < 10; n += 2) {
915 for (size_t k = 1; k <= 10; k += 3) {
916 SpMMMicrokernelTester()
917 .mr(4)
918 .nr(1)
919 .m(8)
920 .n(n)
921 .k(k)
922 .sparsity(0.0f)
923 .qmax(128)
924 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
925 }
926 }
927 }
928
929 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, half_sparse) {
930 TEST_REQUIRES_ARM_NEON_FMA;
931 for (uint32_t n = 1; n < 10; n += 2) {
932 for (size_t k = 1; k <= 10; k += 3) {
933 SpMMMicrokernelTester()
934 .mr(4)
935 .nr(1)
936 .m(8)
937 .n(n)
938 .k(k)
939 .sparsity(0.5f)
940 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
941 }
942 }
943 }
944
945 TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, zero_weights) {
946 TEST_REQUIRES_ARM_NEON_FMA;
947 for (uint32_t n = 1; n < 10; n += 2) {
948 for (size_t k = 1; k <= 10; k += 3) {
949 SpMMMicrokernelTester()
950 .mr(4)
951 .nr(1)
952 .m(8)
953 .n(n)
954 .k(k)
955 .sparsity(1.0f)
956 .Test(xnn_f32_spmm_ukernel_4x1__neonfma_unroll2);
957 }
958 }
959 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700960#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700961
962
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700963#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964 TEST(F32_SPMM_8X1__NEONFMA, k_eq_1) {
965 TEST_REQUIRES_ARM_NEON_FMA;
966 SpMMMicrokernelTester()
967 .mr(8)
968 .nr(1)
969 .m(8)
970 .n(1)
971 .k(1)
972 .sparsity(0.0f)
973 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
974 }
975
976 TEST(F32_SPMM_8X1__NEONFMA, k_gt_1) {
977 TEST_REQUIRES_ARM_NEON_FMA;
978 for (size_t k = 2; k < 10; k++) {
979 SpMMMicrokernelTester()
980 .mr(8)
981 .nr(1)
982 .m(8)
983 .n(1)
984 .k(k)
985 .sparsity(0.0f)
986 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
987 }
988 }
989
990 TEST(F32_SPMM_8X1__NEONFMA, n_gt_1) {
991 TEST_REQUIRES_ARM_NEON_FMA;
992 for (uint32_t n = 2; n < 10; n++) {
993 for (size_t k = 1; k <= 5; k += 2) {
994 SpMMMicrokernelTester()
995 .mr(8)
996 .nr(1)
997 .m(8)
998 .n(n)
999 .k(k)
1000 .sparsity(0.0f)
1001 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1002 }
1003 }
1004 }
1005
1006 TEST(F32_SPMM_8X1__NEONFMA, m_lt_8) {
1007 TEST_REQUIRES_ARM_NEON_FMA;
1008 for (uint32_t m = 1; m < 8; m++) {
1009 for (uint32_t n = 1; n < 10; n += 2) {
1010 for (size_t k = 1; k <= 5; k += 2) {
1011 SpMMMicrokernelTester()
1012 .mr(8)
1013 .nr(1)
1014 .m(m)
1015 .n(n)
1016 .k(k)
1017 .sparsity(0.0f)
1018 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1019 }
1020 }
1021 }
1022 }
1023
1024 TEST(F32_SPMM_8X1__NEONFMA, m_div_8) {
1025 TEST_REQUIRES_ARM_NEON_FMA;
1026 for (uint32_t m = 16; m <= 24; m += 8) {
1027 for (uint32_t n = 1; n < 10; n += 2) {
1028 for (size_t k = 1; k <= 5; k += 2) {
1029 SpMMMicrokernelTester()
1030 .mr(8)
1031 .nr(1)
1032 .m(m)
1033 .n(n)
1034 .k(k)
1035 .sparsity(0.0f)
1036 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1037 }
1038 }
1039 }
1040 }
1041
1042 TEST(F32_SPMM_8X1__NEONFMA, m_gt_8) {
1043 TEST_REQUIRES_ARM_NEON_FMA;
1044 for (uint32_t m = 9; m < 16; m++) {
1045 for (uint32_t n = 1; n < 10; n += 2) {
1046 for (size_t k = 1; k <= 5; k += 2) {
1047 SpMMMicrokernelTester()
1048 .mr(8)
1049 .nr(1)
1050 .m(m)
1051 .n(n)
1052 .k(k)
1053 .sparsity(0.0f)
1054 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1055 }
1056 }
1057 }
1058 }
1059
1060 TEST(F32_SPMM_8X1__NEONFMA, qmin) {
1061 TEST_REQUIRES_ARM_NEON_FMA;
1062 for (uint32_t n = 1; n < 10; n += 2) {
1063 for (size_t k = 1; k <= 5; k += 2) {
1064 SpMMMicrokernelTester()
1065 .mr(8)
1066 .nr(1)
1067 .m(16)
1068 .n(n)
1069 .k(k)
1070 .sparsity(0.0f)
1071 .qmin(128)
1072 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1073 }
1074 }
1075 }
1076
1077 TEST(F32_SPMM_8X1__NEONFMA, qmax) {
1078 TEST_REQUIRES_ARM_NEON_FMA;
1079 for (uint32_t n = 1; n < 10; n += 2) {
1080 for (size_t k = 1; k <= 5; k += 2) {
1081 SpMMMicrokernelTester()
1082 .mr(8)
1083 .nr(1)
1084 .m(16)
1085 .n(n)
1086 .k(k)
1087 .sparsity(0.0f)
1088 .qmax(128)
1089 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1090 }
1091 }
1092 }
1093
1094 TEST(F32_SPMM_8X1__NEONFMA, half_sparse) {
1095 TEST_REQUIRES_ARM_NEON_FMA;
1096 for (uint32_t n = 1; n < 10; n += 2) {
1097 for (size_t k = 1; k <= 5; k += 2) {
1098 SpMMMicrokernelTester()
1099 .mr(8)
1100 .nr(1)
1101 .m(16)
1102 .n(n)
1103 .k(k)
1104 .sparsity(0.5f)
1105 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1106 }
1107 }
1108 }
1109
1110 TEST(F32_SPMM_8X1__NEONFMA, zero_weights) {
1111 TEST_REQUIRES_ARM_NEON_FMA;
1112 for (uint32_t n = 1; n < 10; n += 2) {
1113 for (size_t k = 1; k <= 5; k += 2) {
1114 SpMMMicrokernelTester()
1115 .mr(8)
1116 .nr(1)
1117 .m(16)
1118 .n(n)
1119 .k(k)
1120 .sparsity(1.0f)
1121 .Test(xnn_f32_spmm_ukernel_8x1__neonfma);
1122 }
1123 }
1124 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001125#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001126
1127
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001128#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001129 TEST(F32_SPMM_8X2__NEONFMA, k_eq_1) {
1130 TEST_REQUIRES_ARM_NEON_FMA;
1131 SpMMMicrokernelTester()
1132 .mr(8)
1133 .nr(2)
1134 .m(8)
1135 .n(2)
1136 .k(1)
1137 .sparsity(0.0f)
1138 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1139 }
1140
1141 TEST(F32_SPMM_8X2__NEONFMA, k_eq_1_subtile) {
1142 TEST_REQUIRES_ARM_NEON_FMA;
1143 for (uint32_t n = 1; n <= 2; n++) {
1144 SpMMMicrokernelTester()
1145 .mr(8)
1146 .nr(2)
1147 .m(8)
1148 .n(n)
1149 .k(1)
1150 .sparsity(0.0f)
1151 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1152 }
1153 }
1154
1155 TEST(F32_SPMM_8X2__NEONFMA, k_gt_1) {
1156 TEST_REQUIRES_ARM_NEON_FMA;
1157 for (size_t k = 2; k < 10; k++) {
1158 SpMMMicrokernelTester()
1159 .mr(8)
1160 .nr(2)
1161 .m(8)
1162 .n(2)
1163 .k(k)
1164 .sparsity(0.0f)
1165 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1166 }
1167 }
1168
1169 TEST(F32_SPMM_8X2__NEONFMA, k_gt_1_subtile) {
1170 TEST_REQUIRES_ARM_NEON_FMA;
1171 for (size_t k = 2; k < 10; k++) {
1172 for (uint32_t n = 1; n <= 2; n++) {
1173 SpMMMicrokernelTester()
1174 .mr(8)
1175 .nr(2)
1176 .m(8)
1177 .n(n)
1178 .k(k)
1179 .sparsity(0.0f)
1180 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1181 }
1182 }
1183 }
1184
1185 TEST(F32_SPMM_8X2__NEONFMA, n_gt_2) {
1186 TEST_REQUIRES_ARM_NEON_FMA;
1187 for (uint32_t n = 3; n < 10; n++) {
1188 for (size_t k = 1; k <= 5; k += 2) {
1189 SpMMMicrokernelTester()
1190 .mr(8)
1191 .nr(2)
1192 .m(8)
1193 .n(n)
1194 .k(k)
1195 .sparsity(0.0f)
1196 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1197 }
1198 }
1199 }
1200
1201 TEST(F32_SPMM_8X2__NEONFMA, n_div_2) {
1202 TEST_REQUIRES_ARM_NEON_FMA;
1203 for (uint32_t n = 4; n <= 6; n += 2) {
1204 for (size_t k = 1; k <= 5; k += 2) {
1205 SpMMMicrokernelTester()
1206 .mr(8)
1207 .nr(2)
1208 .m(8)
1209 .n(n)
1210 .k(k)
1211 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1212 }
1213 }
1214 }
1215
1216 TEST(F32_SPMM_8X2__NEONFMA, m_lt_8) {
1217 TEST_REQUIRES_ARM_NEON_FMA;
1218 for (uint32_t m = 1; m < 8; m++) {
1219 for (uint32_t n = 1; n < 10; n += 3) {
1220 for (size_t k = 1; k <= 5; k += 2) {
1221 SpMMMicrokernelTester()
1222 .mr(8)
1223 .nr(2)
1224 .m(m)
1225 .n(n)
1226 .k(k)
1227 .sparsity(0.0f)
1228 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1229 }
1230 }
1231 }
1232 }
1233
1234 TEST(F32_SPMM_8X2__NEONFMA, m_div_8) {
1235 TEST_REQUIRES_ARM_NEON_FMA;
1236 for (uint32_t m = 16; m <= 24; m += 8) {
1237 for (uint32_t n = 1; n < 10; n += 3) {
1238 for (size_t k = 1; k <= 5; k += 2) {
1239 SpMMMicrokernelTester()
1240 .mr(8)
1241 .nr(2)
1242 .m(m)
1243 .n(n)
1244 .k(k)
1245 .sparsity(0.0f)
1246 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1247 }
1248 }
1249 }
1250 }
1251
1252 TEST(F32_SPMM_8X2__NEONFMA, m_gt_8) {
1253 TEST_REQUIRES_ARM_NEON_FMA;
1254 for (uint32_t m = 9; m < 16; m++) {
1255 for (uint32_t n = 1; n < 10; n += 3) {
1256 for (size_t k = 1; k <= 5; k += 2) {
1257 SpMMMicrokernelTester()
1258 .mr(8)
1259 .nr(2)
1260 .m(m)
1261 .n(n)
1262 .k(k)
1263 .sparsity(0.0f)
1264 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1265 }
1266 }
1267 }
1268 }
1269
1270 TEST(F32_SPMM_8X2__NEONFMA, qmin) {
1271 TEST_REQUIRES_ARM_NEON_FMA;
1272 for (uint32_t n = 1; n < 10; n += 3) {
1273 for (size_t k = 1; k <= 5; k += 2) {
1274 SpMMMicrokernelTester()
1275 .mr(8)
1276 .nr(2)
1277 .m(16)
1278 .n(n)
1279 .k(k)
1280 .sparsity(0.0f)
1281 .qmin(128)
1282 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1283 }
1284 }
1285 }
1286
1287 TEST(F32_SPMM_8X2__NEONFMA, qmax) {
1288 TEST_REQUIRES_ARM_NEON_FMA;
1289 for (uint32_t n = 1; n < 10; n += 3) {
1290 for (size_t k = 1; k <= 5; k += 2) {
1291 SpMMMicrokernelTester()
1292 .mr(8)
1293 .nr(2)
1294 .m(16)
1295 .n(n)
1296 .k(k)
1297 .sparsity(0.0f)
1298 .qmax(128)
1299 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1300 }
1301 }
1302 }
1303
1304 TEST(F32_SPMM_8X2__NEONFMA, half_sparse) {
1305 TEST_REQUIRES_ARM_NEON_FMA;
1306 for (uint32_t n = 1; n < 10; n += 3) {
1307 for (size_t k = 1; k <= 5; k += 2) {
1308 SpMMMicrokernelTester()
1309 .mr(8)
1310 .nr(2)
1311 .m(16)
1312 .n(n)
1313 .k(k)
1314 .sparsity(0.5f)
1315 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1316 }
1317 }
1318 }
1319
1320 TEST(F32_SPMM_8X2__NEONFMA, zero_weights) {
1321 TEST_REQUIRES_ARM_NEON_FMA;
1322 for (uint32_t n = 1; n < 10; n += 3) {
1323 for (size_t k = 1; k <= 5; k += 2) {
1324 SpMMMicrokernelTester()
1325 .mr(8)
1326 .nr(2)
1327 .m(16)
1328 .n(n)
1329 .k(k)
1330 .sparsity(1.0f)
1331 .Test(xnn_f32_spmm_ukernel_8x2__neonfma);
1332 }
1333 }
1334 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001335#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001336
1337
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001338#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001339 TEST(F32_SPMM_8X4__NEONFMA, k_eq_1) {
1340 TEST_REQUIRES_ARM_NEON_FMA;
1341 SpMMMicrokernelTester()
1342 .mr(8)
1343 .nr(4)
1344 .m(8)
1345 .n(4)
1346 .k(1)
1347 .sparsity(0.0f)
1348 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1349 }
1350
1351 TEST(F32_SPMM_8X4__NEONFMA, k_eq_1_subtile) {
1352 TEST_REQUIRES_ARM_NEON_FMA;
1353 for (uint32_t n = 1; n <= 4; n++) {
1354 SpMMMicrokernelTester()
1355 .mr(8)
1356 .nr(4)
1357 .m(8)
1358 .n(n)
1359 .k(1)
1360 .sparsity(0.0f)
1361 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1362 }
1363 }
1364
1365 TEST(F32_SPMM_8X4__NEONFMA, k_gt_1) {
1366 TEST_REQUIRES_ARM_NEON_FMA;
1367 for (size_t k = 2; k < 10; k++) {
1368 SpMMMicrokernelTester()
1369 .mr(8)
1370 .nr(4)
1371 .m(8)
1372 .n(4)
1373 .k(k)
1374 .sparsity(0.0f)
1375 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1376 }
1377 }
1378
1379 TEST(F32_SPMM_8X4__NEONFMA, k_gt_1_subtile) {
1380 TEST_REQUIRES_ARM_NEON_FMA;
1381 for (size_t k = 2; k < 10; k++) {
1382 for (uint32_t n = 1; n <= 4; n++) {
1383 SpMMMicrokernelTester()
1384 .mr(8)
1385 .nr(4)
1386 .m(8)
1387 .n(n)
1388 .k(k)
1389 .sparsity(0.0f)
1390 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1391 }
1392 }
1393 }
1394
1395 TEST(F32_SPMM_8X4__NEONFMA, n_gt_4) {
1396 TEST_REQUIRES_ARM_NEON_FMA;
1397 for (uint32_t n = 5; n < 10; n++) {
1398 for (size_t k = 1; k <= 5; k += 2) {
1399 SpMMMicrokernelTester()
1400 .mr(8)
1401 .nr(4)
1402 .m(8)
1403 .n(n)
1404 .k(k)
1405 .sparsity(0.0f)
1406 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1407 }
1408 }
1409 }
1410
1411 TEST(F32_SPMM_8X4__NEONFMA, n_div_4) {
1412 TEST_REQUIRES_ARM_NEON_FMA;
1413 for (uint32_t n = 8; n <= 12; n += 4) {
1414 for (size_t k = 1; k <= 5; k += 2) {
1415 SpMMMicrokernelTester()
1416 .mr(8)
1417 .nr(4)
1418 .m(8)
1419 .n(n)
1420 .k(k)
1421 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1422 }
1423 }
1424 }
1425
1426 TEST(F32_SPMM_8X4__NEONFMA, m_lt_8) {
1427 TEST_REQUIRES_ARM_NEON_FMA;
1428 for (uint32_t m = 1; m < 8; m++) {
1429 for (uint32_t n = 1; n < 20; n += 5) {
1430 for (size_t k = 1; k <= 5; k += 2) {
1431 SpMMMicrokernelTester()
1432 .mr(8)
1433 .nr(4)
1434 .m(m)
1435 .n(n)
1436 .k(k)
1437 .sparsity(0.0f)
1438 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1439 }
1440 }
1441 }
1442 }
1443
1444 TEST(F32_SPMM_8X4__NEONFMA, m_div_8) {
1445 TEST_REQUIRES_ARM_NEON_FMA;
1446 for (uint32_t m = 16; m <= 24; m += 8) {
1447 for (uint32_t n = 1; n < 20; n += 5) {
1448 for (size_t k = 1; k <= 5; k += 2) {
1449 SpMMMicrokernelTester()
1450 .mr(8)
1451 .nr(4)
1452 .m(m)
1453 .n(n)
1454 .k(k)
1455 .sparsity(0.0f)
1456 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1457 }
1458 }
1459 }
1460 }
1461
1462 TEST(F32_SPMM_8X4__NEONFMA, m_gt_8) {
1463 TEST_REQUIRES_ARM_NEON_FMA;
1464 for (uint32_t m = 9; m < 16; m++) {
1465 for (uint32_t n = 1; n < 20; n += 5) {
1466 for (size_t k = 1; k <= 5; k += 2) {
1467 SpMMMicrokernelTester()
1468 .mr(8)
1469 .nr(4)
1470 .m(m)
1471 .n(n)
1472 .k(k)
1473 .sparsity(0.0f)
1474 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1475 }
1476 }
1477 }
1478 }
1479
1480 TEST(F32_SPMM_8X4__NEONFMA, qmin) {
1481 TEST_REQUIRES_ARM_NEON_FMA;
1482 for (uint32_t n = 1; n < 20; n += 5) {
1483 for (size_t k = 1; k <= 5; k += 2) {
1484 SpMMMicrokernelTester()
1485 .mr(8)
1486 .nr(4)
1487 .m(16)
1488 .n(n)
1489 .k(k)
1490 .sparsity(0.0f)
1491 .qmin(128)
1492 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1493 }
1494 }
1495 }
1496
1497 TEST(F32_SPMM_8X4__NEONFMA, qmax) {
1498 TEST_REQUIRES_ARM_NEON_FMA;
1499 for (uint32_t n = 1; n < 20; n += 5) {
1500 for (size_t k = 1; k <= 5; k += 2) {
1501 SpMMMicrokernelTester()
1502 .mr(8)
1503 .nr(4)
1504 .m(16)
1505 .n(n)
1506 .k(k)
1507 .sparsity(0.0f)
1508 .qmax(128)
1509 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1510 }
1511 }
1512 }
1513
1514 TEST(F32_SPMM_8X4__NEONFMA, half_sparse) {
1515 TEST_REQUIRES_ARM_NEON_FMA;
1516 for (uint32_t n = 1; n < 20; n += 5) {
1517 for (size_t k = 1; k <= 5; k += 2) {
1518 SpMMMicrokernelTester()
1519 .mr(8)
1520 .nr(4)
1521 .m(16)
1522 .n(n)
1523 .k(k)
1524 .sparsity(0.5f)
1525 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1526 }
1527 }
1528 }
1529
1530 TEST(F32_SPMM_8X4__NEONFMA, zero_weights) {
1531 TEST_REQUIRES_ARM_NEON_FMA;
1532 for (uint32_t n = 1; n < 20; n += 5) {
1533 for (size_t k = 1; k <= 5; k += 2) {
1534 SpMMMicrokernelTester()
1535 .mr(8)
1536 .nr(4)
1537 .m(16)
1538 .n(n)
1539 .k(k)
1540 .sparsity(1.0f)
1541 .Test(xnn_f32_spmm_ukernel_8x4__neonfma);
1542 }
1543 }
1544 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001545#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001546
1547
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001548#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001549 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, k_eq_1) {
1550 TEST_REQUIRES_ARM_NEON_FMA;
1551 SpMMMicrokernelTester()
1552 .mr(8)
1553 .nr(1)
1554 .m(8)
1555 .n(1)
1556 .k(1)
1557 .sparsity(0.0f)
1558 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1559 }
1560
1561 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, k_gt_1) {
1562 TEST_REQUIRES_ARM_NEON_FMA;
1563 for (size_t k = 2; k < 10; k++) {
1564 SpMMMicrokernelTester()
1565 .mr(8)
1566 .nr(1)
1567 .m(8)
1568 .n(1)
1569 .k(k)
1570 .sparsity(0.0f)
1571 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1572 }
1573 }
1574
1575 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, n_gt_1) {
1576 TEST_REQUIRES_ARM_NEON_FMA;
1577 for (uint32_t n = 2; n < 10; n++) {
1578 for (size_t k = 1; k <= 5; k += 2) {
1579 SpMMMicrokernelTester()
1580 .mr(8)
1581 .nr(1)
1582 .m(8)
1583 .n(n)
1584 .k(k)
1585 .sparsity(0.0f)
1586 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1587 }
1588 }
1589 }
1590
1591 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, m_lt_8) {
1592 TEST_REQUIRES_ARM_NEON_FMA;
1593 for (uint32_t m = 1; m < 8; m++) {
1594 for (uint32_t n = 1; n < 10; n += 2) {
1595 for (size_t k = 1; k <= 5; k += 2) {
1596 SpMMMicrokernelTester()
1597 .mr(8)
1598 .nr(1)
1599 .m(m)
1600 .n(n)
1601 .k(k)
1602 .sparsity(0.0f)
1603 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1604 }
1605 }
1606 }
1607 }
1608
1609 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, m_div_8) {
1610 TEST_REQUIRES_ARM_NEON_FMA;
1611 for (uint32_t m = 16; m <= 24; m += 8) {
1612 for (uint32_t n = 1; n < 10; n += 2) {
1613 for (size_t k = 1; k <= 5; k += 2) {
1614 SpMMMicrokernelTester()
1615 .mr(8)
1616 .nr(1)
1617 .m(m)
1618 .n(n)
1619 .k(k)
1620 .sparsity(0.0f)
1621 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1622 }
1623 }
1624 }
1625 }
1626
1627 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, m_gt_8) {
1628 TEST_REQUIRES_ARM_NEON_FMA;
1629 for (uint32_t m = 9; m < 16; m++) {
1630 for (uint32_t n = 1; n < 10; n += 2) {
1631 for (size_t k = 1; k <= 5; k += 2) {
1632 SpMMMicrokernelTester()
1633 .mr(8)
1634 .nr(1)
1635 .m(m)
1636 .n(n)
1637 .k(k)
1638 .sparsity(0.0f)
1639 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1640 }
1641 }
1642 }
1643 }
1644
1645 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, qmin) {
1646 TEST_REQUIRES_ARM_NEON_FMA;
1647 for (uint32_t n = 1; n < 10; n += 2) {
1648 for (size_t k = 1; k <= 5; k += 2) {
1649 SpMMMicrokernelTester()
1650 .mr(8)
1651 .nr(1)
1652 .m(16)
1653 .n(n)
1654 .k(k)
1655 .sparsity(0.0f)
1656 .qmin(128)
1657 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1658 }
1659 }
1660 }
1661
1662 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, qmax) {
1663 TEST_REQUIRES_ARM_NEON_FMA;
1664 for (uint32_t n = 1; n < 10; n += 2) {
1665 for (size_t k = 1; k <= 5; k += 2) {
1666 SpMMMicrokernelTester()
1667 .mr(8)
1668 .nr(1)
1669 .m(16)
1670 .n(n)
1671 .k(k)
1672 .sparsity(0.0f)
1673 .qmax(128)
1674 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1675 }
1676 }
1677 }
1678
1679 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, half_sparse) {
1680 TEST_REQUIRES_ARM_NEON_FMA;
1681 for (uint32_t n = 1; n < 10; n += 2) {
1682 for (size_t k = 1; k <= 5; k += 2) {
1683 SpMMMicrokernelTester()
1684 .mr(8)
1685 .nr(1)
1686 .m(16)
1687 .n(n)
1688 .k(k)
1689 .sparsity(0.5f)
1690 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1691 }
1692 }
1693 }
1694
1695 TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, zero_weights) {
1696 TEST_REQUIRES_ARM_NEON_FMA;
1697 for (uint32_t n = 1; n < 10; n += 2) {
1698 for (size_t k = 1; k <= 5; k += 2) {
1699 SpMMMicrokernelTester()
1700 .mr(8)
1701 .nr(1)
1702 .m(16)
1703 .n(n)
1704 .k(k)
1705 .sparsity(1.0f)
1706 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_pipelined);
1707 }
1708 }
1709 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001710#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001711
1712
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001713#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001714 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, k_eq_2) {
1715 TEST_REQUIRES_ARM_NEON_FMA;
1716 SpMMMicrokernelTester()
1717 .mr(8)
1718 .nr(1)
1719 .m(8)
1720 .n(1)
1721 .k(2)
1722 .sparsity(0.0f)
1723 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1724 }
1725
1726 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, k_lt_2) {
1727 TEST_REQUIRES_ARM_NEON_FMA;
1728 for (size_t k = 1; k < 2; k++) {
1729 SpMMMicrokernelTester()
1730 .mr(8)
1731 .nr(1)
1732 .m(8)
1733 .n(1)
1734 .k(k)
1735 .sparsity(0.0f)
1736 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1737 }
1738 }
1739
1740 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, k_gt_2) {
1741 TEST_REQUIRES_ARM_NEON_FMA;
1742 for (size_t k = 3; k < 4; k++) {
1743 SpMMMicrokernelTester()
1744 .mr(8)
1745 .nr(1)
1746 .m(8)
1747 .n(1)
1748 .k(k)
1749 .sparsity(0.0f)
1750 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1751 }
1752 }
1753
1754 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, k_div_2) {
1755 TEST_REQUIRES_ARM_NEON_FMA;
1756 for (size_t k = 4; k <= 20; k += 2) {
1757 SpMMMicrokernelTester()
1758 .mr(8)
1759 .nr(1)
1760 .m(8)
1761 .n(1)
1762 .k(k)
1763 .sparsity(0.0f)
1764 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1765 }
1766 }
1767
1768 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, n_gt_1) {
1769 TEST_REQUIRES_ARM_NEON_FMA;
1770 for (uint32_t n = 2; n < 10; n++) {
1771 for (size_t k = 1; k <= 10; k += 3) {
1772 SpMMMicrokernelTester()
1773 .mr(8)
1774 .nr(1)
1775 .m(8)
1776 .n(n)
1777 .k(k)
1778 .sparsity(0.0f)
1779 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1780 }
1781 }
1782 }
1783
1784 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, m_lt_8) {
1785 TEST_REQUIRES_ARM_NEON_FMA;
1786 for (uint32_t m = 1; m < 8; m++) {
1787 for (uint32_t n = 1; n < 10; n += 2) {
1788 for (size_t k = 1; k <= 10; k += 3) {
1789 SpMMMicrokernelTester()
1790 .mr(8)
1791 .nr(1)
1792 .m(m)
1793 .n(n)
1794 .k(k)
1795 .sparsity(0.0f)
1796 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1797 }
1798 }
1799 }
1800 }
1801
1802 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, m_div_8) {
1803 TEST_REQUIRES_ARM_NEON_FMA;
1804 for (uint32_t m = 16; m <= 24; m += 8) {
1805 for (uint32_t n = 1; n < 10; n += 2) {
1806 for (size_t k = 1; k <= 10; k += 3) {
1807 SpMMMicrokernelTester()
1808 .mr(8)
1809 .nr(1)
1810 .m(m)
1811 .n(n)
1812 .k(k)
1813 .sparsity(0.0f)
1814 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1815 }
1816 }
1817 }
1818 }
1819
1820 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, m_gt_8) {
1821 TEST_REQUIRES_ARM_NEON_FMA;
1822 for (uint32_t m = 9; m < 16; m++) {
1823 for (uint32_t n = 1; n < 10; n += 2) {
1824 for (size_t k = 1; k <= 10; k += 3) {
1825 SpMMMicrokernelTester()
1826 .mr(8)
1827 .nr(1)
1828 .m(m)
1829 .n(n)
1830 .k(k)
1831 .sparsity(0.0f)
1832 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1833 }
1834 }
1835 }
1836 }
1837
1838 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, qmin) {
1839 TEST_REQUIRES_ARM_NEON_FMA;
1840 for (uint32_t n = 1; n < 10; n += 2) {
1841 for (size_t k = 1; k <= 10; k += 3) {
1842 SpMMMicrokernelTester()
1843 .mr(8)
1844 .nr(1)
1845 .m(16)
1846 .n(n)
1847 .k(k)
1848 .sparsity(0.0f)
1849 .qmin(128)
1850 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1851 }
1852 }
1853 }
1854
1855 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, qmax) {
1856 TEST_REQUIRES_ARM_NEON_FMA;
1857 for (uint32_t n = 1; n < 10; n += 2) {
1858 for (size_t k = 1; k <= 10; k += 3) {
1859 SpMMMicrokernelTester()
1860 .mr(8)
1861 .nr(1)
1862 .m(16)
1863 .n(n)
1864 .k(k)
1865 .sparsity(0.0f)
1866 .qmax(128)
1867 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1868 }
1869 }
1870 }
1871
1872 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, half_sparse) {
1873 TEST_REQUIRES_ARM_NEON_FMA;
1874 for (uint32_t n = 1; n < 10; n += 2) {
1875 for (size_t k = 1; k <= 10; k += 3) {
1876 SpMMMicrokernelTester()
1877 .mr(8)
1878 .nr(1)
1879 .m(16)
1880 .n(n)
1881 .k(k)
1882 .sparsity(0.5f)
1883 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1884 }
1885 }
1886 }
1887
1888 TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, zero_weights) {
1889 TEST_REQUIRES_ARM_NEON_FMA;
1890 for (uint32_t n = 1; n < 10; n += 2) {
1891 for (size_t k = 1; k <= 10; k += 3) {
1892 SpMMMicrokernelTester()
1893 .mr(8)
1894 .nr(1)
1895 .m(16)
1896 .n(n)
1897 .k(k)
1898 .sparsity(1.0f)
1899 .Test(xnn_f32_spmm_ukernel_8x1__neonfma_unroll2);
1900 }
1901 }
1902 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001903#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001904
1905
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001906#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001907 TEST(F32_SPMM_12X1__NEONFMA, k_eq_1) {
1908 TEST_REQUIRES_ARM_NEON_FMA;
1909 SpMMMicrokernelTester()
1910 .mr(12)
1911 .nr(1)
1912 .m(12)
1913 .n(1)
1914 .k(1)
1915 .sparsity(0.0f)
1916 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1917 }
1918
1919 TEST(F32_SPMM_12X1__NEONFMA, k_gt_1) {
1920 TEST_REQUIRES_ARM_NEON_FMA;
1921 for (size_t k = 2; k < 10; k++) {
1922 SpMMMicrokernelTester()
1923 .mr(12)
1924 .nr(1)
1925 .m(12)
1926 .n(1)
1927 .k(k)
1928 .sparsity(0.0f)
1929 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1930 }
1931 }
1932
1933 TEST(F32_SPMM_12X1__NEONFMA, n_gt_1) {
1934 TEST_REQUIRES_ARM_NEON_FMA;
1935 for (uint32_t n = 2; n < 10; n++) {
1936 for (size_t k = 1; k <= 5; k += 2) {
1937 SpMMMicrokernelTester()
1938 .mr(12)
1939 .nr(1)
1940 .m(12)
1941 .n(n)
1942 .k(k)
1943 .sparsity(0.0f)
1944 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1945 }
1946 }
1947 }
1948
1949 TEST(F32_SPMM_12X1__NEONFMA, m_lt_12) {
1950 TEST_REQUIRES_ARM_NEON_FMA;
1951 for (uint32_t m = 1; m < 12; m++) {
1952 for (uint32_t n = 1; n < 10; n += 2) {
1953 for (size_t k = 1; k <= 5; k += 2) {
1954 SpMMMicrokernelTester()
1955 .mr(12)
1956 .nr(1)
1957 .m(m)
1958 .n(n)
1959 .k(k)
1960 .sparsity(0.0f)
1961 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1962 }
1963 }
1964 }
1965 }
1966
1967 TEST(F32_SPMM_12X1__NEONFMA, m_div_12) {
1968 TEST_REQUIRES_ARM_NEON_FMA;
1969 for (uint32_t m = 24; m <= 36; m += 12) {
1970 for (uint32_t n = 1; n < 10; n += 2) {
1971 for (size_t k = 1; k <= 5; k += 2) {
1972 SpMMMicrokernelTester()
1973 .mr(12)
1974 .nr(1)
1975 .m(m)
1976 .n(n)
1977 .k(k)
1978 .sparsity(0.0f)
1979 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1980 }
1981 }
1982 }
1983 }
1984
1985 TEST(F32_SPMM_12X1__NEONFMA, m_gt_12) {
1986 TEST_REQUIRES_ARM_NEON_FMA;
1987 for (uint32_t m = 13; m < 24; m++) {
1988 for (uint32_t n = 1; n < 10; n += 2) {
1989 for (size_t k = 1; k <= 5; k += 2) {
1990 SpMMMicrokernelTester()
1991 .mr(12)
1992 .nr(1)
1993 .m(m)
1994 .n(n)
1995 .k(k)
1996 .sparsity(0.0f)
1997 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
1998 }
1999 }
2000 }
2001 }
2002
2003 TEST(F32_SPMM_12X1__NEONFMA, qmin) {
2004 TEST_REQUIRES_ARM_NEON_FMA;
2005 for (uint32_t n = 1; n < 10; n += 2) {
2006 for (size_t k = 1; k <= 5; k += 2) {
2007 SpMMMicrokernelTester()
2008 .mr(12)
2009 .nr(1)
2010 .m(24)
2011 .n(n)
2012 .k(k)
2013 .sparsity(0.0f)
2014 .qmin(128)
2015 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
2016 }
2017 }
2018 }
2019
2020 TEST(F32_SPMM_12X1__NEONFMA, qmax) {
2021 TEST_REQUIRES_ARM_NEON_FMA;
2022 for (uint32_t n = 1; n < 10; n += 2) {
2023 for (size_t k = 1; k <= 5; k += 2) {
2024 SpMMMicrokernelTester()
2025 .mr(12)
2026 .nr(1)
2027 .m(24)
2028 .n(n)
2029 .k(k)
2030 .sparsity(0.0f)
2031 .qmax(128)
2032 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
2033 }
2034 }
2035 }
2036
2037 TEST(F32_SPMM_12X1__NEONFMA, half_sparse) {
2038 TEST_REQUIRES_ARM_NEON_FMA;
2039 for (uint32_t n = 1; n < 10; n += 2) {
2040 for (size_t k = 1; k <= 5; k += 2) {
2041 SpMMMicrokernelTester()
2042 .mr(12)
2043 .nr(1)
2044 .m(24)
2045 .n(n)
2046 .k(k)
2047 .sparsity(0.5f)
2048 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
2049 }
2050 }
2051 }
2052
2053 TEST(F32_SPMM_12X1__NEONFMA, zero_weights) {
2054 TEST_REQUIRES_ARM_NEON_FMA;
2055 for (uint32_t n = 1; n < 10; n += 2) {
2056 for (size_t k = 1; k <= 5; k += 2) {
2057 SpMMMicrokernelTester()
2058 .mr(12)
2059 .nr(1)
2060 .m(24)
2061 .n(n)
2062 .k(k)
2063 .sparsity(1.0f)
2064 .Test(xnn_f32_spmm_ukernel_12x1__neonfma);
2065 }
2066 }
2067 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002068#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002069
2070
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002071#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002072 TEST(F32_SPMM_12X2__NEONFMA, k_eq_1) {
2073 TEST_REQUIRES_ARM_NEON_FMA;
2074 SpMMMicrokernelTester()
2075 .mr(12)
2076 .nr(2)
2077 .m(12)
2078 .n(2)
2079 .k(1)
2080 .sparsity(0.0f)
2081 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2082 }
2083
2084 TEST(F32_SPMM_12X2__NEONFMA, k_eq_1_subtile) {
2085 TEST_REQUIRES_ARM_NEON_FMA;
2086 for (uint32_t n = 1; n <= 2; n++) {
2087 SpMMMicrokernelTester()
2088 .mr(12)
2089 .nr(2)
2090 .m(12)
2091 .n(n)
2092 .k(1)
2093 .sparsity(0.0f)
2094 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2095 }
2096 }
2097
2098 TEST(F32_SPMM_12X2__NEONFMA, k_gt_1) {
2099 TEST_REQUIRES_ARM_NEON_FMA;
2100 for (size_t k = 2; k < 10; k++) {
2101 SpMMMicrokernelTester()
2102 .mr(12)
2103 .nr(2)
2104 .m(12)
2105 .n(2)
2106 .k(k)
2107 .sparsity(0.0f)
2108 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2109 }
2110 }
2111
2112 TEST(F32_SPMM_12X2__NEONFMA, k_gt_1_subtile) {
2113 TEST_REQUIRES_ARM_NEON_FMA;
2114 for (size_t k = 2; k < 10; k++) {
2115 for (uint32_t n = 1; n <= 2; n++) {
2116 SpMMMicrokernelTester()
2117 .mr(12)
2118 .nr(2)
2119 .m(12)
2120 .n(n)
2121 .k(k)
2122 .sparsity(0.0f)
2123 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2124 }
2125 }
2126 }
2127
2128 TEST(F32_SPMM_12X2__NEONFMA, n_gt_2) {
2129 TEST_REQUIRES_ARM_NEON_FMA;
2130 for (uint32_t n = 3; n < 10; n++) {
2131 for (size_t k = 1; k <= 5; k += 2) {
2132 SpMMMicrokernelTester()
2133 .mr(12)
2134 .nr(2)
2135 .m(12)
2136 .n(n)
2137 .k(k)
2138 .sparsity(0.0f)
2139 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2140 }
2141 }
2142 }
2143
2144 TEST(F32_SPMM_12X2__NEONFMA, n_div_2) {
2145 TEST_REQUIRES_ARM_NEON_FMA;
2146 for (uint32_t n = 4; n <= 6; n += 2) {
2147 for (size_t k = 1; k <= 5; k += 2) {
2148 SpMMMicrokernelTester()
2149 .mr(12)
2150 .nr(2)
2151 .m(12)
2152 .n(n)
2153 .k(k)
2154 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2155 }
2156 }
2157 }
2158
2159 TEST(F32_SPMM_12X2__NEONFMA, m_lt_12) {
2160 TEST_REQUIRES_ARM_NEON_FMA;
2161 for (uint32_t m = 1; m < 12; m++) {
2162 for (uint32_t n = 1; n < 10; n += 3) {
2163 for (size_t k = 1; k <= 5; k += 2) {
2164 SpMMMicrokernelTester()
2165 .mr(12)
2166 .nr(2)
2167 .m(m)
2168 .n(n)
2169 .k(k)
2170 .sparsity(0.0f)
2171 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2172 }
2173 }
2174 }
2175 }
2176
2177 TEST(F32_SPMM_12X2__NEONFMA, m_div_12) {
2178 TEST_REQUIRES_ARM_NEON_FMA;
2179 for (uint32_t m = 24; m <= 36; m += 12) {
2180 for (uint32_t n = 1; n < 10; n += 3) {
2181 for (size_t k = 1; k <= 5; k += 2) {
2182 SpMMMicrokernelTester()
2183 .mr(12)
2184 .nr(2)
2185 .m(m)
2186 .n(n)
2187 .k(k)
2188 .sparsity(0.0f)
2189 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2190 }
2191 }
2192 }
2193 }
2194
2195 TEST(F32_SPMM_12X2__NEONFMA, m_gt_12) {
2196 TEST_REQUIRES_ARM_NEON_FMA;
2197 for (uint32_t m = 13; m < 24; m++) {
2198 for (uint32_t n = 1; n < 10; n += 3) {
2199 for (size_t k = 1; k <= 5; k += 2) {
2200 SpMMMicrokernelTester()
2201 .mr(12)
2202 .nr(2)
2203 .m(m)
2204 .n(n)
2205 .k(k)
2206 .sparsity(0.0f)
2207 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2208 }
2209 }
2210 }
2211 }
2212
2213 TEST(F32_SPMM_12X2__NEONFMA, qmin) {
2214 TEST_REQUIRES_ARM_NEON_FMA;
2215 for (uint32_t n = 1; n < 10; n += 3) {
2216 for (size_t k = 1; k <= 5; k += 2) {
2217 SpMMMicrokernelTester()
2218 .mr(12)
2219 .nr(2)
2220 .m(24)
2221 .n(n)
2222 .k(k)
2223 .sparsity(0.0f)
2224 .qmin(128)
2225 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2226 }
2227 }
2228 }
2229
2230 TEST(F32_SPMM_12X2__NEONFMA, qmax) {
2231 TEST_REQUIRES_ARM_NEON_FMA;
2232 for (uint32_t n = 1; n < 10; n += 3) {
2233 for (size_t k = 1; k <= 5; k += 2) {
2234 SpMMMicrokernelTester()
2235 .mr(12)
2236 .nr(2)
2237 .m(24)
2238 .n(n)
2239 .k(k)
2240 .sparsity(0.0f)
2241 .qmax(128)
2242 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2243 }
2244 }
2245 }
2246
2247 TEST(F32_SPMM_12X2__NEONFMA, half_sparse) {
2248 TEST_REQUIRES_ARM_NEON_FMA;
2249 for (uint32_t n = 1; n < 10; n += 3) {
2250 for (size_t k = 1; k <= 5; k += 2) {
2251 SpMMMicrokernelTester()
2252 .mr(12)
2253 .nr(2)
2254 .m(24)
2255 .n(n)
2256 .k(k)
2257 .sparsity(0.5f)
2258 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2259 }
2260 }
2261 }
2262
2263 TEST(F32_SPMM_12X2__NEONFMA, zero_weights) {
2264 TEST_REQUIRES_ARM_NEON_FMA;
2265 for (uint32_t n = 1; n < 10; n += 3) {
2266 for (size_t k = 1; k <= 5; k += 2) {
2267 SpMMMicrokernelTester()
2268 .mr(12)
2269 .nr(2)
2270 .m(24)
2271 .n(n)
2272 .k(k)
2273 .sparsity(1.0f)
2274 .Test(xnn_f32_spmm_ukernel_12x2__neonfma);
2275 }
2276 }
2277 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002278#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002279
2280
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002281#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002282 TEST(F32_SPMM_12X4__NEONFMA, k_eq_1) {
2283 TEST_REQUIRES_ARM_NEON_FMA;
2284 SpMMMicrokernelTester()
2285 .mr(12)
2286 .nr(4)
2287 .m(12)
2288 .n(4)
2289 .k(1)
2290 .sparsity(0.0f)
2291 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2292 }
2293
2294 TEST(F32_SPMM_12X4__NEONFMA, k_eq_1_subtile) {
2295 TEST_REQUIRES_ARM_NEON_FMA;
2296 for (uint32_t n = 1; n <= 4; n++) {
2297 SpMMMicrokernelTester()
2298 .mr(12)
2299 .nr(4)
2300 .m(12)
2301 .n(n)
2302 .k(1)
2303 .sparsity(0.0f)
2304 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2305 }
2306 }
2307
2308 TEST(F32_SPMM_12X4__NEONFMA, k_gt_1) {
2309 TEST_REQUIRES_ARM_NEON_FMA;
2310 for (size_t k = 2; k < 10; k++) {
2311 SpMMMicrokernelTester()
2312 .mr(12)
2313 .nr(4)
2314 .m(12)
2315 .n(4)
2316 .k(k)
2317 .sparsity(0.0f)
2318 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2319 }
2320 }
2321
2322 TEST(F32_SPMM_12X4__NEONFMA, k_gt_1_subtile) {
2323 TEST_REQUIRES_ARM_NEON_FMA;
2324 for (size_t k = 2; k < 10; k++) {
2325 for (uint32_t n = 1; n <= 4; n++) {
2326 SpMMMicrokernelTester()
2327 .mr(12)
2328 .nr(4)
2329 .m(12)
2330 .n(n)
2331 .k(k)
2332 .sparsity(0.0f)
2333 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2334 }
2335 }
2336 }
2337
2338 TEST(F32_SPMM_12X4__NEONFMA, n_gt_4) {
2339 TEST_REQUIRES_ARM_NEON_FMA;
2340 for (uint32_t n = 5; n < 10; n++) {
2341 for (size_t k = 1; k <= 5; k += 2) {
2342 SpMMMicrokernelTester()
2343 .mr(12)
2344 .nr(4)
2345 .m(12)
2346 .n(n)
2347 .k(k)
2348 .sparsity(0.0f)
2349 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2350 }
2351 }
2352 }
2353
2354 TEST(F32_SPMM_12X4__NEONFMA, n_div_4) {
2355 TEST_REQUIRES_ARM_NEON_FMA;
2356 for (uint32_t n = 8; n <= 12; n += 4) {
2357 for (size_t k = 1; k <= 5; k += 2) {
2358 SpMMMicrokernelTester()
2359 .mr(12)
2360 .nr(4)
2361 .m(12)
2362 .n(n)
2363 .k(k)
2364 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2365 }
2366 }
2367 }
2368
2369 TEST(F32_SPMM_12X4__NEONFMA, m_lt_12) {
2370 TEST_REQUIRES_ARM_NEON_FMA;
2371 for (uint32_t m = 1; m < 12; m++) {
2372 for (uint32_t n = 1; n < 20; n += 5) {
2373 for (size_t k = 1; k <= 5; k += 2) {
2374 SpMMMicrokernelTester()
2375 .mr(12)
2376 .nr(4)
2377 .m(m)
2378 .n(n)
2379 .k(k)
2380 .sparsity(0.0f)
2381 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2382 }
2383 }
2384 }
2385 }
2386
2387 TEST(F32_SPMM_12X4__NEONFMA, m_div_12) {
2388 TEST_REQUIRES_ARM_NEON_FMA;
2389 for (uint32_t m = 24; m <= 36; m += 12) {
2390 for (uint32_t n = 1; n < 20; n += 5) {
2391 for (size_t k = 1; k <= 5; k += 2) {
2392 SpMMMicrokernelTester()
2393 .mr(12)
2394 .nr(4)
2395 .m(m)
2396 .n(n)
2397 .k(k)
2398 .sparsity(0.0f)
2399 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2400 }
2401 }
2402 }
2403 }
2404
2405 TEST(F32_SPMM_12X4__NEONFMA, m_gt_12) {
2406 TEST_REQUIRES_ARM_NEON_FMA;
2407 for (uint32_t m = 13; m < 24; m++) {
2408 for (uint32_t n = 1; n < 20; n += 5) {
2409 for (size_t k = 1; k <= 5; k += 2) {
2410 SpMMMicrokernelTester()
2411 .mr(12)
2412 .nr(4)
2413 .m(m)
2414 .n(n)
2415 .k(k)
2416 .sparsity(0.0f)
2417 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2418 }
2419 }
2420 }
2421 }
2422
2423 TEST(F32_SPMM_12X4__NEONFMA, qmin) {
2424 TEST_REQUIRES_ARM_NEON_FMA;
2425 for (uint32_t n = 1; n < 20; n += 5) {
2426 for (size_t k = 1; k <= 5; k += 2) {
2427 SpMMMicrokernelTester()
2428 .mr(12)
2429 .nr(4)
2430 .m(24)
2431 .n(n)
2432 .k(k)
2433 .sparsity(0.0f)
2434 .qmin(128)
2435 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2436 }
2437 }
2438 }
2439
2440 TEST(F32_SPMM_12X4__NEONFMA, qmax) {
2441 TEST_REQUIRES_ARM_NEON_FMA;
2442 for (uint32_t n = 1; n < 20; n += 5) {
2443 for (size_t k = 1; k <= 5; k += 2) {
2444 SpMMMicrokernelTester()
2445 .mr(12)
2446 .nr(4)
2447 .m(24)
2448 .n(n)
2449 .k(k)
2450 .sparsity(0.0f)
2451 .qmax(128)
2452 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2453 }
2454 }
2455 }
2456
2457 TEST(F32_SPMM_12X4__NEONFMA, half_sparse) {
2458 TEST_REQUIRES_ARM_NEON_FMA;
2459 for (uint32_t n = 1; n < 20; n += 5) {
2460 for (size_t k = 1; k <= 5; k += 2) {
2461 SpMMMicrokernelTester()
2462 .mr(12)
2463 .nr(4)
2464 .m(24)
2465 .n(n)
2466 .k(k)
2467 .sparsity(0.5f)
2468 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2469 }
2470 }
2471 }
2472
2473 TEST(F32_SPMM_12X4__NEONFMA, zero_weights) {
2474 TEST_REQUIRES_ARM_NEON_FMA;
2475 for (uint32_t n = 1; n < 20; n += 5) {
2476 for (size_t k = 1; k <= 5; k += 2) {
2477 SpMMMicrokernelTester()
2478 .mr(12)
2479 .nr(4)
2480 .m(24)
2481 .n(n)
2482 .k(k)
2483 .sparsity(1.0f)
2484 .Test(xnn_f32_spmm_ukernel_12x4__neonfma);
2485 }
2486 }
2487 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002488#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002489
2490
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002491#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002492 TEST(F32_SPMM_16X1__NEONFMA, k_eq_1) {
2493 TEST_REQUIRES_ARM_NEON_FMA;
2494 SpMMMicrokernelTester()
2495 .mr(16)
2496 .nr(1)
2497 .m(16)
2498 .n(1)
2499 .k(1)
2500 .sparsity(0.0f)
2501 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2502 }
2503
2504 TEST(F32_SPMM_16X1__NEONFMA, k_gt_1) {
2505 TEST_REQUIRES_ARM_NEON_FMA;
2506 for (size_t k = 2; k < 10; k++) {
2507 SpMMMicrokernelTester()
2508 .mr(16)
2509 .nr(1)
2510 .m(16)
2511 .n(1)
2512 .k(k)
2513 .sparsity(0.0f)
2514 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2515 }
2516 }
2517
2518 TEST(F32_SPMM_16X1__NEONFMA, n_gt_1) {
2519 TEST_REQUIRES_ARM_NEON_FMA;
2520 for (uint32_t n = 2; n < 10; n++) {
2521 for (size_t k = 1; k <= 5; k += 2) {
2522 SpMMMicrokernelTester()
2523 .mr(16)
2524 .nr(1)
2525 .m(16)
2526 .n(n)
2527 .k(k)
2528 .sparsity(0.0f)
2529 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2530 }
2531 }
2532 }
2533
2534 TEST(F32_SPMM_16X1__NEONFMA, m_lt_16) {
2535 TEST_REQUIRES_ARM_NEON_FMA;
2536 for (uint32_t m = 1; m < 16; m++) {
2537 for (uint32_t n = 1; n < 10; n += 2) {
2538 for (size_t k = 1; k <= 5; k += 2) {
2539 SpMMMicrokernelTester()
2540 .mr(16)
2541 .nr(1)
2542 .m(m)
2543 .n(n)
2544 .k(k)
2545 .sparsity(0.0f)
2546 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2547 }
2548 }
2549 }
2550 }
2551
2552 TEST(F32_SPMM_16X1__NEONFMA, m_div_16) {
2553 TEST_REQUIRES_ARM_NEON_FMA;
2554 for (uint32_t m = 32; m <= 48; m += 16) {
2555 for (uint32_t n = 1; n < 10; n += 2) {
2556 for (size_t k = 1; k <= 5; k += 2) {
2557 SpMMMicrokernelTester()
2558 .mr(16)
2559 .nr(1)
2560 .m(m)
2561 .n(n)
2562 .k(k)
2563 .sparsity(0.0f)
2564 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2565 }
2566 }
2567 }
2568 }
2569
2570 TEST(F32_SPMM_16X1__NEONFMA, m_gt_16) {
2571 TEST_REQUIRES_ARM_NEON_FMA;
2572 for (uint32_t m = 17; m < 32; m++) {
2573 for (uint32_t n = 1; n < 10; n += 2) {
2574 for (size_t k = 1; k <= 5; k += 2) {
2575 SpMMMicrokernelTester()
2576 .mr(16)
2577 .nr(1)
2578 .m(m)
2579 .n(n)
2580 .k(k)
2581 .sparsity(0.0f)
2582 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2583 }
2584 }
2585 }
2586 }
2587
2588 TEST(F32_SPMM_16X1__NEONFMA, qmin) {
2589 TEST_REQUIRES_ARM_NEON_FMA;
2590 for (uint32_t n = 1; n < 10; n += 2) {
2591 for (size_t k = 1; k <= 5; k += 2) {
2592 SpMMMicrokernelTester()
2593 .mr(16)
2594 .nr(1)
2595 .m(32)
2596 .n(n)
2597 .k(k)
2598 .sparsity(0.0f)
2599 .qmin(128)
2600 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2601 }
2602 }
2603 }
2604
2605 TEST(F32_SPMM_16X1__NEONFMA, qmax) {
2606 TEST_REQUIRES_ARM_NEON_FMA;
2607 for (uint32_t n = 1; n < 10; n += 2) {
2608 for (size_t k = 1; k <= 5; k += 2) {
2609 SpMMMicrokernelTester()
2610 .mr(16)
2611 .nr(1)
2612 .m(32)
2613 .n(n)
2614 .k(k)
2615 .sparsity(0.0f)
2616 .qmax(128)
2617 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2618 }
2619 }
2620 }
2621
2622 TEST(F32_SPMM_16X1__NEONFMA, half_sparse) {
2623 TEST_REQUIRES_ARM_NEON_FMA;
2624 for (uint32_t n = 1; n < 10; n += 2) {
2625 for (size_t k = 1; k <= 5; k += 2) {
2626 SpMMMicrokernelTester()
2627 .mr(16)
2628 .nr(1)
2629 .m(32)
2630 .n(n)
2631 .k(k)
2632 .sparsity(0.5f)
2633 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2634 }
2635 }
2636 }
2637
2638 TEST(F32_SPMM_16X1__NEONFMA, zero_weights) {
2639 TEST_REQUIRES_ARM_NEON_FMA;
2640 for (uint32_t n = 1; n < 10; n += 2) {
2641 for (size_t k = 1; k <= 5; k += 2) {
2642 SpMMMicrokernelTester()
2643 .mr(16)
2644 .nr(1)
2645 .m(32)
2646 .n(n)
2647 .k(k)
2648 .sparsity(1.0f)
2649 .Test(xnn_f32_spmm_ukernel_16x1__neonfma);
2650 }
2651 }
2652 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002653#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002654
2655
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002656#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002657 TEST(F32_SPMM_16X2__NEONFMA, k_eq_1) {
2658 TEST_REQUIRES_ARM_NEON_FMA;
2659 SpMMMicrokernelTester()
2660 .mr(16)
2661 .nr(2)
2662 .m(16)
2663 .n(2)
2664 .k(1)
2665 .sparsity(0.0f)
2666 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2667 }
2668
2669 TEST(F32_SPMM_16X2__NEONFMA, k_eq_1_subtile) {
2670 TEST_REQUIRES_ARM_NEON_FMA;
2671 for (uint32_t n = 1; n <= 2; n++) {
2672 SpMMMicrokernelTester()
2673 .mr(16)
2674 .nr(2)
2675 .m(16)
2676 .n(n)
2677 .k(1)
2678 .sparsity(0.0f)
2679 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2680 }
2681 }
2682
2683 TEST(F32_SPMM_16X2__NEONFMA, k_gt_1) {
2684 TEST_REQUIRES_ARM_NEON_FMA;
2685 for (size_t k = 2; k < 10; k++) {
2686 SpMMMicrokernelTester()
2687 .mr(16)
2688 .nr(2)
2689 .m(16)
2690 .n(2)
2691 .k(k)
2692 .sparsity(0.0f)
2693 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2694 }
2695 }
2696
2697 TEST(F32_SPMM_16X2__NEONFMA, k_gt_1_subtile) {
2698 TEST_REQUIRES_ARM_NEON_FMA;
2699 for (size_t k = 2; k < 10; k++) {
2700 for (uint32_t n = 1; n <= 2; n++) {
2701 SpMMMicrokernelTester()
2702 .mr(16)
2703 .nr(2)
2704 .m(16)
2705 .n(n)
2706 .k(k)
2707 .sparsity(0.0f)
2708 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2709 }
2710 }
2711 }
2712
2713 TEST(F32_SPMM_16X2__NEONFMA, n_gt_2) {
2714 TEST_REQUIRES_ARM_NEON_FMA;
2715 for (uint32_t n = 3; n < 10; n++) {
2716 for (size_t k = 1; k <= 5; k += 2) {
2717 SpMMMicrokernelTester()
2718 .mr(16)
2719 .nr(2)
2720 .m(16)
2721 .n(n)
2722 .k(k)
2723 .sparsity(0.0f)
2724 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2725 }
2726 }
2727 }
2728
2729 TEST(F32_SPMM_16X2__NEONFMA, n_div_2) {
2730 TEST_REQUIRES_ARM_NEON_FMA;
2731 for (uint32_t n = 4; n <= 6; n += 2) {
2732 for (size_t k = 1; k <= 5; k += 2) {
2733 SpMMMicrokernelTester()
2734 .mr(16)
2735 .nr(2)
2736 .m(16)
2737 .n(n)
2738 .k(k)
2739 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2740 }
2741 }
2742 }
2743
2744 TEST(F32_SPMM_16X2__NEONFMA, m_lt_16) {
2745 TEST_REQUIRES_ARM_NEON_FMA;
2746 for (uint32_t m = 1; m < 16; m++) {
2747 for (uint32_t n = 1; n < 10; n += 3) {
2748 for (size_t k = 1; k <= 5; k += 2) {
2749 SpMMMicrokernelTester()
2750 .mr(16)
2751 .nr(2)
2752 .m(m)
2753 .n(n)
2754 .k(k)
2755 .sparsity(0.0f)
2756 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2757 }
2758 }
2759 }
2760 }
2761
2762 TEST(F32_SPMM_16X2__NEONFMA, m_div_16) {
2763 TEST_REQUIRES_ARM_NEON_FMA;
2764 for (uint32_t m = 32; m <= 48; m += 16) {
2765 for (uint32_t n = 1; n < 10; n += 3) {
2766 for (size_t k = 1; k <= 5; k += 2) {
2767 SpMMMicrokernelTester()
2768 .mr(16)
2769 .nr(2)
2770 .m(m)
2771 .n(n)
2772 .k(k)
2773 .sparsity(0.0f)
2774 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2775 }
2776 }
2777 }
2778 }
2779
2780 TEST(F32_SPMM_16X2__NEONFMA, m_gt_16) {
2781 TEST_REQUIRES_ARM_NEON_FMA;
2782 for (uint32_t m = 17; m < 32; m++) {
2783 for (uint32_t n = 1; n < 10; n += 3) {
2784 for (size_t k = 1; k <= 5; k += 2) {
2785 SpMMMicrokernelTester()
2786 .mr(16)
2787 .nr(2)
2788 .m(m)
2789 .n(n)
2790 .k(k)
2791 .sparsity(0.0f)
2792 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2793 }
2794 }
2795 }
2796 }
2797
2798 TEST(F32_SPMM_16X2__NEONFMA, qmin) {
2799 TEST_REQUIRES_ARM_NEON_FMA;
2800 for (uint32_t n = 1; n < 10; n += 3) {
2801 for (size_t k = 1; k <= 5; k += 2) {
2802 SpMMMicrokernelTester()
2803 .mr(16)
2804 .nr(2)
2805 .m(32)
2806 .n(n)
2807 .k(k)
2808 .sparsity(0.0f)
2809 .qmin(128)
2810 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2811 }
2812 }
2813 }
2814
2815 TEST(F32_SPMM_16X2__NEONFMA, qmax) {
2816 TEST_REQUIRES_ARM_NEON_FMA;
2817 for (uint32_t n = 1; n < 10; n += 3) {
2818 for (size_t k = 1; k <= 5; k += 2) {
2819 SpMMMicrokernelTester()
2820 .mr(16)
2821 .nr(2)
2822 .m(32)
2823 .n(n)
2824 .k(k)
2825 .sparsity(0.0f)
2826 .qmax(128)
2827 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2828 }
2829 }
2830 }
2831
2832 TEST(F32_SPMM_16X2__NEONFMA, half_sparse) {
2833 TEST_REQUIRES_ARM_NEON_FMA;
2834 for (uint32_t n = 1; n < 10; n += 3) {
2835 for (size_t k = 1; k <= 5; k += 2) {
2836 SpMMMicrokernelTester()
2837 .mr(16)
2838 .nr(2)
2839 .m(32)
2840 .n(n)
2841 .k(k)
2842 .sparsity(0.5f)
2843 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2844 }
2845 }
2846 }
2847
2848 TEST(F32_SPMM_16X2__NEONFMA, zero_weights) {
2849 TEST_REQUIRES_ARM_NEON_FMA;
2850 for (uint32_t n = 1; n < 10; n += 3) {
2851 for (size_t k = 1; k <= 5; k += 2) {
2852 SpMMMicrokernelTester()
2853 .mr(16)
2854 .nr(2)
2855 .m(32)
2856 .n(n)
2857 .k(k)
2858 .sparsity(1.0f)
2859 .Test(xnn_f32_spmm_ukernel_16x2__neonfma);
2860 }
2861 }
2862 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002863#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002864
2865
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002866#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002867 TEST(F32_SPMM_16X4__NEONFMA, k_eq_1) {
2868 TEST_REQUIRES_ARM_NEON_FMA;
2869 SpMMMicrokernelTester()
2870 .mr(16)
2871 .nr(4)
2872 .m(16)
2873 .n(4)
2874 .k(1)
2875 .sparsity(0.0f)
2876 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2877 }
2878
2879 TEST(F32_SPMM_16X4__NEONFMA, k_eq_1_subtile) {
2880 TEST_REQUIRES_ARM_NEON_FMA;
2881 for (uint32_t n = 1; n <= 4; n++) {
2882 SpMMMicrokernelTester()
2883 .mr(16)
2884 .nr(4)
2885 .m(16)
2886 .n(n)
2887 .k(1)
2888 .sparsity(0.0f)
2889 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2890 }
2891 }
2892
2893 TEST(F32_SPMM_16X4__NEONFMA, k_gt_1) {
2894 TEST_REQUIRES_ARM_NEON_FMA;
2895 for (size_t k = 2; k < 10; k++) {
2896 SpMMMicrokernelTester()
2897 .mr(16)
2898 .nr(4)
2899 .m(16)
2900 .n(4)
2901 .k(k)
2902 .sparsity(0.0f)
2903 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2904 }
2905 }
2906
2907 TEST(F32_SPMM_16X4__NEONFMA, k_gt_1_subtile) {
2908 TEST_REQUIRES_ARM_NEON_FMA;
2909 for (size_t k = 2; k < 10; k++) {
2910 for (uint32_t n = 1; n <= 4; n++) {
2911 SpMMMicrokernelTester()
2912 .mr(16)
2913 .nr(4)
2914 .m(16)
2915 .n(n)
2916 .k(k)
2917 .sparsity(0.0f)
2918 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2919 }
2920 }
2921 }
2922
2923 TEST(F32_SPMM_16X4__NEONFMA, n_gt_4) {
2924 TEST_REQUIRES_ARM_NEON_FMA;
2925 for (uint32_t n = 5; n < 10; n++) {
2926 for (size_t k = 1; k <= 5; k += 2) {
2927 SpMMMicrokernelTester()
2928 .mr(16)
2929 .nr(4)
2930 .m(16)
2931 .n(n)
2932 .k(k)
2933 .sparsity(0.0f)
2934 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2935 }
2936 }
2937 }
2938
2939 TEST(F32_SPMM_16X4__NEONFMA, n_div_4) {
2940 TEST_REQUIRES_ARM_NEON_FMA;
2941 for (uint32_t n = 8; n <= 12; n += 4) {
2942 for (size_t k = 1; k <= 5; k += 2) {
2943 SpMMMicrokernelTester()
2944 .mr(16)
2945 .nr(4)
2946 .m(16)
2947 .n(n)
2948 .k(k)
2949 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2950 }
2951 }
2952 }
2953
2954 TEST(F32_SPMM_16X4__NEONFMA, m_lt_16) {
2955 TEST_REQUIRES_ARM_NEON_FMA;
2956 for (uint32_t m = 1; m < 16; m++) {
2957 for (uint32_t n = 1; n < 20; n += 5) {
2958 for (size_t k = 1; k <= 5; k += 2) {
2959 SpMMMicrokernelTester()
2960 .mr(16)
2961 .nr(4)
2962 .m(m)
2963 .n(n)
2964 .k(k)
2965 .sparsity(0.0f)
2966 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2967 }
2968 }
2969 }
2970 }
2971
2972 TEST(F32_SPMM_16X4__NEONFMA, m_div_16) {
2973 TEST_REQUIRES_ARM_NEON_FMA;
2974 for (uint32_t m = 32; m <= 48; m += 16) {
2975 for (uint32_t n = 1; n < 20; n += 5) {
2976 for (size_t k = 1; k <= 5; k += 2) {
2977 SpMMMicrokernelTester()
2978 .mr(16)
2979 .nr(4)
2980 .m(m)
2981 .n(n)
2982 .k(k)
2983 .sparsity(0.0f)
2984 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
2985 }
2986 }
2987 }
2988 }
2989
2990 TEST(F32_SPMM_16X4__NEONFMA, m_gt_16) {
2991 TEST_REQUIRES_ARM_NEON_FMA;
2992 for (uint32_t m = 17; m < 32; m++) {
2993 for (uint32_t n = 1; n < 20; n += 5) {
2994 for (size_t k = 1; k <= 5; k += 2) {
2995 SpMMMicrokernelTester()
2996 .mr(16)
2997 .nr(4)
2998 .m(m)
2999 .n(n)
3000 .k(k)
3001 .sparsity(0.0f)
3002 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
3003 }
3004 }
3005 }
3006 }
3007
3008 TEST(F32_SPMM_16X4__NEONFMA, qmin) {
3009 TEST_REQUIRES_ARM_NEON_FMA;
3010 for (uint32_t n = 1; n < 20; n += 5) {
3011 for (size_t k = 1; k <= 5; k += 2) {
3012 SpMMMicrokernelTester()
3013 .mr(16)
3014 .nr(4)
3015 .m(32)
3016 .n(n)
3017 .k(k)
3018 .sparsity(0.0f)
3019 .qmin(128)
3020 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
3021 }
3022 }
3023 }
3024
3025 TEST(F32_SPMM_16X4__NEONFMA, qmax) {
3026 TEST_REQUIRES_ARM_NEON_FMA;
3027 for (uint32_t n = 1; n < 20; n += 5) {
3028 for (size_t k = 1; k <= 5; k += 2) {
3029 SpMMMicrokernelTester()
3030 .mr(16)
3031 .nr(4)
3032 .m(32)
3033 .n(n)
3034 .k(k)
3035 .sparsity(0.0f)
3036 .qmax(128)
3037 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
3038 }
3039 }
3040 }
3041
3042 TEST(F32_SPMM_16X4__NEONFMA, half_sparse) {
3043 TEST_REQUIRES_ARM_NEON_FMA;
3044 for (uint32_t n = 1; n < 20; n += 5) {
3045 for (size_t k = 1; k <= 5; k += 2) {
3046 SpMMMicrokernelTester()
3047 .mr(16)
3048 .nr(4)
3049 .m(32)
3050 .n(n)
3051 .k(k)
3052 .sparsity(0.5f)
3053 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
3054 }
3055 }
3056 }
3057
3058 TEST(F32_SPMM_16X4__NEONFMA, zero_weights) {
3059 TEST_REQUIRES_ARM_NEON_FMA;
3060 for (uint32_t n = 1; n < 20; n += 5) {
3061 for (size_t k = 1; k <= 5; k += 2) {
3062 SpMMMicrokernelTester()
3063 .mr(16)
3064 .nr(4)
3065 .m(32)
3066 .n(n)
3067 .k(k)
3068 .sparsity(1.0f)
3069 .Test(xnn_f32_spmm_ukernel_16x4__neonfma);
3070 }
3071 }
3072 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003073#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003074
3075
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003076#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003077 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, k_eq_1) {
3078 TEST_REQUIRES_ARM_NEON_FMA;
3079 SpMMMicrokernelTester()
3080 .mr(16)
3081 .nr(1)
3082 .m(16)
3083 .n(1)
3084 .k(1)
3085 .sparsity(0.0f)
3086 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3087 }
3088
3089 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, k_gt_1) {
3090 TEST_REQUIRES_ARM_NEON_FMA;
3091 for (size_t k = 2; k < 10; k++) {
3092 SpMMMicrokernelTester()
3093 .mr(16)
3094 .nr(1)
3095 .m(16)
3096 .n(1)
3097 .k(k)
3098 .sparsity(0.0f)
3099 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3100 }
3101 }
3102
3103 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, n_gt_1) {
3104 TEST_REQUIRES_ARM_NEON_FMA;
3105 for (uint32_t n = 2; n < 10; n++) {
3106 for (size_t k = 1; k <= 5; k += 2) {
3107 SpMMMicrokernelTester()
3108 .mr(16)
3109 .nr(1)
3110 .m(16)
3111 .n(n)
3112 .k(k)
3113 .sparsity(0.0f)
3114 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3115 }
3116 }
3117 }
3118
3119 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, m_lt_16) {
3120 TEST_REQUIRES_ARM_NEON_FMA;
3121 for (uint32_t m = 1; m < 16; m++) {
3122 for (uint32_t n = 1; n < 10; n += 2) {
3123 for (size_t k = 1; k <= 5; k += 2) {
3124 SpMMMicrokernelTester()
3125 .mr(16)
3126 .nr(1)
3127 .m(m)
3128 .n(n)
3129 .k(k)
3130 .sparsity(0.0f)
3131 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3132 }
3133 }
3134 }
3135 }
3136
3137 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, m_div_16) {
3138 TEST_REQUIRES_ARM_NEON_FMA;
3139 for (uint32_t m = 32; m <= 48; m += 16) {
3140 for (uint32_t n = 1; n < 10; n += 2) {
3141 for (size_t k = 1; k <= 5; k += 2) {
3142 SpMMMicrokernelTester()
3143 .mr(16)
3144 .nr(1)
3145 .m(m)
3146 .n(n)
3147 .k(k)
3148 .sparsity(0.0f)
3149 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3150 }
3151 }
3152 }
3153 }
3154
3155 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, m_gt_16) {
3156 TEST_REQUIRES_ARM_NEON_FMA;
3157 for (uint32_t m = 17; m < 32; m++) {
3158 for (uint32_t n = 1; n < 10; n += 2) {
3159 for (size_t k = 1; k <= 5; k += 2) {
3160 SpMMMicrokernelTester()
3161 .mr(16)
3162 .nr(1)
3163 .m(m)
3164 .n(n)
3165 .k(k)
3166 .sparsity(0.0f)
3167 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3168 }
3169 }
3170 }
3171 }
3172
3173 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, qmin) {
3174 TEST_REQUIRES_ARM_NEON_FMA;
3175 for (uint32_t n = 1; n < 10; n += 2) {
3176 for (size_t k = 1; k <= 5; k += 2) {
3177 SpMMMicrokernelTester()
3178 .mr(16)
3179 .nr(1)
3180 .m(32)
3181 .n(n)
3182 .k(k)
3183 .sparsity(0.0f)
3184 .qmin(128)
3185 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3186 }
3187 }
3188 }
3189
3190 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, qmax) {
3191 TEST_REQUIRES_ARM_NEON_FMA;
3192 for (uint32_t n = 1; n < 10; n += 2) {
3193 for (size_t k = 1; k <= 5; k += 2) {
3194 SpMMMicrokernelTester()
3195 .mr(16)
3196 .nr(1)
3197 .m(32)
3198 .n(n)
3199 .k(k)
3200 .sparsity(0.0f)
3201 .qmax(128)
3202 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3203 }
3204 }
3205 }
3206
3207 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, half_sparse) {
3208 TEST_REQUIRES_ARM_NEON_FMA;
3209 for (uint32_t n = 1; n < 10; n += 2) {
3210 for (size_t k = 1; k <= 5; k += 2) {
3211 SpMMMicrokernelTester()
3212 .mr(16)
3213 .nr(1)
3214 .m(32)
3215 .n(n)
3216 .k(k)
3217 .sparsity(0.5f)
3218 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3219 }
3220 }
3221 }
3222
3223 TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, zero_weights) {
3224 TEST_REQUIRES_ARM_NEON_FMA;
3225 for (uint32_t n = 1; n < 10; n += 2) {
3226 for (size_t k = 1; k <= 5; k += 2) {
3227 SpMMMicrokernelTester()
3228 .mr(16)
3229 .nr(1)
3230 .m(32)
3231 .n(n)
3232 .k(k)
3233 .sparsity(1.0f)
3234 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_pipelined);
3235 }
3236 }
3237 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003238#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003239
3240
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003241#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003242 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, k_eq_2) {
3243 TEST_REQUIRES_ARM_NEON_FMA;
3244 SpMMMicrokernelTester()
3245 .mr(16)
3246 .nr(1)
3247 .m(16)
3248 .n(1)
3249 .k(2)
3250 .sparsity(0.0f)
3251 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3252 }
3253
3254 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, k_lt_2) {
3255 TEST_REQUIRES_ARM_NEON_FMA;
3256 for (size_t k = 1; k < 2; k++) {
3257 SpMMMicrokernelTester()
3258 .mr(16)
3259 .nr(1)
3260 .m(16)
3261 .n(1)
3262 .k(k)
3263 .sparsity(0.0f)
3264 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3265 }
3266 }
3267
3268 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, k_gt_2) {
3269 TEST_REQUIRES_ARM_NEON_FMA;
3270 for (size_t k = 3; k < 4; k++) {
3271 SpMMMicrokernelTester()
3272 .mr(16)
3273 .nr(1)
3274 .m(16)
3275 .n(1)
3276 .k(k)
3277 .sparsity(0.0f)
3278 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3279 }
3280 }
3281
3282 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, k_div_2) {
3283 TEST_REQUIRES_ARM_NEON_FMA;
3284 for (size_t k = 4; k <= 20; k += 2) {
3285 SpMMMicrokernelTester()
3286 .mr(16)
3287 .nr(1)
3288 .m(16)
3289 .n(1)
3290 .k(k)
3291 .sparsity(0.0f)
3292 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3293 }
3294 }
3295
3296 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, n_gt_1) {
3297 TEST_REQUIRES_ARM_NEON_FMA;
3298 for (uint32_t n = 2; n < 10; n++) {
3299 for (size_t k = 1; k <= 10; k += 3) {
3300 SpMMMicrokernelTester()
3301 .mr(16)
3302 .nr(1)
3303 .m(16)
3304 .n(n)
3305 .k(k)
3306 .sparsity(0.0f)
3307 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3308 }
3309 }
3310 }
3311
3312 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, m_lt_16) {
3313 TEST_REQUIRES_ARM_NEON_FMA;
3314 for (uint32_t m = 1; m < 16; m++) {
3315 for (uint32_t n = 1; n < 10; n += 2) {
3316 for (size_t k = 1; k <= 10; k += 3) {
3317 SpMMMicrokernelTester()
3318 .mr(16)
3319 .nr(1)
3320 .m(m)
3321 .n(n)
3322 .k(k)
3323 .sparsity(0.0f)
3324 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3325 }
3326 }
3327 }
3328 }
3329
3330 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, m_div_16) {
3331 TEST_REQUIRES_ARM_NEON_FMA;
3332 for (uint32_t m = 32; m <= 48; m += 16) {
3333 for (uint32_t n = 1; n < 10; n += 2) {
3334 for (size_t k = 1; k <= 10; k += 3) {
3335 SpMMMicrokernelTester()
3336 .mr(16)
3337 .nr(1)
3338 .m(m)
3339 .n(n)
3340 .k(k)
3341 .sparsity(0.0f)
3342 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3343 }
3344 }
3345 }
3346 }
3347
3348 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, m_gt_16) {
3349 TEST_REQUIRES_ARM_NEON_FMA;
3350 for (uint32_t m = 17; m < 32; m++) {
3351 for (uint32_t n = 1; n < 10; n += 2) {
3352 for (size_t k = 1; k <= 10; k += 3) {
3353 SpMMMicrokernelTester()
3354 .mr(16)
3355 .nr(1)
3356 .m(m)
3357 .n(n)
3358 .k(k)
3359 .sparsity(0.0f)
3360 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3361 }
3362 }
3363 }
3364 }
3365
3366 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, qmin) {
3367 TEST_REQUIRES_ARM_NEON_FMA;
3368 for (uint32_t n = 1; n < 10; n += 2) {
3369 for (size_t k = 1; k <= 10; k += 3) {
3370 SpMMMicrokernelTester()
3371 .mr(16)
3372 .nr(1)
3373 .m(32)
3374 .n(n)
3375 .k(k)
3376 .sparsity(0.0f)
3377 .qmin(128)
3378 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3379 }
3380 }
3381 }
3382
3383 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, qmax) {
3384 TEST_REQUIRES_ARM_NEON_FMA;
3385 for (uint32_t n = 1; n < 10; n += 2) {
3386 for (size_t k = 1; k <= 10; k += 3) {
3387 SpMMMicrokernelTester()
3388 .mr(16)
3389 .nr(1)
3390 .m(32)
3391 .n(n)
3392 .k(k)
3393 .sparsity(0.0f)
3394 .qmax(128)
3395 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3396 }
3397 }
3398 }
3399
3400 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, half_sparse) {
3401 TEST_REQUIRES_ARM_NEON_FMA;
3402 for (uint32_t n = 1; n < 10; n += 2) {
3403 for (size_t k = 1; k <= 10; k += 3) {
3404 SpMMMicrokernelTester()
3405 .mr(16)
3406 .nr(1)
3407 .m(32)
3408 .n(n)
3409 .k(k)
3410 .sparsity(0.5f)
3411 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3412 }
3413 }
3414 }
3415
3416 TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, zero_weights) {
3417 TEST_REQUIRES_ARM_NEON_FMA;
3418 for (uint32_t n = 1; n < 10; n += 2) {
3419 for (size_t k = 1; k <= 10; k += 3) {
3420 SpMMMicrokernelTester()
3421 .mr(16)
3422 .nr(1)
3423 .m(32)
3424 .n(n)
3425 .k(k)
3426 .sparsity(1.0f)
3427 .Test(xnn_f32_spmm_ukernel_16x1__neonfma_unroll2);
3428 }
3429 }
3430 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003431#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003432
3433
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003435 TEST(F32_SPMM_4X1__SSE, k_eq_1) {
3436 TEST_REQUIRES_X86_SSE;
3437 SpMMMicrokernelTester()
3438 .mr(4)
3439 .nr(1)
3440 .m(4)
3441 .n(1)
3442 .k(1)
3443 .sparsity(0.0f)
3444 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3445 }
3446
3447 TEST(F32_SPMM_4X1__SSE, k_gt_1) {
3448 TEST_REQUIRES_X86_SSE;
3449 for (size_t k = 2; k < 10; k++) {
3450 SpMMMicrokernelTester()
3451 .mr(4)
3452 .nr(1)
3453 .m(4)
3454 .n(1)
3455 .k(k)
3456 .sparsity(0.0f)
3457 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3458 }
3459 }
3460
3461 TEST(F32_SPMM_4X1__SSE, n_gt_1) {
3462 TEST_REQUIRES_X86_SSE;
3463 for (uint32_t n = 2; n < 10; n++) {
3464 for (size_t k = 1; k <= 5; k += 2) {
3465 SpMMMicrokernelTester()
3466 .mr(4)
3467 .nr(1)
3468 .m(4)
3469 .n(n)
3470 .k(k)
3471 .sparsity(0.0f)
3472 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3473 }
3474 }
3475 }
3476
3477 TEST(F32_SPMM_4X1__SSE, m_lt_4) {
3478 TEST_REQUIRES_X86_SSE;
3479 for (uint32_t m = 1; m < 4; m++) {
3480 for (uint32_t n = 1; n < 10; n += 2) {
3481 for (size_t k = 1; k <= 5; k += 2) {
3482 SpMMMicrokernelTester()
3483 .mr(4)
3484 .nr(1)
3485 .m(m)
3486 .n(n)
3487 .k(k)
3488 .sparsity(0.0f)
3489 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3490 }
3491 }
3492 }
3493 }
3494
3495 TEST(F32_SPMM_4X1__SSE, m_div_4) {
3496 TEST_REQUIRES_X86_SSE;
3497 for (uint32_t m = 8; m <= 12; m += 4) {
3498 for (uint32_t n = 1; n < 10; n += 2) {
3499 for (size_t k = 1; k <= 5; k += 2) {
3500 SpMMMicrokernelTester()
3501 .mr(4)
3502 .nr(1)
3503 .m(m)
3504 .n(n)
3505 .k(k)
3506 .sparsity(0.0f)
3507 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3508 }
3509 }
3510 }
3511 }
3512
3513 TEST(F32_SPMM_4X1__SSE, m_gt_4) {
3514 TEST_REQUIRES_X86_SSE;
3515 for (uint32_t m = 5; m < 8; m++) {
3516 for (uint32_t n = 1; n < 10; n += 2) {
3517 for (size_t k = 1; k <= 5; k += 2) {
3518 SpMMMicrokernelTester()
3519 .mr(4)
3520 .nr(1)
3521 .m(m)
3522 .n(n)
3523 .k(k)
3524 .sparsity(0.0f)
3525 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3526 }
3527 }
3528 }
3529 }
3530
3531 TEST(F32_SPMM_4X1__SSE, qmin) {
3532 TEST_REQUIRES_X86_SSE;
3533 for (uint32_t n = 1; n < 10; n += 2) {
3534 for (size_t k = 1; k <= 5; k += 2) {
3535 SpMMMicrokernelTester()
3536 .mr(4)
3537 .nr(1)
3538 .m(8)
3539 .n(n)
3540 .k(k)
3541 .sparsity(0.0f)
3542 .qmin(128)
3543 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3544 }
3545 }
3546 }
3547
3548 TEST(F32_SPMM_4X1__SSE, qmax) {
3549 TEST_REQUIRES_X86_SSE;
3550 for (uint32_t n = 1; n < 10; n += 2) {
3551 for (size_t k = 1; k <= 5; k += 2) {
3552 SpMMMicrokernelTester()
3553 .mr(4)
3554 .nr(1)
3555 .m(8)
3556 .n(n)
3557 .k(k)
3558 .sparsity(0.0f)
3559 .qmax(128)
3560 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3561 }
3562 }
3563 }
3564
3565 TEST(F32_SPMM_4X1__SSE, half_sparse) {
3566 TEST_REQUIRES_X86_SSE;
3567 for (uint32_t n = 1; n < 10; n += 2) {
3568 for (size_t k = 1; k <= 5; k += 2) {
3569 SpMMMicrokernelTester()
3570 .mr(4)
3571 .nr(1)
3572 .m(8)
3573 .n(n)
3574 .k(k)
3575 .sparsity(0.5f)
3576 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3577 }
3578 }
3579 }
3580
3581 TEST(F32_SPMM_4X1__SSE, zero_weights) {
3582 TEST_REQUIRES_X86_SSE;
3583 for (uint32_t n = 1; n < 10; n += 2) {
3584 for (size_t k = 1; k <= 5; k += 2) {
3585 SpMMMicrokernelTester()
3586 .mr(4)
3587 .nr(1)
3588 .m(8)
3589 .n(n)
3590 .k(k)
3591 .sparsity(1.0f)
3592 .Test(xnn_f32_spmm_ukernel_4x1__sse);
3593 }
3594 }
3595 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003596#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003597
3598
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003599#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003600 TEST(F32_SPMM_8X1__SSE, k_eq_1) {
3601 TEST_REQUIRES_X86_SSE;
3602 SpMMMicrokernelTester()
3603 .mr(8)
3604 .nr(1)
3605 .m(8)
3606 .n(1)
3607 .k(1)
3608 .sparsity(0.0f)
3609 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3610 }
3611
3612 TEST(F32_SPMM_8X1__SSE, k_gt_1) {
3613 TEST_REQUIRES_X86_SSE;
3614 for (size_t k = 2; k < 10; k++) {
3615 SpMMMicrokernelTester()
3616 .mr(8)
3617 .nr(1)
3618 .m(8)
3619 .n(1)
3620 .k(k)
3621 .sparsity(0.0f)
3622 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3623 }
3624 }
3625
3626 TEST(F32_SPMM_8X1__SSE, n_gt_1) {
3627 TEST_REQUIRES_X86_SSE;
3628 for (uint32_t n = 2; n < 10; n++) {
3629 for (size_t k = 1; k <= 5; k += 2) {
3630 SpMMMicrokernelTester()
3631 .mr(8)
3632 .nr(1)
3633 .m(8)
3634 .n(n)
3635 .k(k)
3636 .sparsity(0.0f)
3637 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3638 }
3639 }
3640 }
3641
3642 TEST(F32_SPMM_8X1__SSE, m_lt_8) {
3643 TEST_REQUIRES_X86_SSE;
3644 for (uint32_t m = 1; m < 8; m++) {
3645 for (uint32_t n = 1; n < 10; n += 2) {
3646 for (size_t k = 1; k <= 5; k += 2) {
3647 SpMMMicrokernelTester()
3648 .mr(8)
3649 .nr(1)
3650 .m(m)
3651 .n(n)
3652 .k(k)
3653 .sparsity(0.0f)
3654 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3655 }
3656 }
3657 }
3658 }
3659
3660 TEST(F32_SPMM_8X1__SSE, m_div_8) {
3661 TEST_REQUIRES_X86_SSE;
3662 for (uint32_t m = 16; m <= 24; m += 8) {
3663 for (uint32_t n = 1; n < 10; n += 2) {
3664 for (size_t k = 1; k <= 5; k += 2) {
3665 SpMMMicrokernelTester()
3666 .mr(8)
3667 .nr(1)
3668 .m(m)
3669 .n(n)
3670 .k(k)
3671 .sparsity(0.0f)
3672 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3673 }
3674 }
3675 }
3676 }
3677
3678 TEST(F32_SPMM_8X1__SSE, m_gt_8) {
3679 TEST_REQUIRES_X86_SSE;
3680 for (uint32_t m = 9; m < 16; m++) {
3681 for (uint32_t n = 1; n < 10; n += 2) {
3682 for (size_t k = 1; k <= 5; k += 2) {
3683 SpMMMicrokernelTester()
3684 .mr(8)
3685 .nr(1)
3686 .m(m)
3687 .n(n)
3688 .k(k)
3689 .sparsity(0.0f)
3690 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3691 }
3692 }
3693 }
3694 }
3695
3696 TEST(F32_SPMM_8X1__SSE, qmin) {
3697 TEST_REQUIRES_X86_SSE;
3698 for (uint32_t n = 1; n < 10; n += 2) {
3699 for (size_t k = 1; k <= 5; k += 2) {
3700 SpMMMicrokernelTester()
3701 .mr(8)
3702 .nr(1)
3703 .m(16)
3704 .n(n)
3705 .k(k)
3706 .sparsity(0.0f)
3707 .qmin(128)
3708 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3709 }
3710 }
3711 }
3712
3713 TEST(F32_SPMM_8X1__SSE, qmax) {
3714 TEST_REQUIRES_X86_SSE;
3715 for (uint32_t n = 1; n < 10; n += 2) {
3716 for (size_t k = 1; k <= 5; k += 2) {
3717 SpMMMicrokernelTester()
3718 .mr(8)
3719 .nr(1)
3720 .m(16)
3721 .n(n)
3722 .k(k)
3723 .sparsity(0.0f)
3724 .qmax(128)
3725 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3726 }
3727 }
3728 }
3729
3730 TEST(F32_SPMM_8X1__SSE, half_sparse) {
3731 TEST_REQUIRES_X86_SSE;
3732 for (uint32_t n = 1; n < 10; n += 2) {
3733 for (size_t k = 1; k <= 5; k += 2) {
3734 SpMMMicrokernelTester()
3735 .mr(8)
3736 .nr(1)
3737 .m(16)
3738 .n(n)
3739 .k(k)
3740 .sparsity(0.5f)
3741 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3742 }
3743 }
3744 }
3745
3746 TEST(F32_SPMM_8X1__SSE, zero_weights) {
3747 TEST_REQUIRES_X86_SSE;
3748 for (uint32_t n = 1; n < 10; n += 2) {
3749 for (size_t k = 1; k <= 5; k += 2) {
3750 SpMMMicrokernelTester()
3751 .mr(8)
3752 .nr(1)
3753 .m(16)
3754 .n(n)
3755 .k(k)
3756 .sparsity(1.0f)
3757 .Test(xnn_f32_spmm_ukernel_8x1__sse);
3758 }
3759 }
3760 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07003761#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07003762
3763
3764TEST(F32_SPMM_1X1__SCALAR, k_eq_1) {
3765 SpMMMicrokernelTester()
3766 .mr(1)
3767 .nr(1)
3768 .m(1)
3769 .n(1)
3770 .k(1)
3771 .sparsity(0.0f)
3772 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3773}
3774
3775TEST(F32_SPMM_1X1__SCALAR, k_gt_1) {
3776 for (size_t k = 2; k < 10; k++) {
3777 SpMMMicrokernelTester()
3778 .mr(1)
3779 .nr(1)
3780 .m(1)
3781 .n(1)
3782 .k(k)
3783 .sparsity(0.0f)
3784 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3785 }
3786}
3787
3788TEST(F32_SPMM_1X1__SCALAR, n_gt_1) {
3789 for (uint32_t n = 2; n < 10; n++) {
3790 for (size_t k = 1; k <= 5; k += 2) {
3791 SpMMMicrokernelTester()
3792 .mr(1)
3793 .nr(1)
3794 .m(1)
3795 .n(n)
3796 .k(k)
3797 .sparsity(0.0f)
3798 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3799 }
3800 }
3801}
3802
3803TEST(F32_SPMM_1X1__SCALAR, m_lt_1) {
3804 for (uint32_t m = 1; m < 1; m++) {
3805 for (uint32_t n = 1; n < 10; n += 2) {
3806 for (size_t k = 1; k <= 5; k += 2) {
3807 SpMMMicrokernelTester()
3808 .mr(1)
3809 .nr(1)
3810 .m(m)
3811 .n(n)
3812 .k(k)
3813 .sparsity(0.0f)
3814 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3815 }
3816 }
3817 }
3818}
3819
3820TEST(F32_SPMM_1X1__SCALAR, m_div_1) {
3821 for (uint32_t m = 2; m <= 3; m += 1) {
3822 for (uint32_t n = 1; n < 10; n += 2) {
3823 for (size_t k = 1; k <= 5; k += 2) {
3824 SpMMMicrokernelTester()
3825 .mr(1)
3826 .nr(1)
3827 .m(m)
3828 .n(n)
3829 .k(k)
3830 .sparsity(0.0f)
3831 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3832 }
3833 }
3834 }
3835}
3836
3837TEST(F32_SPMM_1X1__SCALAR, m_gt_1) {
3838 for (uint32_t m = 2; m < 2; m++) {
3839 for (uint32_t n = 1; n < 10; n += 2) {
3840 for (size_t k = 1; k <= 5; k += 2) {
3841 SpMMMicrokernelTester()
3842 .mr(1)
3843 .nr(1)
3844 .m(m)
3845 .n(n)
3846 .k(k)
3847 .sparsity(0.0f)
3848 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3849 }
3850 }
3851 }
3852}
3853
3854TEST(F32_SPMM_1X1__SCALAR, qmin) {
3855 for (uint32_t n = 1; n < 10; n += 2) {
3856 for (size_t k = 1; k <= 5; k += 2) {
3857 SpMMMicrokernelTester()
3858 .mr(1)
3859 .nr(1)
3860 .m(2)
3861 .n(n)
3862 .k(k)
3863 .sparsity(0.0f)
3864 .qmin(128)
3865 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3866 }
3867 }
3868}
3869
3870TEST(F32_SPMM_1X1__SCALAR, qmax) {
3871 for (uint32_t n = 1; n < 10; n += 2) {
3872 for (size_t k = 1; k <= 5; k += 2) {
3873 SpMMMicrokernelTester()
3874 .mr(1)
3875 .nr(1)
3876 .m(2)
3877 .n(n)
3878 .k(k)
3879 .sparsity(0.0f)
3880 .qmax(128)
3881 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3882 }
3883 }
3884}
3885
3886TEST(F32_SPMM_1X1__SCALAR, half_sparse) {
3887 for (uint32_t n = 1; n < 10; n += 2) {
3888 for (size_t k = 1; k <= 5; k += 2) {
3889 SpMMMicrokernelTester()
3890 .mr(1)
3891 .nr(1)
3892 .m(2)
3893 .n(n)
3894 .k(k)
3895 .sparsity(0.5f)
3896 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3897 }
3898 }
3899}
3900
3901TEST(F32_SPMM_1X1__SCALAR, zero_weights) {
3902 for (uint32_t n = 1; n < 10; n += 2) {
3903 for (size_t k = 1; k <= 5; k += 2) {
3904 SpMMMicrokernelTester()
3905 .mr(1)
3906 .nr(1)
3907 .m(2)
3908 .n(n)
3909 .k(k)
3910 .sparsity(1.0f)
3911 .Test(xnn_f32_spmm_ukernel_1x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
3912 }
3913 }
3914}
3915
3916TEST(F32_SPMM_1X1__SCALAR_PIPELINED, k_eq_1) {
3917 SpMMMicrokernelTester()
3918 .mr(1)
3919 .nr(1)
3920 .m(1)
3921 .n(1)
3922 .k(1)
3923 .sparsity(0.0f)
3924 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
3925}
3926
3927TEST(F32_SPMM_1X1__SCALAR_PIPELINED, k_gt_1) {
3928 for (size_t k = 2; k < 10; k++) {
3929 SpMMMicrokernelTester()
3930 .mr(1)
3931 .nr(1)
3932 .m(1)
3933 .n(1)
3934 .k(k)
3935 .sparsity(0.0f)
3936 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
3937 }
3938}
3939
3940TEST(F32_SPMM_1X1__SCALAR_PIPELINED, n_gt_1) {
3941 for (uint32_t n = 2; n < 10; n++) {
3942 for (size_t k = 1; k <= 5; k += 2) {
3943 SpMMMicrokernelTester()
3944 .mr(1)
3945 .nr(1)
3946 .m(1)
3947 .n(n)
3948 .k(k)
3949 .sparsity(0.0f)
3950 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
3951 }
3952 }
3953}
3954
3955TEST(F32_SPMM_1X1__SCALAR_PIPELINED, m_lt_1) {
3956 for (uint32_t m = 1; m < 1; m++) {
3957 for (uint32_t n = 1; n < 10; n += 2) {
3958 for (size_t k = 1; k <= 5; k += 2) {
3959 SpMMMicrokernelTester()
3960 .mr(1)
3961 .nr(1)
3962 .m(m)
3963 .n(n)
3964 .k(k)
3965 .sparsity(0.0f)
3966 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
3967 }
3968 }
3969 }
3970}
3971
3972TEST(F32_SPMM_1X1__SCALAR_PIPELINED, m_div_1) {
3973 for (uint32_t m = 2; m <= 3; m += 1) {
3974 for (uint32_t n = 1; n < 10; n += 2) {
3975 for (size_t k = 1; k <= 5; k += 2) {
3976 SpMMMicrokernelTester()
3977 .mr(1)
3978 .nr(1)
3979 .m(m)
3980 .n(n)
3981 .k(k)
3982 .sparsity(0.0f)
3983 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
3984 }
3985 }
3986 }
3987}
3988
3989TEST(F32_SPMM_1X1__SCALAR_PIPELINED, m_gt_1) {
3990 for (uint32_t m = 2; m < 2; m++) {
3991 for (uint32_t n = 1; n < 10; n += 2) {
3992 for (size_t k = 1; k <= 5; k += 2) {
3993 SpMMMicrokernelTester()
3994 .mr(1)
3995 .nr(1)
3996 .m(m)
3997 .n(n)
3998 .k(k)
3999 .sparsity(0.0f)
4000 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4001 }
4002 }
4003 }
4004}
4005
4006TEST(F32_SPMM_1X1__SCALAR_PIPELINED, qmin) {
4007 for (uint32_t n = 1; n < 10; n += 2) {
4008 for (size_t k = 1; k <= 5; k += 2) {
4009 SpMMMicrokernelTester()
4010 .mr(1)
4011 .nr(1)
4012 .m(2)
4013 .n(n)
4014 .k(k)
4015 .sparsity(0.0f)
4016 .qmin(128)
4017 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4018 }
4019 }
4020}
4021
4022TEST(F32_SPMM_1X1__SCALAR_PIPELINED, qmax) {
4023 for (uint32_t n = 1; n < 10; n += 2) {
4024 for (size_t k = 1; k <= 5; k += 2) {
4025 SpMMMicrokernelTester()
4026 .mr(1)
4027 .nr(1)
4028 .m(2)
4029 .n(n)
4030 .k(k)
4031 .sparsity(0.0f)
4032 .qmax(128)
4033 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4034 }
4035 }
4036}
4037
4038TEST(F32_SPMM_1X1__SCALAR_PIPELINED, half_sparse) {
4039 for (uint32_t n = 1; n < 10; n += 2) {
4040 for (size_t k = 1; k <= 5; k += 2) {
4041 SpMMMicrokernelTester()
4042 .mr(1)
4043 .nr(1)
4044 .m(2)
4045 .n(n)
4046 .k(k)
4047 .sparsity(0.5f)
4048 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4049 }
4050 }
4051}
4052
4053TEST(F32_SPMM_1X1__SCALAR_PIPELINED, zero_weights) {
4054 for (uint32_t n = 1; n < 10; n += 2) {
4055 for (size_t k = 1; k <= 5; k += 2) {
4056 SpMMMicrokernelTester()
4057 .mr(1)
4058 .nr(1)
4059 .m(2)
4060 .n(n)
4061 .k(k)
4062 .sparsity(1.0f)
4063 .Test(xnn_f32_spmm_ukernel_1x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4064 }
4065 }
4066}
4067
XNNPACK Teamb455b122019-09-27 18:10:33 -07004068TEST(F32_SPMM_2X1__SCALAR, k_eq_1) {
4069 SpMMMicrokernelTester()
4070 .mr(2)
4071 .nr(1)
4072 .m(2)
4073 .n(1)
4074 .k(1)
4075 .sparsity(0.0f)
4076 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4077}
4078
4079TEST(F32_SPMM_2X1__SCALAR, k_gt_1) {
4080 for (size_t k = 2; k < 10; k++) {
4081 SpMMMicrokernelTester()
4082 .mr(2)
4083 .nr(1)
4084 .m(2)
4085 .n(1)
4086 .k(k)
4087 .sparsity(0.0f)
4088 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4089 }
4090}
4091
4092TEST(F32_SPMM_2X1__SCALAR, n_gt_1) {
4093 for (uint32_t n = 2; n < 10; n++) {
4094 for (size_t k = 1; k <= 5; k += 2) {
4095 SpMMMicrokernelTester()
4096 .mr(2)
4097 .nr(1)
4098 .m(2)
4099 .n(n)
4100 .k(k)
4101 .sparsity(0.0f)
4102 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4103 }
4104 }
4105}
4106
4107TEST(F32_SPMM_2X1__SCALAR, m_lt_2) {
4108 for (uint32_t m = 1; m < 2; m++) {
4109 for (uint32_t n = 1; n < 10; n += 2) {
4110 for (size_t k = 1; k <= 5; k += 2) {
4111 SpMMMicrokernelTester()
4112 .mr(2)
4113 .nr(1)
4114 .m(m)
4115 .n(n)
4116 .k(k)
4117 .sparsity(0.0f)
4118 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4119 }
4120 }
4121 }
4122}
4123
4124TEST(F32_SPMM_2X1__SCALAR, m_div_2) {
4125 for (uint32_t m = 4; m <= 6; m += 2) {
4126 for (uint32_t n = 1; n < 10; n += 2) {
4127 for (size_t k = 1; k <= 5; k += 2) {
4128 SpMMMicrokernelTester()
4129 .mr(2)
4130 .nr(1)
4131 .m(m)
4132 .n(n)
4133 .k(k)
4134 .sparsity(0.0f)
4135 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4136 }
4137 }
4138 }
4139}
4140
4141TEST(F32_SPMM_2X1__SCALAR, m_gt_2) {
4142 for (uint32_t m = 3; m < 4; m++) {
4143 for (uint32_t n = 1; n < 10; n += 2) {
4144 for (size_t k = 1; k <= 5; k += 2) {
4145 SpMMMicrokernelTester()
4146 .mr(2)
4147 .nr(1)
4148 .m(m)
4149 .n(n)
4150 .k(k)
4151 .sparsity(0.0f)
4152 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4153 }
4154 }
4155 }
4156}
4157
4158TEST(F32_SPMM_2X1__SCALAR, qmin) {
4159 for (uint32_t n = 1; n < 10; n += 2) {
4160 for (size_t k = 1; k <= 5; k += 2) {
4161 SpMMMicrokernelTester()
4162 .mr(2)
4163 .nr(1)
4164 .m(4)
4165 .n(n)
4166 .k(k)
4167 .sparsity(0.0f)
4168 .qmin(128)
4169 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4170 }
4171 }
4172}
4173
4174TEST(F32_SPMM_2X1__SCALAR, qmax) {
4175 for (uint32_t n = 1; n < 10; n += 2) {
4176 for (size_t k = 1; k <= 5; k += 2) {
4177 SpMMMicrokernelTester()
4178 .mr(2)
4179 .nr(1)
4180 .m(4)
4181 .n(n)
4182 .k(k)
4183 .sparsity(0.0f)
4184 .qmax(128)
4185 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4186 }
4187 }
4188}
4189
4190TEST(F32_SPMM_2X1__SCALAR, half_sparse) {
4191 for (uint32_t n = 1; n < 10; n += 2) {
4192 for (size_t k = 1; k <= 5; k += 2) {
4193 SpMMMicrokernelTester()
4194 .mr(2)
4195 .nr(1)
4196 .m(4)
4197 .n(n)
4198 .k(k)
4199 .sparsity(0.5f)
4200 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4201 }
4202 }
4203}
4204
4205TEST(F32_SPMM_2X1__SCALAR, zero_weights) {
4206 for (uint32_t n = 1; n < 10; n += 2) {
4207 for (size_t k = 1; k <= 5; k += 2) {
4208 SpMMMicrokernelTester()
4209 .mr(2)
4210 .nr(1)
4211 .m(4)
4212 .n(n)
4213 .k(k)
4214 .sparsity(1.0f)
4215 .Test(xnn_f32_spmm_ukernel_2x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4216 }
4217 }
4218}
4219
4220TEST(F32_SPMM_2X1__SCALAR_PIPELINED, k_eq_1) {
4221 SpMMMicrokernelTester()
4222 .mr(2)
4223 .nr(1)
4224 .m(2)
4225 .n(1)
4226 .k(1)
4227 .sparsity(0.0f)
4228 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4229}
4230
4231TEST(F32_SPMM_2X1__SCALAR_PIPELINED, k_gt_1) {
4232 for (size_t k = 2; k < 10; k++) {
4233 SpMMMicrokernelTester()
4234 .mr(2)
4235 .nr(1)
4236 .m(2)
4237 .n(1)
4238 .k(k)
4239 .sparsity(0.0f)
4240 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4241 }
4242}
4243
4244TEST(F32_SPMM_2X1__SCALAR_PIPELINED, n_gt_1) {
4245 for (uint32_t n = 2; n < 10; n++) {
4246 for (size_t k = 1; k <= 5; k += 2) {
4247 SpMMMicrokernelTester()
4248 .mr(2)
4249 .nr(1)
4250 .m(2)
4251 .n(n)
4252 .k(k)
4253 .sparsity(0.0f)
4254 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4255 }
4256 }
4257}
4258
4259TEST(F32_SPMM_2X1__SCALAR_PIPELINED, m_lt_2) {
4260 for (uint32_t m = 1; m < 2; m++) {
4261 for (uint32_t n = 1; n < 10; n += 2) {
4262 for (size_t k = 1; k <= 5; k += 2) {
4263 SpMMMicrokernelTester()
4264 .mr(2)
4265 .nr(1)
4266 .m(m)
4267 .n(n)
4268 .k(k)
4269 .sparsity(0.0f)
4270 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4271 }
4272 }
4273 }
4274}
4275
4276TEST(F32_SPMM_2X1__SCALAR_PIPELINED, m_div_2) {
4277 for (uint32_t m = 4; m <= 6; m += 2) {
4278 for (uint32_t n = 1; n < 10; n += 2) {
4279 for (size_t k = 1; k <= 5; k += 2) {
4280 SpMMMicrokernelTester()
4281 .mr(2)
4282 .nr(1)
4283 .m(m)
4284 .n(n)
4285 .k(k)
4286 .sparsity(0.0f)
4287 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4288 }
4289 }
4290 }
4291}
4292
4293TEST(F32_SPMM_2X1__SCALAR_PIPELINED, m_gt_2) {
4294 for (uint32_t m = 3; m < 4; m++) {
4295 for (uint32_t n = 1; n < 10; n += 2) {
4296 for (size_t k = 1; k <= 5; k += 2) {
4297 SpMMMicrokernelTester()
4298 .mr(2)
4299 .nr(1)
4300 .m(m)
4301 .n(n)
4302 .k(k)
4303 .sparsity(0.0f)
4304 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4305 }
4306 }
4307 }
4308}
4309
4310TEST(F32_SPMM_2X1__SCALAR_PIPELINED, qmin) {
4311 for (uint32_t n = 1; n < 10; n += 2) {
4312 for (size_t k = 1; k <= 5; k += 2) {
4313 SpMMMicrokernelTester()
4314 .mr(2)
4315 .nr(1)
4316 .m(4)
4317 .n(n)
4318 .k(k)
4319 .sparsity(0.0f)
4320 .qmin(128)
4321 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4322 }
4323 }
4324}
4325
4326TEST(F32_SPMM_2X1__SCALAR_PIPELINED, qmax) {
4327 for (uint32_t n = 1; n < 10; n += 2) {
4328 for (size_t k = 1; k <= 5; k += 2) {
4329 SpMMMicrokernelTester()
4330 .mr(2)
4331 .nr(1)
4332 .m(4)
4333 .n(n)
4334 .k(k)
4335 .sparsity(0.0f)
4336 .qmax(128)
4337 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4338 }
4339 }
4340}
4341
4342TEST(F32_SPMM_2X1__SCALAR_PIPELINED, half_sparse) {
4343 for (uint32_t n = 1; n < 10; n += 2) {
4344 for (size_t k = 1; k <= 5; k += 2) {
4345 SpMMMicrokernelTester()
4346 .mr(2)
4347 .nr(1)
4348 .m(4)
4349 .n(n)
4350 .k(k)
4351 .sparsity(0.5f)
4352 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4353 }
4354 }
4355}
4356
4357TEST(F32_SPMM_2X1__SCALAR_PIPELINED, zero_weights) {
4358 for (uint32_t n = 1; n < 10; n += 2) {
4359 for (size_t k = 1; k <= 5; k += 2) {
4360 SpMMMicrokernelTester()
4361 .mr(2)
4362 .nr(1)
4363 .m(4)
4364 .n(n)
4365 .k(k)
4366 .sparsity(1.0f)
4367 .Test(xnn_f32_spmm_ukernel_2x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4368 }
4369 }
4370}
4371
XNNPACK Teamb455b122019-09-27 18:10:33 -07004372TEST(F32_SPMM_4X1__SCALAR, k_eq_1) {
4373 SpMMMicrokernelTester()
4374 .mr(4)
4375 .nr(1)
4376 .m(4)
4377 .n(1)
4378 .k(1)
4379 .sparsity(0.0f)
4380 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4381}
4382
4383TEST(F32_SPMM_4X1__SCALAR, k_gt_1) {
4384 for (size_t k = 2; k < 10; k++) {
4385 SpMMMicrokernelTester()
4386 .mr(4)
4387 .nr(1)
4388 .m(4)
4389 .n(1)
4390 .k(k)
4391 .sparsity(0.0f)
4392 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4393 }
4394}
4395
4396TEST(F32_SPMM_4X1__SCALAR, n_gt_1) {
4397 for (uint32_t n = 2; n < 10; n++) {
4398 for (size_t k = 1; k <= 5; k += 2) {
4399 SpMMMicrokernelTester()
4400 .mr(4)
4401 .nr(1)
4402 .m(4)
4403 .n(n)
4404 .k(k)
4405 .sparsity(0.0f)
4406 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4407 }
4408 }
4409}
4410
4411TEST(F32_SPMM_4X1__SCALAR, m_lt_4) {
4412 for (uint32_t m = 1; m < 4; m++) {
4413 for (uint32_t n = 1; n < 10; n += 2) {
4414 for (size_t k = 1; k <= 5; k += 2) {
4415 SpMMMicrokernelTester()
4416 .mr(4)
4417 .nr(1)
4418 .m(m)
4419 .n(n)
4420 .k(k)
4421 .sparsity(0.0f)
4422 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4423 }
4424 }
4425 }
4426}
4427
4428TEST(F32_SPMM_4X1__SCALAR, m_div_4) {
4429 for (uint32_t m = 8; m <= 12; m += 4) {
4430 for (uint32_t n = 1; n < 10; n += 2) {
4431 for (size_t k = 1; k <= 5; k += 2) {
4432 SpMMMicrokernelTester()
4433 .mr(4)
4434 .nr(1)
4435 .m(m)
4436 .n(n)
4437 .k(k)
4438 .sparsity(0.0f)
4439 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4440 }
4441 }
4442 }
4443}
4444
4445TEST(F32_SPMM_4X1__SCALAR, m_gt_4) {
4446 for (uint32_t m = 5; m < 8; m++) {
4447 for (uint32_t n = 1; n < 10; n += 2) {
4448 for (size_t k = 1; k <= 5; k += 2) {
4449 SpMMMicrokernelTester()
4450 .mr(4)
4451 .nr(1)
4452 .m(m)
4453 .n(n)
4454 .k(k)
4455 .sparsity(0.0f)
4456 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4457 }
4458 }
4459 }
4460}
4461
4462TEST(F32_SPMM_4X1__SCALAR, qmin) {
4463 for (uint32_t n = 1; n < 10; n += 2) {
4464 for (size_t k = 1; k <= 5; k += 2) {
4465 SpMMMicrokernelTester()
4466 .mr(4)
4467 .nr(1)
4468 .m(8)
4469 .n(n)
4470 .k(k)
4471 .sparsity(0.0f)
4472 .qmin(128)
4473 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4474 }
4475 }
4476}
4477
4478TEST(F32_SPMM_4X1__SCALAR, qmax) {
4479 for (uint32_t n = 1; n < 10; n += 2) {
4480 for (size_t k = 1; k <= 5; k += 2) {
4481 SpMMMicrokernelTester()
4482 .mr(4)
4483 .nr(1)
4484 .m(8)
4485 .n(n)
4486 .k(k)
4487 .sparsity(0.0f)
4488 .qmax(128)
4489 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4490 }
4491 }
4492}
4493
4494TEST(F32_SPMM_4X1__SCALAR, half_sparse) {
4495 for (uint32_t n = 1; n < 10; n += 2) {
4496 for (size_t k = 1; k <= 5; k += 2) {
4497 SpMMMicrokernelTester()
4498 .mr(4)
4499 .nr(1)
4500 .m(8)
4501 .n(n)
4502 .k(k)
4503 .sparsity(0.5f)
4504 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4505 }
4506 }
4507}
4508
4509TEST(F32_SPMM_4X1__SCALAR, zero_weights) {
4510 for (uint32_t n = 1; n < 10; n += 2) {
4511 for (size_t k = 1; k <= 5; k += 2) {
4512 SpMMMicrokernelTester()
4513 .mr(4)
4514 .nr(1)
4515 .m(8)
4516 .n(n)
4517 .k(k)
4518 .sparsity(1.0f)
4519 .Test(xnn_f32_spmm_ukernel_4x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4520 }
4521 }
4522}
4523
4524TEST(F32_SPMM_4X1__SCALAR_PIPELINED, k_eq_1) {
4525 SpMMMicrokernelTester()
4526 .mr(4)
4527 .nr(1)
4528 .m(4)
4529 .n(1)
4530 .k(1)
4531 .sparsity(0.0f)
4532 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4533}
4534
4535TEST(F32_SPMM_4X1__SCALAR_PIPELINED, k_gt_1) {
4536 for (size_t k = 2; k < 10; k++) {
4537 SpMMMicrokernelTester()
4538 .mr(4)
4539 .nr(1)
4540 .m(4)
4541 .n(1)
4542 .k(k)
4543 .sparsity(0.0f)
4544 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4545 }
4546}
4547
4548TEST(F32_SPMM_4X1__SCALAR_PIPELINED, n_gt_1) {
4549 for (uint32_t n = 2; n < 10; n++) {
4550 for (size_t k = 1; k <= 5; k += 2) {
4551 SpMMMicrokernelTester()
4552 .mr(4)
4553 .nr(1)
4554 .m(4)
4555 .n(n)
4556 .k(k)
4557 .sparsity(0.0f)
4558 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4559 }
4560 }
4561}
4562
4563TEST(F32_SPMM_4X1__SCALAR_PIPELINED, m_lt_4) {
4564 for (uint32_t m = 1; m < 4; m++) {
4565 for (uint32_t n = 1; n < 10; n += 2) {
4566 for (size_t k = 1; k <= 5; k += 2) {
4567 SpMMMicrokernelTester()
4568 .mr(4)
4569 .nr(1)
4570 .m(m)
4571 .n(n)
4572 .k(k)
4573 .sparsity(0.0f)
4574 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4575 }
4576 }
4577 }
4578}
4579
4580TEST(F32_SPMM_4X1__SCALAR_PIPELINED, m_div_4) {
4581 for (uint32_t m = 8; m <= 12; m += 4) {
4582 for (uint32_t n = 1; n < 10; n += 2) {
4583 for (size_t k = 1; k <= 5; k += 2) {
4584 SpMMMicrokernelTester()
4585 .mr(4)
4586 .nr(1)
4587 .m(m)
4588 .n(n)
4589 .k(k)
4590 .sparsity(0.0f)
4591 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4592 }
4593 }
4594 }
4595}
4596
4597TEST(F32_SPMM_4X1__SCALAR_PIPELINED, m_gt_4) {
4598 for (uint32_t m = 5; m < 8; m++) {
4599 for (uint32_t n = 1; n < 10; n += 2) {
4600 for (size_t k = 1; k <= 5; k += 2) {
4601 SpMMMicrokernelTester()
4602 .mr(4)
4603 .nr(1)
4604 .m(m)
4605 .n(n)
4606 .k(k)
4607 .sparsity(0.0f)
4608 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4609 }
4610 }
4611 }
4612}
4613
4614TEST(F32_SPMM_4X1__SCALAR_PIPELINED, qmin) {
4615 for (uint32_t n = 1; n < 10; n += 2) {
4616 for (size_t k = 1; k <= 5; k += 2) {
4617 SpMMMicrokernelTester()
4618 .mr(4)
4619 .nr(1)
4620 .m(8)
4621 .n(n)
4622 .k(k)
4623 .sparsity(0.0f)
4624 .qmin(128)
4625 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4626 }
4627 }
4628}
4629
4630TEST(F32_SPMM_4X1__SCALAR_PIPELINED, qmax) {
4631 for (uint32_t n = 1; n < 10; n += 2) {
4632 for (size_t k = 1; k <= 5; k += 2) {
4633 SpMMMicrokernelTester()
4634 .mr(4)
4635 .nr(1)
4636 .m(8)
4637 .n(n)
4638 .k(k)
4639 .sparsity(0.0f)
4640 .qmax(128)
4641 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4642 }
4643 }
4644}
4645
4646TEST(F32_SPMM_4X1__SCALAR_PIPELINED, half_sparse) {
4647 for (uint32_t n = 1; n < 10; n += 2) {
4648 for (size_t k = 1; k <= 5; k += 2) {
4649 SpMMMicrokernelTester()
4650 .mr(4)
4651 .nr(1)
4652 .m(8)
4653 .n(n)
4654 .k(k)
4655 .sparsity(0.5f)
4656 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4657 }
4658 }
4659}
4660
4661TEST(F32_SPMM_4X1__SCALAR_PIPELINED, zero_weights) {
4662 for (uint32_t n = 1; n < 10; n += 2) {
4663 for (size_t k = 1; k <= 5; k += 2) {
4664 SpMMMicrokernelTester()
4665 .mr(4)
4666 .nr(1)
4667 .m(8)
4668 .n(n)
4669 .k(k)
4670 .sparsity(1.0f)
4671 .Test(xnn_f32_spmm_ukernel_4x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4672 }
4673 }
4674}
4675
XNNPACK Teamb455b122019-09-27 18:10:33 -07004676TEST(F32_SPMM_8X1__SCALAR, k_eq_1) {
4677 SpMMMicrokernelTester()
4678 .mr(8)
4679 .nr(1)
4680 .m(8)
4681 .n(1)
4682 .k(1)
4683 .sparsity(0.0f)
4684 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4685}
4686
4687TEST(F32_SPMM_8X1__SCALAR, k_gt_1) {
4688 for (size_t k = 2; k < 10; k++) {
4689 SpMMMicrokernelTester()
4690 .mr(8)
4691 .nr(1)
4692 .m(8)
4693 .n(1)
4694 .k(k)
4695 .sparsity(0.0f)
4696 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4697 }
4698}
4699
4700TEST(F32_SPMM_8X1__SCALAR, n_gt_1) {
4701 for (uint32_t n = 2; n < 10; n++) {
4702 for (size_t k = 1; k <= 5; k += 2) {
4703 SpMMMicrokernelTester()
4704 .mr(8)
4705 .nr(1)
4706 .m(8)
4707 .n(n)
4708 .k(k)
4709 .sparsity(0.0f)
4710 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4711 }
4712 }
4713}
4714
4715TEST(F32_SPMM_8X1__SCALAR, m_lt_8) {
4716 for (uint32_t m = 1; m < 8; m++) {
4717 for (uint32_t n = 1; n < 10; n += 2) {
4718 for (size_t k = 1; k <= 5; k += 2) {
4719 SpMMMicrokernelTester()
4720 .mr(8)
4721 .nr(1)
4722 .m(m)
4723 .n(n)
4724 .k(k)
4725 .sparsity(0.0f)
4726 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4727 }
4728 }
4729 }
4730}
4731
4732TEST(F32_SPMM_8X1__SCALAR, m_div_8) {
4733 for (uint32_t m = 16; m <= 24; m += 8) {
4734 for (uint32_t n = 1; n < 10; n += 2) {
4735 for (size_t k = 1; k <= 5; k += 2) {
4736 SpMMMicrokernelTester()
4737 .mr(8)
4738 .nr(1)
4739 .m(m)
4740 .n(n)
4741 .k(k)
4742 .sparsity(0.0f)
4743 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4744 }
4745 }
4746 }
4747}
4748
4749TEST(F32_SPMM_8X1__SCALAR, m_gt_8) {
4750 for (uint32_t m = 9; m < 16; m++) {
4751 for (uint32_t n = 1; n < 10; n += 2) {
4752 for (size_t k = 1; k <= 5; k += 2) {
4753 SpMMMicrokernelTester()
4754 .mr(8)
4755 .nr(1)
4756 .m(m)
4757 .n(n)
4758 .k(k)
4759 .sparsity(0.0f)
4760 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4761 }
4762 }
4763 }
4764}
4765
4766TEST(F32_SPMM_8X1__SCALAR, qmin) {
4767 for (uint32_t n = 1; n < 10; n += 2) {
4768 for (size_t k = 1; k <= 5; k += 2) {
4769 SpMMMicrokernelTester()
4770 .mr(8)
4771 .nr(1)
4772 .m(16)
4773 .n(n)
4774 .k(k)
4775 .sparsity(0.0f)
4776 .qmin(128)
4777 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4778 }
4779 }
4780}
4781
4782TEST(F32_SPMM_8X1__SCALAR, qmax) {
4783 for (uint32_t n = 1; n < 10; n += 2) {
4784 for (size_t k = 1; k <= 5; k += 2) {
4785 SpMMMicrokernelTester()
4786 .mr(8)
4787 .nr(1)
4788 .m(16)
4789 .n(n)
4790 .k(k)
4791 .sparsity(0.0f)
4792 .qmax(128)
4793 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4794 }
4795 }
4796}
4797
4798TEST(F32_SPMM_8X1__SCALAR, half_sparse) {
4799 for (uint32_t n = 1; n < 10; n += 2) {
4800 for (size_t k = 1; k <= 5; k += 2) {
4801 SpMMMicrokernelTester()
4802 .mr(8)
4803 .nr(1)
4804 .m(16)
4805 .n(n)
4806 .k(k)
4807 .sparsity(0.5f)
4808 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4809 }
4810 }
4811}
4812
4813TEST(F32_SPMM_8X1__SCALAR, zero_weights) {
4814 for (uint32_t n = 1; n < 10; n += 2) {
4815 for (size_t k = 1; k <= 5; k += 2) {
4816 SpMMMicrokernelTester()
4817 .mr(8)
4818 .nr(1)
4819 .m(16)
4820 .n(n)
4821 .k(k)
4822 .sparsity(1.0f)
4823 .Test(xnn_f32_spmm_ukernel_8x1__scalar, SpMMMicrokernelTester::Variant::Scalar);
4824 }
4825 }
4826}
4827
4828TEST(F32_SPMM_8X1__SCALAR_PIPELINED, k_eq_1) {
4829 SpMMMicrokernelTester()
4830 .mr(8)
4831 .nr(1)
4832 .m(8)
4833 .n(1)
4834 .k(1)
4835 .sparsity(0.0f)
4836 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4837}
4838
4839TEST(F32_SPMM_8X1__SCALAR_PIPELINED, k_gt_1) {
4840 for (size_t k = 2; k < 10; k++) {
4841 SpMMMicrokernelTester()
4842 .mr(8)
4843 .nr(1)
4844 .m(8)
4845 .n(1)
4846 .k(k)
4847 .sparsity(0.0f)
4848 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4849 }
4850}
4851
4852TEST(F32_SPMM_8X1__SCALAR_PIPELINED, n_gt_1) {
4853 for (uint32_t n = 2; n < 10; n++) {
4854 for (size_t k = 1; k <= 5; k += 2) {
4855 SpMMMicrokernelTester()
4856 .mr(8)
4857 .nr(1)
4858 .m(8)
4859 .n(n)
4860 .k(k)
4861 .sparsity(0.0f)
4862 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4863 }
4864 }
4865}
4866
4867TEST(F32_SPMM_8X1__SCALAR_PIPELINED, m_lt_8) {
4868 for (uint32_t m = 1; m < 8; m++) {
4869 for (uint32_t n = 1; n < 10; n += 2) {
4870 for (size_t k = 1; k <= 5; k += 2) {
4871 SpMMMicrokernelTester()
4872 .mr(8)
4873 .nr(1)
4874 .m(m)
4875 .n(n)
4876 .k(k)
4877 .sparsity(0.0f)
4878 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4879 }
4880 }
4881 }
4882}
4883
4884TEST(F32_SPMM_8X1__SCALAR_PIPELINED, m_div_8) {
4885 for (uint32_t m = 16; m <= 24; m += 8) {
4886 for (uint32_t n = 1; n < 10; n += 2) {
4887 for (size_t k = 1; k <= 5; k += 2) {
4888 SpMMMicrokernelTester()
4889 .mr(8)
4890 .nr(1)
4891 .m(m)
4892 .n(n)
4893 .k(k)
4894 .sparsity(0.0f)
4895 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4896 }
4897 }
4898 }
4899}
4900
4901TEST(F32_SPMM_8X1__SCALAR_PIPELINED, m_gt_8) {
4902 for (uint32_t m = 9; m < 16; m++) {
4903 for (uint32_t n = 1; n < 10; n += 2) {
4904 for (size_t k = 1; k <= 5; k += 2) {
4905 SpMMMicrokernelTester()
4906 .mr(8)
4907 .nr(1)
4908 .m(m)
4909 .n(n)
4910 .k(k)
4911 .sparsity(0.0f)
4912 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4913 }
4914 }
4915 }
4916}
4917
4918TEST(F32_SPMM_8X1__SCALAR_PIPELINED, qmin) {
4919 for (uint32_t n = 1; n < 10; n += 2) {
4920 for (size_t k = 1; k <= 5; k += 2) {
4921 SpMMMicrokernelTester()
4922 .mr(8)
4923 .nr(1)
4924 .m(16)
4925 .n(n)
4926 .k(k)
4927 .sparsity(0.0f)
4928 .qmin(128)
4929 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4930 }
4931 }
4932}
4933
4934TEST(F32_SPMM_8X1__SCALAR_PIPELINED, qmax) {
4935 for (uint32_t n = 1; n < 10; n += 2) {
4936 for (size_t k = 1; k <= 5; k += 2) {
4937 SpMMMicrokernelTester()
4938 .mr(8)
4939 .nr(1)
4940 .m(16)
4941 .n(n)
4942 .k(k)
4943 .sparsity(0.0f)
4944 .qmax(128)
4945 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4946 }
4947 }
4948}
4949
4950TEST(F32_SPMM_8X1__SCALAR_PIPELINED, half_sparse) {
4951 for (uint32_t n = 1; n < 10; n += 2) {
4952 for (size_t k = 1; k <= 5; k += 2) {
4953 SpMMMicrokernelTester()
4954 .mr(8)
4955 .nr(1)
4956 .m(16)
4957 .n(n)
4958 .k(k)
4959 .sparsity(0.5f)
4960 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4961 }
4962 }
4963}
4964
4965TEST(F32_SPMM_8X1__SCALAR_PIPELINED, zero_weights) {
4966 for (uint32_t n = 1; n < 10; n += 2) {
4967 for (size_t k = 1; k <= 5; k += 2) {
4968 SpMMMicrokernelTester()
4969 .mr(8)
4970 .nr(1)
4971 .m(16)
4972 .n(n)
4973 .k(k)
4974 .sparsity(1.0f)
4975 .Test(xnn_f32_spmm_ukernel_8x1__scalar_pipelined, SpMMMicrokernelTester::Variant::Scalar);
4976 }
4977 }
4978}
4979
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004980TEST(F32_SPMM_8X2__SCALAR, k_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004981 SpMMMicrokernelTester()
4982 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004983 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004984 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004985 .n(2)
4986 .k(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004987 .sparsity(0.0f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004988 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004989}
4990
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004991TEST(F32_SPMM_8X2__SCALAR, k_eq_1_subtile) {
4992 for (uint32_t n = 1; n <= 2; n++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004993 SpMMMicrokernelTester()
4994 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004995 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004996 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07004997 .n(n)
4998 .k(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004999 .sparsity(0.0f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005000 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005001 }
5002}
5003
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005004TEST(F32_SPMM_8X2__SCALAR, k_gt_1) {
5005 for (size_t k = 2; k < 10; k++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005006 SpMMMicrokernelTester()
5007 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005008 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005009 .m(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005010 .n(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005011 .k(k)
5012 .sparsity(0.0f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005013 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005014 }
5015}
5016
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005017TEST(F32_SPMM_8X2__SCALAR, k_gt_1_subtile) {
5018 for (size_t k = 2; k < 10; k++) {
5019 for (uint32_t n = 1; n <= 2; n++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005020 SpMMMicrokernelTester()
5021 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005022 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005023 .m(8)
5024 .n(n)
5025 .k(k)
5026 .sparsity(0.0f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005027 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005028 }
5029 }
5030}
5031
Marat Dukhanc452eb12019-10-25 19:19:34 -07005032TEST(F32_SPMM_8X2__SCALAR, n_gt_2) {
5033 for (uint32_t n = 3; n < 10; n++) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005034 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005035 SpMMMicrokernelTester()
5036 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005037 .nr(2)
5038 .m(8)
5039 .n(n)
5040 .k(k)
5041 .sparsity(0.0f)
5042 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
5043 }
5044 }
5045}
5046
Marat Dukhanc452eb12019-10-25 19:19:34 -07005047TEST(F32_SPMM_8X2__SCALAR, n_div_2) {
5048 for (uint32_t n = 4; n <= 6; n += 2) {
5049 for (size_t k = 1; k <= 5; k += 2) {
5050 SpMMMicrokernelTester()
5051 .mr(8)
5052 .nr(2)
5053 .m(8)
5054 .n(n)
5055 .k(k)
5056 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
5057 }
5058 }
5059}
5060
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005061TEST(F32_SPMM_8X2__SCALAR, m_lt_8) {
5062 for (uint32_t m = 1; m < 8; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005063 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005064 for (size_t k = 1; k <= 5; k += 2) {
5065 SpMMMicrokernelTester()
5066 .mr(8)
5067 .nr(2)
5068 .m(m)
5069 .n(n)
5070 .k(k)
5071 .sparsity(0.0f)
5072 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
5073 }
5074 }
5075 }
5076}
5077
5078TEST(F32_SPMM_8X2__SCALAR, m_div_8) {
5079 for (uint32_t m = 16; m <= 24; m += 8) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005080 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005081 for (size_t k = 1; k <= 5; k += 2) {
5082 SpMMMicrokernelTester()
5083 .mr(8)
5084 .nr(2)
5085 .m(m)
5086 .n(n)
5087 .k(k)
5088 .sparsity(0.0f)
5089 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
5090 }
5091 }
5092 }
5093}
5094
5095TEST(F32_SPMM_8X2__SCALAR, m_gt_8) {
5096 for (uint32_t m = 9; m < 16; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005097 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005098 for (size_t k = 1; k <= 5; k += 2) {
5099 SpMMMicrokernelTester()
5100 .mr(8)
5101 .nr(2)
5102 .m(m)
5103 .n(n)
5104 .k(k)
5105 .sparsity(0.0f)
5106 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
5107 }
5108 }
5109 }
5110}
5111
5112TEST(F32_SPMM_8X2__SCALAR, qmin) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005113 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005114 for (size_t k = 1; k <= 5; k += 2) {
5115 SpMMMicrokernelTester()
5116 .mr(8)
5117 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005118 .m(16)
5119 .n(n)
5120 .k(k)
5121 .sparsity(0.0f)
5122 .qmin(128)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005123 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005124 }
5125 }
5126}
5127
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005128TEST(F32_SPMM_8X2__SCALAR, qmax) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005129 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005130 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005131 SpMMMicrokernelTester()
5132 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005133 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005134 .m(16)
5135 .n(n)
5136 .k(k)
5137 .sparsity(0.0f)
5138 .qmax(128)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005139 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005140 }
5141 }
5142}
5143
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005144TEST(F32_SPMM_8X2__SCALAR, half_sparse) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005145 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005146 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005147 SpMMMicrokernelTester()
5148 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005149 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005150 .m(16)
5151 .n(n)
5152 .k(k)
5153 .sparsity(0.5f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005154 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005155 }
5156 }
5157}
5158
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005159TEST(F32_SPMM_8X2__SCALAR, zero_weights) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005160 for (uint32_t n = 1; n < 10; n += 3) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005161 for (size_t k = 1; k <= 5; k += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07005162 SpMMMicrokernelTester()
5163 .mr(8)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005164 .nr(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07005165 .m(16)
5166 .n(n)
5167 .k(k)
5168 .sparsity(1.0f)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005169 .Test(xnn_f32_spmm_ukernel_8x2__scalar, SpMMMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -07005170 }
5171 }
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005172}
5173
5174TEST(F32_SPMM_8X4__SCALAR, k_eq_1) {
5175 SpMMMicrokernelTester()
5176 .mr(8)
5177 .nr(4)
5178 .m(8)
Marat Dukhanc452eb12019-10-25 19:19:34 -07005179 .n(4)
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005180 .k(1)
5181 .sparsity(0.0f)
5182 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5183}
5184
5185TEST(F32_SPMM_8X4__SCALAR, k_eq_1_subtile) {
5186 for (uint32_t n = 1; n <= 4; n++) {
5187 SpMMMicrokernelTester()
5188 .mr(8)
5189 .nr(4)
5190 .m(8)
5191 .n(n)
5192 .k(1)
5193 .sparsity(0.0f)
5194 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5195 }
5196}
5197
5198TEST(F32_SPMM_8X4__SCALAR, k_gt_1) {
5199 for (size_t k = 2; k < 10; k++) {
5200 SpMMMicrokernelTester()
5201 .mr(8)
5202 .nr(4)
5203 .m(8)
5204 .n(4)
5205 .k(k)
5206 .sparsity(0.0f)
5207 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5208 }
5209}
5210
5211TEST(F32_SPMM_8X4__SCALAR, k_gt_1_subtile) {
5212 for (size_t k = 2; k < 10; k++) {
5213 for (uint32_t n = 1; n <= 4; n++) {
5214 SpMMMicrokernelTester()
5215 .mr(8)
5216 .nr(4)
5217 .m(8)
5218 .n(n)
5219 .k(k)
5220 .sparsity(0.0f)
5221 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5222 }
5223 }
5224}
5225
Marat Dukhanc452eb12019-10-25 19:19:34 -07005226TEST(F32_SPMM_8X4__SCALAR, n_gt_4) {
5227 for (uint32_t n = 5; n < 10; n++) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005228 for (size_t k = 1; k <= 5; k += 2) {
5229 SpMMMicrokernelTester()
5230 .mr(8)
5231 .nr(4)
5232 .m(8)
5233 .n(n)
5234 .k(k)
5235 .sparsity(0.0f)
5236 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5237 }
5238 }
5239}
5240
Marat Dukhanc452eb12019-10-25 19:19:34 -07005241TEST(F32_SPMM_8X4__SCALAR, n_div_4) {
5242 for (uint32_t n = 8; n <= 12; n += 4) {
5243 for (size_t k = 1; k <= 5; k += 2) {
5244 SpMMMicrokernelTester()
5245 .mr(8)
5246 .nr(4)
5247 .m(8)
5248 .n(n)
5249 .k(k)
5250 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5251 }
5252 }
5253}
5254
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005255TEST(F32_SPMM_8X4__SCALAR, m_lt_8) {
5256 for (uint32_t m = 1; m < 8; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005257 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005258 for (size_t k = 1; k <= 5; k += 2) {
5259 SpMMMicrokernelTester()
5260 .mr(8)
5261 .nr(4)
5262 .m(m)
5263 .n(n)
5264 .k(k)
5265 .sparsity(0.0f)
5266 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5267 }
5268 }
5269 }
5270}
5271
5272TEST(F32_SPMM_8X4__SCALAR, m_div_8) {
5273 for (uint32_t m = 16; m <= 24; m += 8) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005274 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005275 for (size_t k = 1; k <= 5; k += 2) {
5276 SpMMMicrokernelTester()
5277 .mr(8)
5278 .nr(4)
5279 .m(m)
5280 .n(n)
5281 .k(k)
5282 .sparsity(0.0f)
5283 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5284 }
5285 }
5286 }
5287}
5288
5289TEST(F32_SPMM_8X4__SCALAR, m_gt_8) {
5290 for (uint32_t m = 9; m < 16; m++) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005291 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005292 for (size_t k = 1; k <= 5; k += 2) {
5293 SpMMMicrokernelTester()
5294 .mr(8)
5295 .nr(4)
5296 .m(m)
5297 .n(n)
5298 .k(k)
5299 .sparsity(0.0f)
5300 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5301 }
5302 }
5303 }
5304}
5305
5306TEST(F32_SPMM_8X4__SCALAR, qmin) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005307 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005308 for (size_t k = 1; k <= 5; k += 2) {
5309 SpMMMicrokernelTester()
5310 .mr(8)
5311 .nr(4)
5312 .m(16)
5313 .n(n)
5314 .k(k)
5315 .sparsity(0.0f)
5316 .qmin(128)
5317 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5318 }
5319 }
5320}
5321
5322TEST(F32_SPMM_8X4__SCALAR, qmax) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005323 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005324 for (size_t k = 1; k <= 5; k += 2) {
5325 SpMMMicrokernelTester()
5326 .mr(8)
5327 .nr(4)
5328 .m(16)
5329 .n(n)
5330 .k(k)
5331 .sparsity(0.0f)
5332 .qmax(128)
5333 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5334 }
5335 }
5336}
5337
5338TEST(F32_SPMM_8X4__SCALAR, half_sparse) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005339 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005340 for (size_t k = 1; k <= 5; k += 2) {
5341 SpMMMicrokernelTester()
5342 .mr(8)
5343 .nr(4)
5344 .m(16)
5345 .n(n)
5346 .k(k)
5347 .sparsity(0.5f)
5348 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5349 }
5350 }
5351}
5352
5353TEST(F32_SPMM_8X4__SCALAR, zero_weights) {
Marat Dukhanc452eb12019-10-25 19:19:34 -07005354 for (uint32_t n = 1; n < 20; n += 5) {
Erich Elsenc6afd9b2019-10-24 16:10:53 -07005355 for (size_t k = 1; k <= 5; k += 2) {
5356 SpMMMicrokernelTester()
5357 .mr(8)
5358 .nr(4)
5359 .m(16)
5360 .n(n)
5361 .k(k)
5362 .sparsity(1.0f)
5363 .Test(xnn_f32_spmm_ukernel_8x4__scalar, SpMMMicrokernelTester::Variant::Scalar);
5364 }
5365 }
Marat Dukhanc452eb12019-10-25 19:19:34 -07005366}