blob: 773806b862232ab3e9d5e1a1346645d5b5171685 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-ppmm.yaml
// Generator: tools/generate-gemm-test.py
12
13
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <gtest/gtest.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070026 TEST(F32_PPMM_4X8__NEON, k_eq_1) {
27 TEST_REQUIRES_ARM_NEON;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(1)
36 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
37 }
38
39 TEST(F32_PPMM_4X8__NEON, strided_cn) {
40 TEST_REQUIRES_ARM_NEON;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(1)
49 .cn_stride(11)
50 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
51 }
52
53 TEST(F32_PPMM_4X8__NEON, k_eq_1_strided_a) {
54 TEST_REQUIRES_ARM_NEON;
55 GemmMicrokernelTester()
56 .mr(4)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(4)
61 .n(8)
62 .k(1)
63 .a_stride(3)
64 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
65 }
66
67 TEST(F32_PPMM_4X8__NEON, k_eq_1_subtile) {
68 TEST_REQUIRES_ARM_NEON;
69 for (uint32_t m = 1; m <= 4; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(1)
79 .iterations(1)
80 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
81 }
82 }
83 }
84
85 TEST(F32_PPMM_4X8__NEON, k_eq_1_subtile_m) {
86 TEST_REQUIRES_ARM_NEON;
87 for (uint32_t m = 1; m <= 4; m++) {
88 GemmMicrokernelTester()
89 .mr(4)
90 .nr(8)
91 .kr(1)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(1)
96 .iterations(1)
97 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
98 }
99 }
100
101 TEST(F32_PPMM_4X8__NEON, k_eq_1_subtile_n) {
102 TEST_REQUIRES_ARM_NEON;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(4)
106 .nr(8)
107 .kr(1)
108 .sr(1)
109 .m(4)
110 .n(n)
111 .k(1)
112 .iterations(1)
113 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
114 }
115 }
116
117 TEST(F32_PPMM_4X8__NEON, k_gt_1) {
118 TEST_REQUIRES_ARM_NEON;
119 for (size_t k = 2; k < 10; k++) {
120 GemmMicrokernelTester()
121 .mr(4)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(4)
126 .n(8)
127 .k(k)
128 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
129 }
130 }
131
132 TEST(F32_PPMM_4X8__NEON, k_gt_1_subtile) {
133 TEST_REQUIRES_ARM_NEON;
134 for (size_t k = 2; k < 10; k++) {
135 for (uint32_t m = 1; m <= 4; m++) {
136 for (uint32_t n = 1; n <= 8; n++) {
137 GemmMicrokernelTester()
138 .mr(4)
139 .nr(8)
140 .kr(1)
141 .sr(1)
142 .m(m)
143 .n(n)
144 .k(k)
145 .iterations(1)
146 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
147 }
148 }
149 }
150 }
151
152 TEST(F32_PPMM_4X8__NEON, n_gt_8) {
153 TEST_REQUIRES_ARM_NEON;
154 for (uint32_t n = 9; n < 16; n++) {
155 for (size_t k = 1; k <= 5; k += 2) {
156 GemmMicrokernelTester()
157 .mr(4)
158 .nr(8)
159 .kr(1)
160 .sr(1)
161 .m(4)
162 .n(8)
163 .k(k)
164 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
165 }
166 }
167 }
168
169 TEST(F32_PPMM_4X8__NEON, n_gt_8_strided_cn) {
170 TEST_REQUIRES_ARM_NEON;
171 for (uint32_t n = 9; n < 16; n++) {
172 for (size_t k = 1; k <= 5; k += 2) {
173 GemmMicrokernelTester()
174 .mr(4)
175 .nr(8)
176 .kr(1)
177 .sr(1)
178 .m(4)
179 .n(8)
180 .k(k)
181 .cn_stride(11)
182 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
183 }
184 }
185 }
186
187 TEST(F32_PPMM_4X8__NEON, n_gt_8_strided_a) {
188 TEST_REQUIRES_ARM_NEON;
189 for (uint32_t n = 9; n < 16; n++) {
190 for (size_t k = 1; k <= 5; k += 2) {
191 GemmMicrokernelTester()
192 .mr(4)
193 .nr(8)
194 .kr(1)
195 .sr(1)
196 .m(4)
197 .n(n)
198 .k(k)
199 .a_stride(7)
200 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
201 }
202 }
203 }
204
205 TEST(F32_PPMM_4X8__NEON, n_gt_8_subtile) {
206 TEST_REQUIRES_ARM_NEON;
207 for (uint32_t n = 9; n < 16; n++) {
208 for (size_t k = 1; k <= 5; k += 2) {
209 for (uint32_t m = 1; m <= 4; m++) {
210 GemmMicrokernelTester()
211 .mr(4)
212 .nr(8)
213 .kr(1)
214 .sr(1)
215 .m(m)
216 .n(n)
217 .k(k)
218 .iterations(1)
219 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
220 }
221 }
222 }
223 }
224
225 TEST(F32_PPMM_4X8__NEON, n_div_8) {
226 TEST_REQUIRES_ARM_NEON;
227 for (uint32_t n = 16; n <= 24; n += 8) {
228 for (size_t k = 1; k <= 5; k += 2) {
229 GemmMicrokernelTester()
230 .mr(4)
231 .nr(8)
232 .kr(1)
233 .sr(1)
234 .m(4)
235 .n(8)
236 .k(k)
237 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
238 }
239 }
240 }
241
242 TEST(F32_PPMM_4X8__NEON, n_div_8_strided_cn) {
243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t n = 16; n <= 24; n += 8) {
245 for (size_t k = 1; k <= 5; k += 2) {
246 GemmMicrokernelTester()
247 .mr(4)
248 .nr(8)
249 .kr(1)
250 .sr(1)
251 .m(4)
252 .n(n)
253 .k(k)
254 .cn_stride(11)
255 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
256 }
257 }
258 }
259
260 TEST(F32_PPMM_4X8__NEON, n_div_8_strided_a) {
261 TEST_REQUIRES_ARM_NEON;
262 for (uint32_t n = 16; n <= 24; n += 8) {
263 for (size_t k = 1; k <= 5; k += 2) {
264 GemmMicrokernelTester()
265 .mr(4)
266 .nr(8)
267 .kr(1)
268 .sr(1)
269 .m(4)
270 .n(n)
271 .k(k)
272 .a_stride(7)
273 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
274 }
275 }
276 }
277
278 TEST(F32_PPMM_4X8__NEON, n_div_8_subtile) {
279 TEST_REQUIRES_ARM_NEON;
280 for (uint32_t n = 16; n <= 24; n += 8) {
281 for (size_t k = 1; k <= 5; k += 2) {
282 for (uint32_t m = 1; m <= 4; m++) {
283 GemmMicrokernelTester()
284 .mr(4)
285 .nr(8)
286 .kr(1)
287 .sr(1)
288 .m(m)
289 .n(n)
290 .k(k)
291 .iterations(1)
292 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
293 }
294 }
295 }
296 }
297
298 TEST(F32_PPMM_4X8__NEON, strided_cm_subtile) {
299 TEST_REQUIRES_ARM_NEON;
300 for (size_t k = 1; k <= 5; k += 2) {
301 for (uint32_t m = 1; m <= 4; m++) {
302 for (uint32_t n = 1; n <= 8; n++) {
303 GemmMicrokernelTester()
304 .mr(4)
305 .nr(8)
306 .kr(1)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .cm_stride(11)
312 .iterations(1)
313 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
314 }
315 }
316 }
317 }
318
319 TEST(F32_PPMM_4X8__NEON, qmin) {
320 TEST_REQUIRES_ARM_NEON;
321 GemmMicrokernelTester()
322 .mr(4)
323 .nr(8)
324 .kr(1)
325 .sr(1)
326 .m(4)
327 .n(8)
328 .k(1)
329 .qmin(128)
330 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
331 }
332
333 TEST(F32_PPMM_4X8__NEON, qmax) {
334 TEST_REQUIRES_ARM_NEON;
335 GemmMicrokernelTester()
336 .mr(4)
337 .nr(8)
338 .kr(1)
339 .sr(1)
340 .m(4)
341 .n(8)
342 .k(1)
343 .qmax(128)
344 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
345 }
346
347 TEST(F32_PPMM_4X8__NEON, strided_cm) {
348 TEST_REQUIRES_ARM_NEON;
349 GemmMicrokernelTester()
350 .mr(4)
351 .nr(8)
352 .kr(1)
353 .sr(1)
354 .m(4)
355 .n(8)
356 .k(1)
357 .cm_stride(11)
358 .Test(xnn_f32_ppmm_ukernel_4x8__neon);
359 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700360#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700361
362
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700363#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700364 TEST(F32_PPMM_4X8__NEONFMA, k_eq_1) {
365 TEST_REQUIRES_ARM_NEON_FMA;
366 GemmMicrokernelTester()
367 .mr(4)
368 .nr(8)
369 .kr(1)
370 .sr(1)
371 .m(4)
372 .n(8)
373 .k(1)
374 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
375 }
376
377 TEST(F32_PPMM_4X8__NEONFMA, strided_cn) {
378 TEST_REQUIRES_ARM_NEON_FMA;
379 GemmMicrokernelTester()
380 .mr(4)
381 .nr(8)
382 .kr(1)
383 .sr(1)
384 .m(4)
385 .n(8)
386 .k(1)
387 .cn_stride(11)
388 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
389 }
390
391 TEST(F32_PPMM_4X8__NEONFMA, k_eq_1_strided_a) {
392 TEST_REQUIRES_ARM_NEON_FMA;
393 GemmMicrokernelTester()
394 .mr(4)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(4)
399 .n(8)
400 .k(1)
401 .a_stride(3)
402 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
403 }
404
405 TEST(F32_PPMM_4X8__NEONFMA, k_eq_1_subtile) {
406 TEST_REQUIRES_ARM_NEON_FMA;
407 for (uint32_t m = 1; m <= 4; m++) {
408 for (uint32_t n = 1; n <= 8; n++) {
409 GemmMicrokernelTester()
410 .mr(4)
411 .nr(8)
412 .kr(1)
413 .sr(1)
414 .m(m)
415 .n(n)
416 .k(1)
417 .iterations(1)
418 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
419 }
420 }
421 }
422
423 TEST(F32_PPMM_4X8__NEONFMA, k_eq_1_subtile_m) {
424 TEST_REQUIRES_ARM_NEON_FMA;
425 for (uint32_t m = 1; m <= 4; m++) {
426 GemmMicrokernelTester()
427 .mr(4)
428 .nr(8)
429 .kr(1)
430 .sr(1)
431 .m(m)
432 .n(8)
433 .k(1)
434 .iterations(1)
435 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
436 }
437 }
438
439 TEST(F32_PPMM_4X8__NEONFMA, k_eq_1_subtile_n) {
440 TEST_REQUIRES_ARM_NEON_FMA;
441 for (uint32_t n = 1; n <= 8; n++) {
442 GemmMicrokernelTester()
443 .mr(4)
444 .nr(8)
445 .kr(1)
446 .sr(1)
447 .m(4)
448 .n(n)
449 .k(1)
450 .iterations(1)
451 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
452 }
453 }
454
455 TEST(F32_PPMM_4X8__NEONFMA, k_gt_1) {
456 TEST_REQUIRES_ARM_NEON_FMA;
457 for (size_t k = 2; k < 10; k++) {
458 GemmMicrokernelTester()
459 .mr(4)
460 .nr(8)
461 .kr(1)
462 .sr(1)
463 .m(4)
464 .n(8)
465 .k(k)
466 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
467 }
468 }
469
470 TEST(F32_PPMM_4X8__NEONFMA, k_gt_1_subtile) {
471 TEST_REQUIRES_ARM_NEON_FMA;
472 for (size_t k = 2; k < 10; k++) {
473 for (uint32_t m = 1; m <= 4; m++) {
474 for (uint32_t n = 1; n <= 8; n++) {
475 GemmMicrokernelTester()
476 .mr(4)
477 .nr(8)
478 .kr(1)
479 .sr(1)
480 .m(m)
481 .n(n)
482 .k(k)
483 .iterations(1)
484 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
485 }
486 }
487 }
488 }
489
490 TEST(F32_PPMM_4X8__NEONFMA, n_gt_8) {
491 TEST_REQUIRES_ARM_NEON_FMA;
492 for (uint32_t n = 9; n < 16; n++) {
493 for (size_t k = 1; k <= 5; k += 2) {
494 GemmMicrokernelTester()
495 .mr(4)
496 .nr(8)
497 .kr(1)
498 .sr(1)
499 .m(4)
500 .n(8)
501 .k(k)
502 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
503 }
504 }
505 }
506
507 TEST(F32_PPMM_4X8__NEONFMA, n_gt_8_strided_cn) {
508 TEST_REQUIRES_ARM_NEON_FMA;
509 for (uint32_t n = 9; n < 16; n++) {
510 for (size_t k = 1; k <= 5; k += 2) {
511 GemmMicrokernelTester()
512 .mr(4)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(4)
517 .n(8)
518 .k(k)
519 .cn_stride(11)
520 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
521 }
522 }
523 }
524
525 TEST(F32_PPMM_4X8__NEONFMA, n_gt_8_strided_a) {
526 TEST_REQUIRES_ARM_NEON_FMA;
527 for (uint32_t n = 9; n < 16; n++) {
528 for (size_t k = 1; k <= 5; k += 2) {
529 GemmMicrokernelTester()
530 .mr(4)
531 .nr(8)
532 .kr(1)
533 .sr(1)
534 .m(4)
535 .n(n)
536 .k(k)
537 .a_stride(7)
538 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
539 }
540 }
541 }
542
543 TEST(F32_PPMM_4X8__NEONFMA, n_gt_8_subtile) {
544 TEST_REQUIRES_ARM_NEON_FMA;
545 for (uint32_t n = 9; n < 16; n++) {
546 for (size_t k = 1; k <= 5; k += 2) {
547 for (uint32_t m = 1; m <= 4; m++) {
548 GemmMicrokernelTester()
549 .mr(4)
550 .nr(8)
551 .kr(1)
552 .sr(1)
553 .m(m)
554 .n(n)
555 .k(k)
556 .iterations(1)
557 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
558 }
559 }
560 }
561 }
562
563 TEST(F32_PPMM_4X8__NEONFMA, n_div_8) {
564 TEST_REQUIRES_ARM_NEON_FMA;
565 for (uint32_t n = 16; n <= 24; n += 8) {
566 for (size_t k = 1; k <= 5; k += 2) {
567 GemmMicrokernelTester()
568 .mr(4)
569 .nr(8)
570 .kr(1)
571 .sr(1)
572 .m(4)
573 .n(8)
574 .k(k)
575 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
576 }
577 }
578 }
579
580 TEST(F32_PPMM_4X8__NEONFMA, n_div_8_strided_cn) {
581 TEST_REQUIRES_ARM_NEON_FMA;
582 for (uint32_t n = 16; n <= 24; n += 8) {
583 for (size_t k = 1; k <= 5; k += 2) {
584 GemmMicrokernelTester()
585 .mr(4)
586 .nr(8)
587 .kr(1)
588 .sr(1)
589 .m(4)
590 .n(n)
591 .k(k)
592 .cn_stride(11)
593 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
594 }
595 }
596 }
597
598 TEST(F32_PPMM_4X8__NEONFMA, n_div_8_strided_a) {
599 TEST_REQUIRES_ARM_NEON_FMA;
600 for (uint32_t n = 16; n <= 24; n += 8) {
601 for (size_t k = 1; k <= 5; k += 2) {
602 GemmMicrokernelTester()
603 .mr(4)
604 .nr(8)
605 .kr(1)
606 .sr(1)
607 .m(4)
608 .n(n)
609 .k(k)
610 .a_stride(7)
611 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
612 }
613 }
614 }
615
616 TEST(F32_PPMM_4X8__NEONFMA, n_div_8_subtile) {
617 TEST_REQUIRES_ARM_NEON_FMA;
618 for (uint32_t n = 16; n <= 24; n += 8) {
619 for (size_t k = 1; k <= 5; k += 2) {
620 for (uint32_t m = 1; m <= 4; m++) {
621 GemmMicrokernelTester()
622 .mr(4)
623 .nr(8)
624 .kr(1)
625 .sr(1)
626 .m(m)
627 .n(n)
628 .k(k)
629 .iterations(1)
630 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
631 }
632 }
633 }
634 }
635
636 TEST(F32_PPMM_4X8__NEONFMA, strided_cm_subtile) {
637 TEST_REQUIRES_ARM_NEON_FMA;
638 for (size_t k = 1; k <= 5; k += 2) {
639 for (uint32_t m = 1; m <= 4; m++) {
640 for (uint32_t n = 1; n <= 8; n++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(8)
644 .kr(1)
645 .sr(1)
646 .m(m)
647 .n(n)
648 .k(k)
649 .cm_stride(11)
650 .iterations(1)
651 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
652 }
653 }
654 }
655 }
656
657 TEST(F32_PPMM_4X8__NEONFMA, qmin) {
658 TEST_REQUIRES_ARM_NEON_FMA;
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(8)
662 .kr(1)
663 .sr(1)
664 .m(4)
665 .n(8)
666 .k(1)
667 .qmin(128)
668 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
669 }
670
671 TEST(F32_PPMM_4X8__NEONFMA, qmax) {
672 TEST_REQUIRES_ARM_NEON_FMA;
673 GemmMicrokernelTester()
674 .mr(4)
675 .nr(8)
676 .kr(1)
677 .sr(1)
678 .m(4)
679 .n(8)
680 .k(1)
681 .qmax(128)
682 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
683 }
684
685 TEST(F32_PPMM_4X8__NEONFMA, strided_cm) {
686 TEST_REQUIRES_ARM_NEON_FMA;
687 GemmMicrokernelTester()
688 .mr(4)
689 .nr(8)
690 .kr(1)
691 .sr(1)
692 .m(4)
693 .n(8)
694 .k(1)
695 .cm_stride(11)
696 .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
697 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700698#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700699
700
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700701#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700702 TEST(F32_PPMM_8X8__NEON, k_eq_1) {
703 TEST_REQUIRES_ARM_NEON;
704 GemmMicrokernelTester()
705 .mr(8)
706 .nr(8)
707 .kr(1)
708 .sr(1)
709 .m(8)
710 .n(8)
711 .k(1)
712 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
713 }
714
715 TEST(F32_PPMM_8X8__NEON, strided_cn) {
716 TEST_REQUIRES_ARM_NEON;
717 GemmMicrokernelTester()
718 .mr(8)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(8)
723 .n(8)
724 .k(1)
725 .cn_stride(11)
726 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
727 }
728
729 TEST(F32_PPMM_8X8__NEON, k_eq_1_strided_a) {
730 TEST_REQUIRES_ARM_NEON;
731 GemmMicrokernelTester()
732 .mr(8)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(8)
737 .n(8)
738 .k(1)
739 .a_stride(3)
740 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
741 }
742
743 TEST(F32_PPMM_8X8__NEON, k_eq_1_subtile) {
744 TEST_REQUIRES_ARM_NEON;
745 for (uint32_t m = 1; m <= 8; m++) {
746 for (uint32_t n = 1; n <= 8; n++) {
747 GemmMicrokernelTester()
748 .mr(8)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(m)
753 .n(n)
754 .k(1)
755 .iterations(1)
756 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
757 }
758 }
759 }
760
761 TEST(F32_PPMM_8X8__NEON, k_eq_1_subtile_m) {
762 TEST_REQUIRES_ARM_NEON;
763 for (uint32_t m = 1; m <= 8; m++) {
764 GemmMicrokernelTester()
765 .mr(8)
766 .nr(8)
767 .kr(1)
768 .sr(1)
769 .m(m)
770 .n(8)
771 .k(1)
772 .iterations(1)
773 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
774 }
775 }
776
777 TEST(F32_PPMM_8X8__NEON, k_eq_1_subtile_n) {
778 TEST_REQUIRES_ARM_NEON;
779 for (uint32_t n = 1; n <= 8; n++) {
780 GemmMicrokernelTester()
781 .mr(8)
782 .nr(8)
783 .kr(1)
784 .sr(1)
785 .m(8)
786 .n(n)
787 .k(1)
788 .iterations(1)
789 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
790 }
791 }
792
793 TEST(F32_PPMM_8X8__NEON, k_gt_1) {
794 TEST_REQUIRES_ARM_NEON;
795 for (size_t k = 2; k < 10; k++) {
796 GemmMicrokernelTester()
797 .mr(8)
798 .nr(8)
799 .kr(1)
800 .sr(1)
801 .m(8)
802 .n(8)
803 .k(k)
804 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
805 }
806 }
807
808 TEST(F32_PPMM_8X8__NEON, k_gt_1_subtile) {
809 TEST_REQUIRES_ARM_NEON;
810 for (size_t k = 2; k < 10; k++) {
811 for (uint32_t m = 1; m <= 8; m++) {
812 for (uint32_t n = 1; n <= 8; n++) {
813 GemmMicrokernelTester()
814 .mr(8)
815 .nr(8)
816 .kr(1)
817 .sr(1)
818 .m(m)
819 .n(n)
820 .k(k)
821 .iterations(1)
822 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
823 }
824 }
825 }
826 }
827
828 TEST(F32_PPMM_8X8__NEON, n_gt_8) {
829 TEST_REQUIRES_ARM_NEON;
830 for (uint32_t n = 9; n < 16; n++) {
831 for (size_t k = 1; k <= 5; k += 2) {
832 GemmMicrokernelTester()
833 .mr(8)
834 .nr(8)
835 .kr(1)
836 .sr(1)
837 .m(8)
838 .n(8)
839 .k(k)
840 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
841 }
842 }
843 }
844
845 TEST(F32_PPMM_8X8__NEON, n_gt_8_strided_cn) {
846 TEST_REQUIRES_ARM_NEON;
847 for (uint32_t n = 9; n < 16; n++) {
848 for (size_t k = 1; k <= 5; k += 2) {
849 GemmMicrokernelTester()
850 .mr(8)
851 .nr(8)
852 .kr(1)
853 .sr(1)
854 .m(8)
855 .n(8)
856 .k(k)
857 .cn_stride(11)
858 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
859 }
860 }
861 }
862
863 TEST(F32_PPMM_8X8__NEON, n_gt_8_strided_a) {
864 TEST_REQUIRES_ARM_NEON;
865 for (uint32_t n = 9; n < 16; n++) {
866 for (size_t k = 1; k <= 5; k += 2) {
867 GemmMicrokernelTester()
868 .mr(8)
869 .nr(8)
870 .kr(1)
871 .sr(1)
872 .m(8)
873 .n(n)
874 .k(k)
875 .a_stride(7)
876 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
877 }
878 }
879 }
880
881 TEST(F32_PPMM_8X8__NEON, n_gt_8_subtile) {
882 TEST_REQUIRES_ARM_NEON;
883 for (uint32_t n = 9; n < 16; n++) {
884 for (size_t k = 1; k <= 5; k += 2) {
885 for (uint32_t m = 1; m <= 8; m++) {
886 GemmMicrokernelTester()
887 .mr(8)
888 .nr(8)
889 .kr(1)
890 .sr(1)
891 .m(m)
892 .n(n)
893 .k(k)
894 .iterations(1)
895 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
896 }
897 }
898 }
899 }
900
901 TEST(F32_PPMM_8X8__NEON, n_div_8) {
902 TEST_REQUIRES_ARM_NEON;
903 for (uint32_t n = 16; n <= 24; n += 8) {
904 for (size_t k = 1; k <= 5; k += 2) {
905 GemmMicrokernelTester()
906 .mr(8)
907 .nr(8)
908 .kr(1)
909 .sr(1)
910 .m(8)
911 .n(8)
912 .k(k)
913 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
914 }
915 }
916 }
917
918 TEST(F32_PPMM_8X8__NEON, n_div_8_strided_cn) {
919 TEST_REQUIRES_ARM_NEON;
920 for (uint32_t n = 16; n <= 24; n += 8) {
921 for (size_t k = 1; k <= 5; k += 2) {
922 GemmMicrokernelTester()
923 .mr(8)
924 .nr(8)
925 .kr(1)
926 .sr(1)
927 .m(8)
928 .n(n)
929 .k(k)
930 .cn_stride(11)
931 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
932 }
933 }
934 }
935
936 TEST(F32_PPMM_8X8__NEON, n_div_8_strided_a) {
937 TEST_REQUIRES_ARM_NEON;
938 for (uint32_t n = 16; n <= 24; n += 8) {
939 for (size_t k = 1; k <= 5; k += 2) {
940 GemmMicrokernelTester()
941 .mr(8)
942 .nr(8)
943 .kr(1)
944 .sr(1)
945 .m(8)
946 .n(n)
947 .k(k)
948 .a_stride(7)
949 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
950 }
951 }
952 }
953
954 TEST(F32_PPMM_8X8__NEON, n_div_8_subtile) {
955 TEST_REQUIRES_ARM_NEON;
956 for (uint32_t n = 16; n <= 24; n += 8) {
957 for (size_t k = 1; k <= 5; k += 2) {
958 for (uint32_t m = 1; m <= 8; m++) {
959 GemmMicrokernelTester()
960 .mr(8)
961 .nr(8)
962 .kr(1)
963 .sr(1)
964 .m(m)
965 .n(n)
966 .k(k)
967 .iterations(1)
968 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
969 }
970 }
971 }
972 }
973
974 TEST(F32_PPMM_8X8__NEON, strided_cm_subtile) {
975 TEST_REQUIRES_ARM_NEON;
976 for (size_t k = 1; k <= 5; k += 2) {
977 for (uint32_t m = 1; m <= 8; m++) {
978 for (uint32_t n = 1; n <= 8; n++) {
979 GemmMicrokernelTester()
980 .mr(8)
981 .nr(8)
982 .kr(1)
983 .sr(1)
984 .m(m)
985 .n(n)
986 .k(k)
987 .cm_stride(11)
988 .iterations(1)
989 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
990 }
991 }
992 }
993 }
994
995 TEST(F32_PPMM_8X8__NEON, qmin) {
996 TEST_REQUIRES_ARM_NEON;
997 GemmMicrokernelTester()
998 .mr(8)
999 .nr(8)
1000 .kr(1)
1001 .sr(1)
1002 .m(8)
1003 .n(8)
1004 .k(1)
1005 .qmin(128)
1006 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
1007 }
1008
1009 TEST(F32_PPMM_8X8__NEON, qmax) {
1010 TEST_REQUIRES_ARM_NEON;
1011 GemmMicrokernelTester()
1012 .mr(8)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(8)
1017 .n(8)
1018 .k(1)
1019 .qmax(128)
1020 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
1021 }
1022
1023 TEST(F32_PPMM_8X8__NEON, strided_cm) {
1024 TEST_REQUIRES_ARM_NEON;
1025 GemmMicrokernelTester()
1026 .mr(8)
1027 .nr(8)
1028 .kr(1)
1029 .sr(1)
1030 .m(8)
1031 .n(8)
1032 .k(1)
1033 .cm_stride(11)
1034 .Test(xnn_f32_ppmm_ukernel_8x8__neon);
1035 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001036#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001037
1038
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001039#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001040 TEST(F32_PPMM_8X8__NEONFMA, k_eq_1) {
1041 TEST_REQUIRES_ARM_NEON_FMA;
1042 GemmMicrokernelTester()
1043 .mr(8)
1044 .nr(8)
1045 .kr(1)
1046 .sr(1)
1047 .m(8)
1048 .n(8)
1049 .k(1)
1050 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1051 }
1052
1053 TEST(F32_PPMM_8X8__NEONFMA, strided_cn) {
1054 TEST_REQUIRES_ARM_NEON_FMA;
1055 GemmMicrokernelTester()
1056 .mr(8)
1057 .nr(8)
1058 .kr(1)
1059 .sr(1)
1060 .m(8)
1061 .n(8)
1062 .k(1)
1063 .cn_stride(11)
1064 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1065 }
1066
1067 TEST(F32_PPMM_8X8__NEONFMA, k_eq_1_strided_a) {
1068 TEST_REQUIRES_ARM_NEON_FMA;
1069 GemmMicrokernelTester()
1070 .mr(8)
1071 .nr(8)
1072 .kr(1)
1073 .sr(1)
1074 .m(8)
1075 .n(8)
1076 .k(1)
1077 .a_stride(3)
1078 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1079 }
1080
1081 TEST(F32_PPMM_8X8__NEONFMA, k_eq_1_subtile) {
1082 TEST_REQUIRES_ARM_NEON_FMA;
1083 for (uint32_t m = 1; m <= 8; m++) {
1084 for (uint32_t n = 1; n <= 8; n++) {
1085 GemmMicrokernelTester()
1086 .mr(8)
1087 .nr(8)
1088 .kr(1)
1089 .sr(1)
1090 .m(m)
1091 .n(n)
1092 .k(1)
1093 .iterations(1)
1094 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1095 }
1096 }
1097 }
1098
1099 TEST(F32_PPMM_8X8__NEONFMA, k_eq_1_subtile_m) {
1100 TEST_REQUIRES_ARM_NEON_FMA;
1101 for (uint32_t m = 1; m <= 8; m++) {
1102 GemmMicrokernelTester()
1103 .mr(8)
1104 .nr(8)
1105 .kr(1)
1106 .sr(1)
1107 .m(m)
1108 .n(8)
1109 .k(1)
1110 .iterations(1)
1111 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1112 }
1113 }
1114
1115 TEST(F32_PPMM_8X8__NEONFMA, k_eq_1_subtile_n) {
1116 TEST_REQUIRES_ARM_NEON_FMA;
1117 for (uint32_t n = 1; n <= 8; n++) {
1118 GemmMicrokernelTester()
1119 .mr(8)
1120 .nr(8)
1121 .kr(1)
1122 .sr(1)
1123 .m(8)
1124 .n(n)
1125 .k(1)
1126 .iterations(1)
1127 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1128 }
1129 }
1130
1131 TEST(F32_PPMM_8X8__NEONFMA, k_gt_1) {
1132 TEST_REQUIRES_ARM_NEON_FMA;
1133 for (size_t k = 2; k < 10; k++) {
1134 GemmMicrokernelTester()
1135 .mr(8)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(8)
1140 .n(8)
1141 .k(k)
1142 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1143 }
1144 }
1145
1146 TEST(F32_PPMM_8X8__NEONFMA, k_gt_1_subtile) {
1147 TEST_REQUIRES_ARM_NEON_FMA;
1148 for (size_t k = 2; k < 10; k++) {
1149 for (uint32_t m = 1; m <= 8; m++) {
1150 for (uint32_t n = 1; n <= 8; n++) {
1151 GemmMicrokernelTester()
1152 .mr(8)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(k)
1159 .iterations(1)
1160 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1161 }
1162 }
1163 }
1164 }
1165
1166 TEST(F32_PPMM_8X8__NEONFMA, n_gt_8) {
1167 TEST_REQUIRES_ARM_NEON_FMA;
1168 for (uint32_t n = 9; n < 16; n++) {
1169 for (size_t k = 1; k <= 5; k += 2) {
1170 GemmMicrokernelTester()
1171 .mr(8)
1172 .nr(8)
1173 .kr(1)
1174 .sr(1)
1175 .m(8)
1176 .n(8)
1177 .k(k)
1178 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1179 }
1180 }
1181 }
1182
1183 TEST(F32_PPMM_8X8__NEONFMA, n_gt_8_strided_cn) {
1184 TEST_REQUIRES_ARM_NEON_FMA;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 5; k += 2) {
1187 GemmMicrokernelTester()
1188 .mr(8)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(8)
1193 .n(8)
1194 .k(k)
1195 .cn_stride(11)
1196 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1197 }
1198 }
1199 }
1200
1201 TEST(F32_PPMM_8X8__NEONFMA, n_gt_8_strided_a) {
1202 TEST_REQUIRES_ARM_NEON_FMA;
1203 for (uint32_t n = 9; n < 16; n++) {
1204 for (size_t k = 1; k <= 5; k += 2) {
1205 GemmMicrokernelTester()
1206 .mr(8)
1207 .nr(8)
1208 .kr(1)
1209 .sr(1)
1210 .m(8)
1211 .n(n)
1212 .k(k)
1213 .a_stride(7)
1214 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1215 }
1216 }
1217 }
1218
1219 TEST(F32_PPMM_8X8__NEONFMA, n_gt_8_subtile) {
1220 TEST_REQUIRES_ARM_NEON_FMA;
1221 for (uint32_t n = 9; n < 16; n++) {
1222 for (size_t k = 1; k <= 5; k += 2) {
1223 for (uint32_t m = 1; m <= 8; m++) {
1224 GemmMicrokernelTester()
1225 .mr(8)
1226 .nr(8)
1227 .kr(1)
1228 .sr(1)
1229 .m(m)
1230 .n(n)
1231 .k(k)
1232 .iterations(1)
1233 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1234 }
1235 }
1236 }
1237 }
1238
1239 TEST(F32_PPMM_8X8__NEONFMA, n_div_8) {
1240 TEST_REQUIRES_ARM_NEON_FMA;
1241 for (uint32_t n = 16; n <= 24; n += 8) {
1242 for (size_t k = 1; k <= 5; k += 2) {
1243 GemmMicrokernelTester()
1244 .mr(8)
1245 .nr(8)
1246 .kr(1)
1247 .sr(1)
1248 .m(8)
1249 .n(8)
1250 .k(k)
1251 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1252 }
1253 }
1254 }
1255
1256 TEST(F32_PPMM_8X8__NEONFMA, n_div_8_strided_cn) {
1257 TEST_REQUIRES_ARM_NEON_FMA;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 5; k += 2) {
1260 GemmMicrokernelTester()
1261 .mr(8)
1262 .nr(8)
1263 .kr(1)
1264 .sr(1)
1265 .m(8)
1266 .n(n)
1267 .k(k)
1268 .cn_stride(11)
1269 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1270 }
1271 }
1272 }
1273
1274 TEST(F32_PPMM_8X8__NEONFMA, n_div_8_strided_a) {
1275 TEST_REQUIRES_ARM_NEON_FMA;
1276 for (uint32_t n = 16; n <= 24; n += 8) {
1277 for (size_t k = 1; k <= 5; k += 2) {
1278 GemmMicrokernelTester()
1279 .mr(8)
1280 .nr(8)
1281 .kr(1)
1282 .sr(1)
1283 .m(8)
1284 .n(n)
1285 .k(k)
1286 .a_stride(7)
1287 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1288 }
1289 }
1290 }
1291
1292 TEST(F32_PPMM_8X8__NEONFMA, n_div_8_subtile) {
1293 TEST_REQUIRES_ARM_NEON_FMA;
1294 for (uint32_t n = 16; n <= 24; n += 8) {
1295 for (size_t k = 1; k <= 5; k += 2) {
1296 for (uint32_t m = 1; m <= 8; m++) {
1297 GemmMicrokernelTester()
1298 .mr(8)
1299 .nr(8)
1300 .kr(1)
1301 .sr(1)
1302 .m(m)
1303 .n(n)
1304 .k(k)
1305 .iterations(1)
1306 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1307 }
1308 }
1309 }
1310 }
1311
1312 TEST(F32_PPMM_8X8__NEONFMA, strided_cm_subtile) {
1313 TEST_REQUIRES_ARM_NEON_FMA;
1314 for (size_t k = 1; k <= 5; k += 2) {
1315 for (uint32_t m = 1; m <= 8; m++) {
1316 for (uint32_t n = 1; n <= 8; n++) {
1317 GemmMicrokernelTester()
1318 .mr(8)
1319 .nr(8)
1320 .kr(1)
1321 .sr(1)
1322 .m(m)
1323 .n(n)
1324 .k(k)
1325 .cm_stride(11)
1326 .iterations(1)
1327 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1328 }
1329 }
1330 }
1331 }
1332
1333 TEST(F32_PPMM_8X8__NEONFMA, qmin) {
1334 TEST_REQUIRES_ARM_NEON_FMA;
1335 GemmMicrokernelTester()
1336 .mr(8)
1337 .nr(8)
1338 .kr(1)
1339 .sr(1)
1340 .m(8)
1341 .n(8)
1342 .k(1)
1343 .qmin(128)
1344 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1345 }
1346
1347 TEST(F32_PPMM_8X8__NEONFMA, qmax) {
1348 TEST_REQUIRES_ARM_NEON_FMA;
1349 GemmMicrokernelTester()
1350 .mr(8)
1351 .nr(8)
1352 .kr(1)
1353 .sr(1)
1354 .m(8)
1355 .n(8)
1356 .k(1)
1357 .qmax(128)
1358 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1359 }
1360
1361 TEST(F32_PPMM_8X8__NEONFMA, strided_cm) {
1362 TEST_REQUIRES_ARM_NEON_FMA;
1363 GemmMicrokernelTester()
1364 .mr(8)
1365 .nr(8)
1366 .kr(1)
1367 .sr(1)
1368 .m(8)
1369 .n(8)
1370 .k(1)
1371 .cm_stride(11)
1372 .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
1373 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001374#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001375
1376
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001377#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001378 TEST(F32_PPMM_4X8__SSE, k_eq_1) {
1379 TEST_REQUIRES_X86_SSE;
1380 GemmMicrokernelTester()
1381 .mr(4)
1382 .nr(8)
1383 .kr(1)
1384 .sr(1)
1385 .m(4)
1386 .n(8)
1387 .k(1)
1388 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1389 }
1390
1391 TEST(F32_PPMM_4X8__SSE, strided_cn) {
1392 TEST_REQUIRES_X86_SSE;
1393 GemmMicrokernelTester()
1394 .mr(4)
1395 .nr(8)
1396 .kr(1)
1397 .sr(1)
1398 .m(4)
1399 .n(8)
1400 .k(1)
1401 .cn_stride(11)
1402 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1403 }
1404
1405 TEST(F32_PPMM_4X8__SSE, k_eq_1_strided_a) {
1406 TEST_REQUIRES_X86_SSE;
1407 GemmMicrokernelTester()
1408 .mr(4)
1409 .nr(8)
1410 .kr(1)
1411 .sr(1)
1412 .m(4)
1413 .n(8)
1414 .k(1)
1415 .a_stride(3)
1416 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1417 }
1418
1419 TEST(F32_PPMM_4X8__SSE, k_eq_1_subtile) {
1420 TEST_REQUIRES_X86_SSE;
1421 for (uint32_t m = 1; m <= 4; m++) {
1422 for (uint32_t n = 1; n <= 8; n++) {
1423 GemmMicrokernelTester()
1424 .mr(4)
1425 .nr(8)
1426 .kr(1)
1427 .sr(1)
1428 .m(m)
1429 .n(n)
1430 .k(1)
1431 .iterations(1)
1432 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1433 }
1434 }
1435 }
1436
1437 TEST(F32_PPMM_4X8__SSE, k_eq_1_subtile_m) {
1438 TEST_REQUIRES_X86_SSE;
1439 for (uint32_t m = 1; m <= 4; m++) {
1440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(8)
1443 .kr(1)
1444 .sr(1)
1445 .m(m)
1446 .n(8)
1447 .k(1)
1448 .iterations(1)
1449 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1450 }
1451 }
1452
1453 TEST(F32_PPMM_4X8__SSE, k_eq_1_subtile_n) {
1454 TEST_REQUIRES_X86_SSE;
1455 for (uint32_t n = 1; n <= 8; n++) {
1456 GemmMicrokernelTester()
1457 .mr(4)
1458 .nr(8)
1459 .kr(1)
1460 .sr(1)
1461 .m(4)
1462 .n(n)
1463 .k(1)
1464 .iterations(1)
1465 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1466 }
1467 }
1468
1469 TEST(F32_PPMM_4X8__SSE, k_gt_1) {
1470 TEST_REQUIRES_X86_SSE;
1471 for (size_t k = 2; k < 10; k++) {
1472 GemmMicrokernelTester()
1473 .mr(4)
1474 .nr(8)
1475 .kr(1)
1476 .sr(1)
1477 .m(4)
1478 .n(8)
1479 .k(k)
1480 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1481 }
1482 }
1483
1484 TEST(F32_PPMM_4X8__SSE, k_gt_1_subtile) {
1485 TEST_REQUIRES_X86_SSE;
1486 for (size_t k = 2; k < 10; k++) {
1487 for (uint32_t m = 1; m <= 4; m++) {
1488 for (uint32_t n = 1; n <= 8; n++) {
1489 GemmMicrokernelTester()
1490 .mr(4)
1491 .nr(8)
1492 .kr(1)
1493 .sr(1)
1494 .m(m)
1495 .n(n)
1496 .k(k)
1497 .iterations(1)
1498 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1499 }
1500 }
1501 }
1502 }
1503
1504 TEST(F32_PPMM_4X8__SSE, n_gt_8) {
1505 TEST_REQUIRES_X86_SSE;
1506 for (uint32_t n = 9; n < 16; n++) {
1507 for (size_t k = 1; k <= 5; k += 2) {
1508 GemmMicrokernelTester()
1509 .mr(4)
1510 .nr(8)
1511 .kr(1)
1512 .sr(1)
1513 .m(4)
1514 .n(8)
1515 .k(k)
1516 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1517 }
1518 }
1519 }
1520
1521 TEST(F32_PPMM_4X8__SSE, n_gt_8_strided_cn) {
1522 TEST_REQUIRES_X86_SSE;
1523 for (uint32_t n = 9; n < 16; n++) {
1524 for (size_t k = 1; k <= 5; k += 2) {
1525 GemmMicrokernelTester()
1526 .mr(4)
1527 .nr(8)
1528 .kr(1)
1529 .sr(1)
1530 .m(4)
1531 .n(8)
1532 .k(k)
1533 .cn_stride(11)
1534 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1535 }
1536 }
1537 }
1538
1539 TEST(F32_PPMM_4X8__SSE, n_gt_8_strided_a) {
1540 TEST_REQUIRES_X86_SSE;
1541 for (uint32_t n = 9; n < 16; n++) {
1542 for (size_t k = 1; k <= 5; k += 2) {
1543 GemmMicrokernelTester()
1544 .mr(4)
1545 .nr(8)
1546 .kr(1)
1547 .sr(1)
1548 .m(4)
1549 .n(n)
1550 .k(k)
1551 .a_stride(7)
1552 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1553 }
1554 }
1555 }
1556
1557 TEST(F32_PPMM_4X8__SSE, n_gt_8_subtile) {
1558 TEST_REQUIRES_X86_SSE;
1559 for (uint32_t n = 9; n < 16; n++) {
1560 for (size_t k = 1; k <= 5; k += 2) {
1561 for (uint32_t m = 1; m <= 4; m++) {
1562 GemmMicrokernelTester()
1563 .mr(4)
1564 .nr(8)
1565 .kr(1)
1566 .sr(1)
1567 .m(m)
1568 .n(n)
1569 .k(k)
1570 .iterations(1)
1571 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1572 }
1573 }
1574 }
1575 }
1576
1577 TEST(F32_PPMM_4X8__SSE, n_div_8) {
1578 TEST_REQUIRES_X86_SSE;
1579 for (uint32_t n = 16; n <= 24; n += 8) {
1580 for (size_t k = 1; k <= 5; k += 2) {
1581 GemmMicrokernelTester()
1582 .mr(4)
1583 .nr(8)
1584 .kr(1)
1585 .sr(1)
1586 .m(4)
1587 .n(8)
1588 .k(k)
1589 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1590 }
1591 }
1592 }
1593
1594 TEST(F32_PPMM_4X8__SSE, n_div_8_strided_cn) {
1595 TEST_REQUIRES_X86_SSE;
1596 for (uint32_t n = 16; n <= 24; n += 8) {
1597 for (size_t k = 1; k <= 5; k += 2) {
1598 GemmMicrokernelTester()
1599 .mr(4)
1600 .nr(8)
1601 .kr(1)
1602 .sr(1)
1603 .m(4)
1604 .n(n)
1605 .k(k)
1606 .cn_stride(11)
1607 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1608 }
1609 }
1610 }
1611
1612 TEST(F32_PPMM_4X8__SSE, n_div_8_strided_a) {
1613 TEST_REQUIRES_X86_SSE;
1614 for (uint32_t n = 16; n <= 24; n += 8) {
1615 for (size_t k = 1; k <= 5; k += 2) {
1616 GemmMicrokernelTester()
1617 .mr(4)
1618 .nr(8)
1619 .kr(1)
1620 .sr(1)
1621 .m(4)
1622 .n(n)
1623 .k(k)
1624 .a_stride(7)
1625 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1626 }
1627 }
1628 }
1629
1630 TEST(F32_PPMM_4X8__SSE, n_div_8_subtile) {
1631 TEST_REQUIRES_X86_SSE;
1632 for (uint32_t n = 16; n <= 24; n += 8) {
1633 for (size_t k = 1; k <= 5; k += 2) {
1634 for (uint32_t m = 1; m <= 4; m++) {
1635 GemmMicrokernelTester()
1636 .mr(4)
1637 .nr(8)
1638 .kr(1)
1639 .sr(1)
1640 .m(m)
1641 .n(n)
1642 .k(k)
1643 .iterations(1)
1644 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1645 }
1646 }
1647 }
1648 }
1649
1650 TEST(F32_PPMM_4X8__SSE, strided_cm_subtile) {
1651 TEST_REQUIRES_X86_SSE;
1652 for (size_t k = 1; k <= 5; k += 2) {
1653 for (uint32_t m = 1; m <= 4; m++) {
1654 for (uint32_t n = 1; n <= 8; n++) {
1655 GemmMicrokernelTester()
1656 .mr(4)
1657 .nr(8)
1658 .kr(1)
1659 .sr(1)
1660 .m(m)
1661 .n(n)
1662 .k(k)
1663 .cm_stride(11)
1664 .iterations(1)
1665 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1666 }
1667 }
1668 }
1669 }
1670
1671 TEST(F32_PPMM_4X8__SSE, qmin) {
1672 TEST_REQUIRES_X86_SSE;
1673 GemmMicrokernelTester()
1674 .mr(4)
1675 .nr(8)
1676 .kr(1)
1677 .sr(1)
1678 .m(4)
1679 .n(8)
1680 .k(1)
1681 .qmin(128)
1682 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1683 }
1684
1685 TEST(F32_PPMM_4X8__SSE, qmax) {
1686 TEST_REQUIRES_X86_SSE;
1687 GemmMicrokernelTester()
1688 .mr(4)
1689 .nr(8)
1690 .kr(1)
1691 .sr(1)
1692 .m(4)
1693 .n(8)
1694 .k(1)
1695 .qmax(128)
1696 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1697 }
1698
1699 TEST(F32_PPMM_4X8__SSE, strided_cm) {
1700 TEST_REQUIRES_X86_SSE;
1701 GemmMicrokernelTester()
1702 .mr(4)
1703 .nr(8)
1704 .kr(1)
1705 .sr(1)
1706 .m(4)
1707 .n(8)
1708 .k(1)
1709 .cm_stride(11)
1710 .Test(xnn_f32_ppmm_ukernel_4x8__sse);
1711 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001712#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001713
1714
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001715#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
XNNPACK Teamb455b122019-09-27 18:10:33 -07001716 TEST(F32_PPMM_4X8__PSIMD, k_eq_1) {
1717 TEST_REQUIRES_PSIMD;
1718 GemmMicrokernelTester()
1719 .mr(4)
1720 .nr(8)
1721 .kr(1)
1722 .sr(1)
1723 .m(4)
1724 .n(8)
1725 .k(1)
1726 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1727 }
1728
1729 TEST(F32_PPMM_4X8__PSIMD, strided_cn) {
1730 TEST_REQUIRES_PSIMD;
1731 GemmMicrokernelTester()
1732 .mr(4)
1733 .nr(8)
1734 .kr(1)
1735 .sr(1)
1736 .m(4)
1737 .n(8)
1738 .k(1)
1739 .cn_stride(11)
1740 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1741 }
1742
1743 TEST(F32_PPMM_4X8__PSIMD, k_eq_1_strided_a) {
1744 TEST_REQUIRES_PSIMD;
1745 GemmMicrokernelTester()
1746 .mr(4)
1747 .nr(8)
1748 .kr(1)
1749 .sr(1)
1750 .m(4)
1751 .n(8)
1752 .k(1)
1753 .a_stride(3)
1754 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1755 }
1756
1757 TEST(F32_PPMM_4X8__PSIMD, k_eq_1_subtile) {
1758 TEST_REQUIRES_PSIMD;
1759 for (uint32_t m = 1; m <= 4; m++) {
1760 for (uint32_t n = 1; n <= 8; n++) {
1761 GemmMicrokernelTester()
1762 .mr(4)
1763 .nr(8)
1764 .kr(1)
1765 .sr(1)
1766 .m(m)
1767 .n(n)
1768 .k(1)
1769 .iterations(1)
1770 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1771 }
1772 }
1773 }
1774
1775 TEST(F32_PPMM_4X8__PSIMD, k_eq_1_subtile_m) {
1776 TEST_REQUIRES_PSIMD;
1777 for (uint32_t m = 1; m <= 4; m++) {
1778 GemmMicrokernelTester()
1779 .mr(4)
1780 .nr(8)
1781 .kr(1)
1782 .sr(1)
1783 .m(m)
1784 .n(8)
1785 .k(1)
1786 .iterations(1)
1787 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1788 }
1789 }
1790
1791 TEST(F32_PPMM_4X8__PSIMD, k_eq_1_subtile_n) {
1792 TEST_REQUIRES_PSIMD;
1793 for (uint32_t n = 1; n <= 8; n++) {
1794 GemmMicrokernelTester()
1795 .mr(4)
1796 .nr(8)
1797 .kr(1)
1798 .sr(1)
1799 .m(4)
1800 .n(n)
1801 .k(1)
1802 .iterations(1)
1803 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1804 }
1805 }
1806
1807 TEST(F32_PPMM_4X8__PSIMD, k_gt_1) {
1808 TEST_REQUIRES_PSIMD;
1809 for (size_t k = 2; k < 10; k++) {
1810 GemmMicrokernelTester()
1811 .mr(4)
1812 .nr(8)
1813 .kr(1)
1814 .sr(1)
1815 .m(4)
1816 .n(8)
1817 .k(k)
1818 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1819 }
1820 }
1821
1822 TEST(F32_PPMM_4X8__PSIMD, k_gt_1_subtile) {
1823 TEST_REQUIRES_PSIMD;
1824 for (size_t k = 2; k < 10; k++) {
1825 for (uint32_t m = 1; m <= 4; m++) {
1826 for (uint32_t n = 1; n <= 8; n++) {
1827 GemmMicrokernelTester()
1828 .mr(4)
1829 .nr(8)
1830 .kr(1)
1831 .sr(1)
1832 .m(m)
1833 .n(n)
1834 .k(k)
1835 .iterations(1)
1836 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1837 }
1838 }
1839 }
1840 }
1841
1842 TEST(F32_PPMM_4X8__PSIMD, n_gt_8) {
1843 TEST_REQUIRES_PSIMD;
1844 for (uint32_t n = 9; n < 16; n++) {
1845 for (size_t k = 1; k <= 5; k += 2) {
1846 GemmMicrokernelTester()
1847 .mr(4)
1848 .nr(8)
1849 .kr(1)
1850 .sr(1)
1851 .m(4)
1852 .n(8)
1853 .k(k)
1854 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1855 }
1856 }
1857 }
1858
1859 TEST(F32_PPMM_4X8__PSIMD, n_gt_8_strided_cn) {
1860 TEST_REQUIRES_PSIMD;
1861 for (uint32_t n = 9; n < 16; n++) {
1862 for (size_t k = 1; k <= 5; k += 2) {
1863 GemmMicrokernelTester()
1864 .mr(4)
1865 .nr(8)
1866 .kr(1)
1867 .sr(1)
1868 .m(4)
1869 .n(8)
1870 .k(k)
1871 .cn_stride(11)
1872 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1873 }
1874 }
1875 }
1876
1877 TEST(F32_PPMM_4X8__PSIMD, n_gt_8_strided_a) {
1878 TEST_REQUIRES_PSIMD;
1879 for (uint32_t n = 9; n < 16; n++) {
1880 for (size_t k = 1; k <= 5; k += 2) {
1881 GemmMicrokernelTester()
1882 .mr(4)
1883 .nr(8)
1884 .kr(1)
1885 .sr(1)
1886 .m(4)
1887 .n(n)
1888 .k(k)
1889 .a_stride(7)
1890 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1891 }
1892 }
1893 }
1894
1895 TEST(F32_PPMM_4X8__PSIMD, n_gt_8_subtile) {
1896 TEST_REQUIRES_PSIMD;
1897 for (uint32_t n = 9; n < 16; n++) {
1898 for (size_t k = 1; k <= 5; k += 2) {
1899 for (uint32_t m = 1; m <= 4; m++) {
1900 GemmMicrokernelTester()
1901 .mr(4)
1902 .nr(8)
1903 .kr(1)
1904 .sr(1)
1905 .m(m)
1906 .n(n)
1907 .k(k)
1908 .iterations(1)
1909 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1910 }
1911 }
1912 }
1913 }
1914
1915 TEST(F32_PPMM_4X8__PSIMD, n_div_8) {
1916 TEST_REQUIRES_PSIMD;
1917 for (uint32_t n = 16; n <= 24; n += 8) {
1918 for (size_t k = 1; k <= 5; k += 2) {
1919 GemmMicrokernelTester()
1920 .mr(4)
1921 .nr(8)
1922 .kr(1)
1923 .sr(1)
1924 .m(4)
1925 .n(8)
1926 .k(k)
1927 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1928 }
1929 }
1930 }
1931
1932 TEST(F32_PPMM_4X8__PSIMD, n_div_8_strided_cn) {
1933 TEST_REQUIRES_PSIMD;
1934 for (uint32_t n = 16; n <= 24; n += 8) {
1935 for (size_t k = 1; k <= 5; k += 2) {
1936 GemmMicrokernelTester()
1937 .mr(4)
1938 .nr(8)
1939 .kr(1)
1940 .sr(1)
1941 .m(4)
1942 .n(n)
1943 .k(k)
1944 .cn_stride(11)
1945 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1946 }
1947 }
1948 }
1949
1950 TEST(F32_PPMM_4X8__PSIMD, n_div_8_strided_a) {
1951 TEST_REQUIRES_PSIMD;
1952 for (uint32_t n = 16; n <= 24; n += 8) {
1953 for (size_t k = 1; k <= 5; k += 2) {
1954 GemmMicrokernelTester()
1955 .mr(4)
1956 .nr(8)
1957 .kr(1)
1958 .sr(1)
1959 .m(4)
1960 .n(n)
1961 .k(k)
1962 .a_stride(7)
1963 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1964 }
1965 }
1966 }
1967
1968 TEST(F32_PPMM_4X8__PSIMD, n_div_8_subtile) {
1969 TEST_REQUIRES_PSIMD;
1970 for (uint32_t n = 16; n <= 24; n += 8) {
1971 for (size_t k = 1; k <= 5; k += 2) {
1972 for (uint32_t m = 1; m <= 4; m++) {
1973 GemmMicrokernelTester()
1974 .mr(4)
1975 .nr(8)
1976 .kr(1)
1977 .sr(1)
1978 .m(m)
1979 .n(n)
1980 .k(k)
1981 .iterations(1)
1982 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
1983 }
1984 }
1985 }
1986 }
1987
1988 TEST(F32_PPMM_4X8__PSIMD, strided_cm_subtile) {
1989 TEST_REQUIRES_PSIMD;
1990 for (size_t k = 1; k <= 5; k += 2) {
1991 for (uint32_t m = 1; m <= 4; m++) {
1992 for (uint32_t n = 1; n <= 8; n++) {
1993 GemmMicrokernelTester()
1994 .mr(4)
1995 .nr(8)
1996 .kr(1)
1997 .sr(1)
1998 .m(m)
1999 .n(n)
2000 .k(k)
2001 .cm_stride(11)
2002 .iterations(1)
2003 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
2004 }
2005 }
2006 }
2007 }
2008
2009 TEST(F32_PPMM_4X8__PSIMD, qmin) {
2010 TEST_REQUIRES_PSIMD;
2011 GemmMicrokernelTester()
2012 .mr(4)
2013 .nr(8)
2014 .kr(1)
2015 .sr(1)
2016 .m(4)
2017 .n(8)
2018 .k(1)
2019 .qmin(128)
2020 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
2021 }
2022
2023 TEST(F32_PPMM_4X8__PSIMD, qmax) {
2024 TEST_REQUIRES_PSIMD;
2025 GemmMicrokernelTester()
2026 .mr(4)
2027 .nr(8)
2028 .kr(1)
2029 .sr(1)
2030 .m(4)
2031 .n(8)
2032 .k(1)
2033 .qmax(128)
2034 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
2035 }
2036
2037 TEST(F32_PPMM_4X8__PSIMD, strided_cm) {
2038 TEST_REQUIRES_PSIMD;
2039 GemmMicrokernelTester()
2040 .mr(4)
2041 .nr(8)
2042 .kr(1)
2043 .sr(1)
2044 .m(4)
2045 .n(8)
2046 .k(1)
2047 .cm_stride(11)
2048 .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
2049 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002050#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
XNNPACK Teamb455b122019-09-27 18:10:33 -07002051
2052
2053TEST(F32_PPMM_4X2__SCALAR, k_eq_1) {
2054 GemmMicrokernelTester()
2055 .mr(4)
2056 .nr(2)
2057 .kr(1)
2058 .sr(1)
2059 .m(4)
2060 .n(2)
2061 .k(1)
2062 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2063}
2064
2065TEST(F32_PPMM_4X2__SCALAR, strided_cn) {
2066 GemmMicrokernelTester()
2067 .mr(4)
2068 .nr(2)
2069 .kr(1)
2070 .sr(1)
2071 .m(4)
2072 .n(2)
2073 .k(1)
2074 .cn_stride(5)
2075 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2076}
2077
2078TEST(F32_PPMM_4X2__SCALAR, k_eq_1_strided_a) {
2079 GemmMicrokernelTester()
2080 .mr(4)
2081 .nr(2)
2082 .kr(1)
2083 .sr(1)
2084 .m(4)
2085 .n(2)
2086 .k(1)
2087 .a_stride(3)
2088 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2089}
2090
2091TEST(F32_PPMM_4X2__SCALAR, k_eq_1_subtile) {
2092 for (uint32_t m = 1; m <= 4; m++) {
2093 for (uint32_t n = 1; n <= 2; n++) {
2094 GemmMicrokernelTester()
2095 .mr(4)
2096 .nr(2)
2097 .kr(1)
2098 .sr(1)
2099 .m(m)
2100 .n(n)
2101 .k(1)
2102 .iterations(1)
2103 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2104 }
2105 }
2106}
2107
2108TEST(F32_PPMM_4X2__SCALAR, k_eq_1_subtile_m) {
2109 for (uint32_t m = 1; m <= 4; m++) {
2110 GemmMicrokernelTester()
2111 .mr(4)
2112 .nr(2)
2113 .kr(1)
2114 .sr(1)
2115 .m(m)
2116 .n(2)
2117 .k(1)
2118 .iterations(1)
2119 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2120 }
2121}
2122
2123TEST(F32_PPMM_4X2__SCALAR, k_eq_1_subtile_n) {
2124 for (uint32_t n = 1; n <= 2; n++) {
2125 GemmMicrokernelTester()
2126 .mr(4)
2127 .nr(2)
2128 .kr(1)
2129 .sr(1)
2130 .m(4)
2131 .n(n)
2132 .k(1)
2133 .iterations(1)
2134 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2135 }
2136}
2137
2138TEST(F32_PPMM_4X2__SCALAR, k_gt_1) {
2139 for (size_t k = 2; k < 10; k++) {
2140 GemmMicrokernelTester()
2141 .mr(4)
2142 .nr(2)
2143 .kr(1)
2144 .sr(1)
2145 .m(4)
2146 .n(2)
2147 .k(k)
2148 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2149 }
2150}
2151
2152TEST(F32_PPMM_4X2__SCALAR, k_gt_1_subtile) {
2153 for (size_t k = 2; k < 10; k++) {
2154 for (uint32_t m = 1; m <= 4; m++) {
2155 for (uint32_t n = 1; n <= 2; n++) {
2156 GemmMicrokernelTester()
2157 .mr(4)
2158 .nr(2)
2159 .kr(1)
2160 .sr(1)
2161 .m(m)
2162 .n(n)
2163 .k(k)
2164 .iterations(1)
2165 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2166 }
2167 }
2168 }
2169}
2170
2171TEST(F32_PPMM_4X2__SCALAR, n_gt_2) {
2172 for (uint32_t n = 3; n < 4; n++) {
2173 for (size_t k = 1; k <= 5; k += 2) {
2174 GemmMicrokernelTester()
2175 .mr(4)
2176 .nr(2)
2177 .kr(1)
2178 .sr(1)
2179 .m(4)
2180 .n(2)
2181 .k(k)
2182 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2183 }
2184 }
2185}
2186
2187TEST(F32_PPMM_4X2__SCALAR, n_gt_2_strided_cn) {
2188 for (uint32_t n = 3; n < 4; n++) {
2189 for (size_t k = 1; k <= 5; k += 2) {
2190 GemmMicrokernelTester()
2191 .mr(4)
2192 .nr(2)
2193 .kr(1)
2194 .sr(1)
2195 .m(4)
2196 .n(2)
2197 .k(k)
2198 .cn_stride(5)
2199 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2200 }
2201 }
2202}
2203
2204TEST(F32_PPMM_4X2__SCALAR, n_gt_2_strided_a) {
2205 for (uint32_t n = 3; n < 4; n++) {
2206 for (size_t k = 1; k <= 5; k += 2) {
2207 GemmMicrokernelTester()
2208 .mr(4)
2209 .nr(2)
2210 .kr(1)
2211 .sr(1)
2212 .m(4)
2213 .n(n)
2214 .k(k)
2215 .a_stride(7)
2216 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2217 }
2218 }
2219}
2220
2221TEST(F32_PPMM_4X2__SCALAR, n_gt_2_subtile) {
2222 for (uint32_t n = 3; n < 4; n++) {
2223 for (size_t k = 1; k <= 5; k += 2) {
2224 for (uint32_t m = 1; m <= 4; m++) {
2225 GemmMicrokernelTester()
2226 .mr(4)
2227 .nr(2)
2228 .kr(1)
2229 .sr(1)
2230 .m(m)
2231 .n(n)
2232 .k(k)
2233 .iterations(1)
2234 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2235 }
2236 }
2237 }
2238}
2239
2240TEST(F32_PPMM_4X2__SCALAR, n_div_2) {
2241 for (uint32_t n = 4; n <= 6; n += 2) {
2242 for (size_t k = 1; k <= 5; k += 2) {
2243 GemmMicrokernelTester()
2244 .mr(4)
2245 .nr(2)
2246 .kr(1)
2247 .sr(1)
2248 .m(4)
2249 .n(2)
2250 .k(k)
2251 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2252 }
2253 }
2254}
2255
2256TEST(F32_PPMM_4X2__SCALAR, n_div_2_strided_cn) {
2257 for (uint32_t n = 4; n <= 6; n += 2) {
2258 for (size_t k = 1; k <= 5; k += 2) {
2259 GemmMicrokernelTester()
2260 .mr(4)
2261 .nr(2)
2262 .kr(1)
2263 .sr(1)
2264 .m(4)
2265 .n(n)
2266 .k(k)
2267 .cn_stride(5)
2268 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2269 }
2270 }
2271}
2272
2273TEST(F32_PPMM_4X2__SCALAR, n_div_2_strided_a) {
2274 for (uint32_t n = 4; n <= 6; n += 2) {
2275 for (size_t k = 1; k <= 5; k += 2) {
2276 GemmMicrokernelTester()
2277 .mr(4)
2278 .nr(2)
2279 .kr(1)
2280 .sr(1)
2281 .m(4)
2282 .n(n)
2283 .k(k)
2284 .a_stride(7)
2285 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2286 }
2287 }
2288}
2289
2290TEST(F32_PPMM_4X2__SCALAR, n_div_2_subtile) {
2291 for (uint32_t n = 4; n <= 6; n += 2) {
2292 for (size_t k = 1; k <= 5; k += 2) {
2293 for (uint32_t m = 1; m <= 4; m++) {
2294 GemmMicrokernelTester()
2295 .mr(4)
2296 .nr(2)
2297 .kr(1)
2298 .sr(1)
2299 .m(m)
2300 .n(n)
2301 .k(k)
2302 .iterations(1)
2303 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2304 }
2305 }
2306 }
2307}
2308
2309TEST(F32_PPMM_4X2__SCALAR, strided_cm_subtile) {
2310 for (size_t k = 1; k <= 5; k += 2) {
2311 for (uint32_t m = 1; m <= 4; m++) {
2312 for (uint32_t n = 1; n <= 2; n++) {
2313 GemmMicrokernelTester()
2314 .mr(4)
2315 .nr(2)
2316 .kr(1)
2317 .sr(1)
2318 .m(m)
2319 .n(n)
2320 .k(k)
2321 .cm_stride(5)
2322 .iterations(1)
2323 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2324 }
2325 }
2326 }
2327}
2328
2329TEST(F32_PPMM_4X2__SCALAR, qmin) {
2330 GemmMicrokernelTester()
2331 .mr(4)
2332 .nr(2)
2333 .kr(1)
2334 .sr(1)
2335 .m(4)
2336 .n(2)
2337 .k(1)
2338 .qmin(128)
2339 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2340}
2341
2342TEST(F32_PPMM_4X2__SCALAR, qmax) {
2343 GemmMicrokernelTester()
2344 .mr(4)
2345 .nr(2)
2346 .kr(1)
2347 .sr(1)
2348 .m(4)
2349 .n(2)
2350 .k(1)
2351 .qmax(128)
2352 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2353}
2354
2355TEST(F32_PPMM_4X2__SCALAR, strided_cm) {
2356 GemmMicrokernelTester()
2357 .mr(4)
2358 .nr(2)
2359 .kr(1)
2360 .sr(1)
2361 .m(4)
2362 .n(2)
2363 .k(1)
2364 .cm_stride(5)
2365 .Test(xnn_f32_ppmm_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
2366}
2367
2368
2369TEST(F32_PPMM_2X4__SCALAR, k_eq_1) {
2370 GemmMicrokernelTester()
2371 .mr(2)
2372 .nr(4)
2373 .kr(1)
2374 .sr(1)
2375 .m(2)
2376 .n(4)
2377 .k(1)
2378 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2379}
2380
2381TEST(F32_PPMM_2X4__SCALAR, strided_cn) {
2382 GemmMicrokernelTester()
2383 .mr(2)
2384 .nr(4)
2385 .kr(1)
2386 .sr(1)
2387 .m(2)
2388 .n(4)
2389 .k(1)
2390 .cn_stride(7)
2391 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2392}
2393
2394TEST(F32_PPMM_2X4__SCALAR, k_eq_1_strided_a) {
2395 GemmMicrokernelTester()
2396 .mr(2)
2397 .nr(4)
2398 .kr(1)
2399 .sr(1)
2400 .m(2)
2401 .n(4)
2402 .k(1)
2403 .a_stride(3)
2404 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2405}
2406
2407TEST(F32_PPMM_2X4__SCALAR, k_eq_1_subtile) {
2408 for (uint32_t m = 1; m <= 2; m++) {
2409 for (uint32_t n = 1; n <= 4; n++) {
2410 GemmMicrokernelTester()
2411 .mr(2)
2412 .nr(4)
2413 .kr(1)
2414 .sr(1)
2415 .m(m)
2416 .n(n)
2417 .k(1)
2418 .iterations(1)
2419 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2420 }
2421 }
2422}
2423
2424TEST(F32_PPMM_2X4__SCALAR, k_eq_1_subtile_m) {
2425 for (uint32_t m = 1; m <= 2; m++) {
2426 GemmMicrokernelTester()
2427 .mr(2)
2428 .nr(4)
2429 .kr(1)
2430 .sr(1)
2431 .m(m)
2432 .n(4)
2433 .k(1)
2434 .iterations(1)
2435 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2436 }
2437}
2438
2439TEST(F32_PPMM_2X4__SCALAR, k_eq_1_subtile_n) {
2440 for (uint32_t n = 1; n <= 4; n++) {
2441 GemmMicrokernelTester()
2442 .mr(2)
2443 .nr(4)
2444 .kr(1)
2445 .sr(1)
2446 .m(2)
2447 .n(n)
2448 .k(1)
2449 .iterations(1)
2450 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2451 }
2452}
2453
2454TEST(F32_PPMM_2X4__SCALAR, k_gt_1) {
2455 for (size_t k = 2; k < 10; k++) {
2456 GemmMicrokernelTester()
2457 .mr(2)
2458 .nr(4)
2459 .kr(1)
2460 .sr(1)
2461 .m(2)
2462 .n(4)
2463 .k(k)
2464 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2465 }
2466}
2467
2468TEST(F32_PPMM_2X4__SCALAR, k_gt_1_subtile) {
2469 for (size_t k = 2; k < 10; k++) {
2470 for (uint32_t m = 1; m <= 2; m++) {
2471 for (uint32_t n = 1; n <= 4; n++) {
2472 GemmMicrokernelTester()
2473 .mr(2)
2474 .nr(4)
2475 .kr(1)
2476 .sr(1)
2477 .m(m)
2478 .n(n)
2479 .k(k)
2480 .iterations(1)
2481 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2482 }
2483 }
2484 }
2485}
2486
2487TEST(F32_PPMM_2X4__SCALAR, n_gt_4) {
2488 for (uint32_t n = 5; n < 8; n++) {
2489 for (size_t k = 1; k <= 5; k += 2) {
2490 GemmMicrokernelTester()
2491 .mr(2)
2492 .nr(4)
2493 .kr(1)
2494 .sr(1)
2495 .m(2)
2496 .n(4)
2497 .k(k)
2498 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2499 }
2500 }
2501}
2502
2503TEST(F32_PPMM_2X4__SCALAR, n_gt_4_strided_cn) {
2504 for (uint32_t n = 5; n < 8; n++) {
2505 for (size_t k = 1; k <= 5; k += 2) {
2506 GemmMicrokernelTester()
2507 .mr(2)
2508 .nr(4)
2509 .kr(1)
2510 .sr(1)
2511 .m(2)
2512 .n(4)
2513 .k(k)
2514 .cn_stride(7)
2515 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2516 }
2517 }
2518}
2519
2520TEST(F32_PPMM_2X4__SCALAR, n_gt_4_strided_a) {
2521 for (uint32_t n = 5; n < 8; n++) {
2522 for (size_t k = 1; k <= 5; k += 2) {
2523 GemmMicrokernelTester()
2524 .mr(2)
2525 .nr(4)
2526 .kr(1)
2527 .sr(1)
2528 .m(2)
2529 .n(n)
2530 .k(k)
2531 .a_stride(7)
2532 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2533 }
2534 }
2535}
2536
2537TEST(F32_PPMM_2X4__SCALAR, n_gt_4_subtile) {
2538 for (uint32_t n = 5; n < 8; n++) {
2539 for (size_t k = 1; k <= 5; k += 2) {
2540 for (uint32_t m = 1; m <= 2; m++) {
2541 GemmMicrokernelTester()
2542 .mr(2)
2543 .nr(4)
2544 .kr(1)
2545 .sr(1)
2546 .m(m)
2547 .n(n)
2548 .k(k)
2549 .iterations(1)
2550 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2551 }
2552 }
2553 }
2554}
2555
2556TEST(F32_PPMM_2X4__SCALAR, n_div_4) {
2557 for (uint32_t n = 8; n <= 12; n += 4) {
2558 for (size_t k = 1; k <= 5; k += 2) {
2559 GemmMicrokernelTester()
2560 .mr(2)
2561 .nr(4)
2562 .kr(1)
2563 .sr(1)
2564 .m(2)
2565 .n(4)
2566 .k(k)
2567 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2568 }
2569 }
2570}
2571
2572TEST(F32_PPMM_2X4__SCALAR, n_div_4_strided_cn) {
2573 for (uint32_t n = 8; n <= 12; n += 4) {
2574 for (size_t k = 1; k <= 5; k += 2) {
2575 GemmMicrokernelTester()
2576 .mr(2)
2577 .nr(4)
2578 .kr(1)
2579 .sr(1)
2580 .m(2)
2581 .n(n)
2582 .k(k)
2583 .cn_stride(7)
2584 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2585 }
2586 }
2587}
2588
2589TEST(F32_PPMM_2X4__SCALAR, n_div_4_strided_a) {
2590 for (uint32_t n = 8; n <= 12; n += 4) {
2591 for (size_t k = 1; k <= 5; k += 2) {
2592 GemmMicrokernelTester()
2593 .mr(2)
2594 .nr(4)
2595 .kr(1)
2596 .sr(1)
2597 .m(2)
2598 .n(n)
2599 .k(k)
2600 .a_stride(7)
2601 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2602 }
2603 }
2604}
2605
2606TEST(F32_PPMM_2X4__SCALAR, n_div_4_subtile) {
2607 for (uint32_t n = 8; n <= 12; n += 4) {
2608 for (size_t k = 1; k <= 5; k += 2) {
2609 for (uint32_t m = 1; m <= 2; m++) {
2610 GemmMicrokernelTester()
2611 .mr(2)
2612 .nr(4)
2613 .kr(1)
2614 .sr(1)
2615 .m(m)
2616 .n(n)
2617 .k(k)
2618 .iterations(1)
2619 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2620 }
2621 }
2622 }
2623}
2624
2625TEST(F32_PPMM_2X4__SCALAR, strided_cm_subtile) {
2626 for (size_t k = 1; k <= 5; k += 2) {
2627 for (uint32_t m = 1; m <= 2; m++) {
2628 for (uint32_t n = 1; n <= 4; n++) {
2629 GemmMicrokernelTester()
2630 .mr(2)
2631 .nr(4)
2632 .kr(1)
2633 .sr(1)
2634 .m(m)
2635 .n(n)
2636 .k(k)
2637 .cm_stride(7)
2638 .iterations(1)
2639 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2640 }
2641 }
2642 }
2643}
2644
2645TEST(F32_PPMM_2X4__SCALAR, qmin) {
2646 GemmMicrokernelTester()
2647 .mr(2)
2648 .nr(4)
2649 .kr(1)
2650 .sr(1)
2651 .m(2)
2652 .n(4)
2653 .k(1)
2654 .qmin(128)
2655 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2656}
2657
2658TEST(F32_PPMM_2X4__SCALAR, qmax) {
2659 GemmMicrokernelTester()
2660 .mr(2)
2661 .nr(4)
2662 .kr(1)
2663 .sr(1)
2664 .m(2)
2665 .n(4)
2666 .k(1)
2667 .qmax(128)
2668 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2669}
2670
2671TEST(F32_PPMM_2X4__SCALAR, strided_cm) {
2672 GemmMicrokernelTester()
2673 .mr(2)
2674 .nr(4)
2675 .kr(1)
2676 .sr(1)
2677 .m(2)
2678 .n(4)
2679 .k(1)
2680 .cm_stride(7)
2681 .Test(xnn_f32_ppmm_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2682}
2683
2684
2685TEST(F32_PPMM_4X4__SCALAR, k_eq_1) {
2686 GemmMicrokernelTester()
2687 .mr(4)
2688 .nr(4)
2689 .kr(1)
2690 .sr(1)
2691 .m(4)
2692 .n(4)
2693 .k(1)
2694 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2695}
2696
2697TEST(F32_PPMM_4X4__SCALAR, strided_cn) {
2698 GemmMicrokernelTester()
2699 .mr(4)
2700 .nr(4)
2701 .kr(1)
2702 .sr(1)
2703 .m(4)
2704 .n(4)
2705 .k(1)
2706 .cn_stride(7)
2707 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2708}
2709
2710TEST(F32_PPMM_4X4__SCALAR, k_eq_1_strided_a) {
2711 GemmMicrokernelTester()
2712 .mr(4)
2713 .nr(4)
2714 .kr(1)
2715 .sr(1)
2716 .m(4)
2717 .n(4)
2718 .k(1)
2719 .a_stride(3)
2720 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2721}
2722
2723TEST(F32_PPMM_4X4__SCALAR, k_eq_1_subtile) {
2724 for (uint32_t m = 1; m <= 4; m++) {
2725 for (uint32_t n = 1; n <= 4; n++) {
2726 GemmMicrokernelTester()
2727 .mr(4)
2728 .nr(4)
2729 .kr(1)
2730 .sr(1)
2731 .m(m)
2732 .n(n)
2733 .k(1)
2734 .iterations(1)
2735 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2736 }
2737 }
2738}
2739
2740TEST(F32_PPMM_4X4__SCALAR, k_eq_1_subtile_m) {
2741 for (uint32_t m = 1; m <= 4; m++) {
2742 GemmMicrokernelTester()
2743 .mr(4)
2744 .nr(4)
2745 .kr(1)
2746 .sr(1)
2747 .m(m)
2748 .n(4)
2749 .k(1)
2750 .iterations(1)
2751 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2752 }
2753}
2754
2755TEST(F32_PPMM_4X4__SCALAR, k_eq_1_subtile_n) {
2756 for (uint32_t n = 1; n <= 4; n++) {
2757 GemmMicrokernelTester()
2758 .mr(4)
2759 .nr(4)
2760 .kr(1)
2761 .sr(1)
2762 .m(4)
2763 .n(n)
2764 .k(1)
2765 .iterations(1)
2766 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2767 }
2768}
2769
2770TEST(F32_PPMM_4X4__SCALAR, k_gt_1) {
2771 for (size_t k = 2; k < 10; k++) {
2772 GemmMicrokernelTester()
2773 .mr(4)
2774 .nr(4)
2775 .kr(1)
2776 .sr(1)
2777 .m(4)
2778 .n(4)
2779 .k(k)
2780 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2781 }
2782}
2783
2784TEST(F32_PPMM_4X4__SCALAR, k_gt_1_subtile) {
2785 for (size_t k = 2; k < 10; k++) {
2786 for (uint32_t m = 1; m <= 4; m++) {
2787 for (uint32_t n = 1; n <= 4; n++) {
2788 GemmMicrokernelTester()
2789 .mr(4)
2790 .nr(4)
2791 .kr(1)
2792 .sr(1)
2793 .m(m)
2794 .n(n)
2795 .k(k)
2796 .iterations(1)
2797 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2798 }
2799 }
2800 }
2801}
2802
2803TEST(F32_PPMM_4X4__SCALAR, n_gt_4) {
2804 for (uint32_t n = 5; n < 8; n++) {
2805 for (size_t k = 1; k <= 5; k += 2) {
2806 GemmMicrokernelTester()
2807 .mr(4)
2808 .nr(4)
2809 .kr(1)
2810 .sr(1)
2811 .m(4)
2812 .n(4)
2813 .k(k)
2814 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2815 }
2816 }
2817}
2818
2819TEST(F32_PPMM_4X4__SCALAR, n_gt_4_strided_cn) {
2820 for (uint32_t n = 5; n < 8; n++) {
2821 for (size_t k = 1; k <= 5; k += 2) {
2822 GemmMicrokernelTester()
2823 .mr(4)
2824 .nr(4)
2825 .kr(1)
2826 .sr(1)
2827 .m(4)
2828 .n(4)
2829 .k(k)
2830 .cn_stride(7)
2831 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2832 }
2833 }
2834}
2835
2836TEST(F32_PPMM_4X4__SCALAR, n_gt_4_strided_a) {
2837 for (uint32_t n = 5; n < 8; n++) {
2838 for (size_t k = 1; k <= 5; k += 2) {
2839 GemmMicrokernelTester()
2840 .mr(4)
2841 .nr(4)
2842 .kr(1)
2843 .sr(1)
2844 .m(4)
2845 .n(n)
2846 .k(k)
2847 .a_stride(7)
2848 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2849 }
2850 }
2851}
2852
2853TEST(F32_PPMM_4X4__SCALAR, n_gt_4_subtile) {
2854 for (uint32_t n = 5; n < 8; n++) {
2855 for (size_t k = 1; k <= 5; k += 2) {
2856 for (uint32_t m = 1; m <= 4; m++) {
2857 GemmMicrokernelTester()
2858 .mr(4)
2859 .nr(4)
2860 .kr(1)
2861 .sr(1)
2862 .m(m)
2863 .n(n)
2864 .k(k)
2865 .iterations(1)
2866 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2867 }
2868 }
2869 }
2870}
2871
2872TEST(F32_PPMM_4X4__SCALAR, n_div_4) {
2873 for (uint32_t n = 8; n <= 12; n += 4) {
2874 for (size_t k = 1; k <= 5; k += 2) {
2875 GemmMicrokernelTester()
2876 .mr(4)
2877 .nr(4)
2878 .kr(1)
2879 .sr(1)
2880 .m(4)
2881 .n(4)
2882 .k(k)
2883 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2884 }
2885 }
2886}
2887
2888TEST(F32_PPMM_4X4__SCALAR, n_div_4_strided_cn) {
2889 for (uint32_t n = 8; n <= 12; n += 4) {
2890 for (size_t k = 1; k <= 5; k += 2) {
2891 GemmMicrokernelTester()
2892 .mr(4)
2893 .nr(4)
2894 .kr(1)
2895 .sr(1)
2896 .m(4)
2897 .n(n)
2898 .k(k)
2899 .cn_stride(7)
2900 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2901 }
2902 }
2903}
2904
2905TEST(F32_PPMM_4X4__SCALAR, n_div_4_strided_a) {
2906 for (uint32_t n = 8; n <= 12; n += 4) {
2907 for (size_t k = 1; k <= 5; k += 2) {
2908 GemmMicrokernelTester()
2909 .mr(4)
2910 .nr(4)
2911 .kr(1)
2912 .sr(1)
2913 .m(4)
2914 .n(n)
2915 .k(k)
2916 .a_stride(7)
2917 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2918 }
2919 }
2920}
2921
2922TEST(F32_PPMM_4X4__SCALAR, n_div_4_subtile) {
2923 for (uint32_t n = 8; n <= 12; n += 4) {
2924 for (size_t k = 1; k <= 5; k += 2) {
2925 for (uint32_t m = 1; m <= 4; m++) {
2926 GemmMicrokernelTester()
2927 .mr(4)
2928 .nr(4)
2929 .kr(1)
2930 .sr(1)
2931 .m(m)
2932 .n(n)
2933 .k(k)
2934 .iterations(1)
2935 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2936 }
2937 }
2938 }
2939}
2940
2941TEST(F32_PPMM_4X4__SCALAR, strided_cm_subtile) {
2942 for (size_t k = 1; k <= 5; k += 2) {
2943 for (uint32_t m = 1; m <= 4; m++) {
2944 for (uint32_t n = 1; n <= 4; n++) {
2945 GemmMicrokernelTester()
2946 .mr(4)
2947 .nr(4)
2948 .kr(1)
2949 .sr(1)
2950 .m(m)
2951 .n(n)
2952 .k(k)
2953 .cm_stride(7)
2954 .iterations(1)
2955 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2956 }
2957 }
2958 }
2959}
2960
2961TEST(F32_PPMM_4X4__SCALAR, qmin) {
2962 GemmMicrokernelTester()
2963 .mr(4)
2964 .nr(4)
2965 .kr(1)
2966 .sr(1)
2967 .m(4)
2968 .n(4)
2969 .k(1)
2970 .qmin(128)
2971 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2972}
2973
2974TEST(F32_PPMM_4X4__SCALAR, qmax) {
2975 GemmMicrokernelTester()
2976 .mr(4)
2977 .nr(4)
2978 .kr(1)
2979 .sr(1)
2980 .m(4)
2981 .n(4)
2982 .k(1)
2983 .qmax(128)
2984 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2985}
2986
2987TEST(F32_PPMM_4X4__SCALAR, strided_cm) {
2988 GemmMicrokernelTester()
2989 .mr(4)
2990 .nr(4)
2991 .kr(1)
2992 .sr(1)
2993 .m(4)
2994 .n(4)
2995 .k(1)
2996 .cm_stride(7)
2997 .Test(xnn_f32_ppmm_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
2998}
2999
3000
3001TEST(F32_PPMM_3X3__SCALAR, k_eq_1) {
3002 GemmMicrokernelTester()
3003 .mr(3)
3004 .nr(3)
3005 .kr(1)
3006 .sr(1)
3007 .m(3)
3008 .n(3)
3009 .k(1)
3010 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3011}
3012
3013TEST(F32_PPMM_3X3__SCALAR, strided_cn) {
3014 GemmMicrokernelTester()
3015 .mr(3)
3016 .nr(3)
3017 .kr(1)
3018 .sr(1)
3019 .m(3)
3020 .n(3)
3021 .k(1)
3022 .cn_stride(5)
3023 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3024}
3025
3026TEST(F32_PPMM_3X3__SCALAR, k_eq_1_strided_a) {
3027 GemmMicrokernelTester()
3028 .mr(3)
3029 .nr(3)
3030 .kr(1)
3031 .sr(1)
3032 .m(3)
3033 .n(3)
3034 .k(1)
3035 .a_stride(3)
3036 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3037}
3038
3039TEST(F32_PPMM_3X3__SCALAR, k_eq_1_subtile) {
3040 for (uint32_t m = 1; m <= 3; m++) {
3041 for (uint32_t n = 1; n <= 3; n++) {
3042 GemmMicrokernelTester()
3043 .mr(3)
3044 .nr(3)
3045 .kr(1)
3046 .sr(1)
3047 .m(m)
3048 .n(n)
3049 .k(1)
3050 .iterations(1)
3051 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3052 }
3053 }
3054}
3055
3056TEST(F32_PPMM_3X3__SCALAR, k_eq_1_subtile_m) {
3057 for (uint32_t m = 1; m <= 3; m++) {
3058 GemmMicrokernelTester()
3059 .mr(3)
3060 .nr(3)
3061 .kr(1)
3062 .sr(1)
3063 .m(m)
3064 .n(3)
3065 .k(1)
3066 .iterations(1)
3067 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3068 }
3069}
3070
3071TEST(F32_PPMM_3X3__SCALAR, k_eq_1_subtile_n) {
3072 for (uint32_t n = 1; n <= 3; n++) {
3073 GemmMicrokernelTester()
3074 .mr(3)
3075 .nr(3)
3076 .kr(1)
3077 .sr(1)
3078 .m(3)
3079 .n(n)
3080 .k(1)
3081 .iterations(1)
3082 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3083 }
3084}
3085
3086TEST(F32_PPMM_3X3__SCALAR, k_gt_1) {
3087 for (size_t k = 2; k < 10; k++) {
3088 GemmMicrokernelTester()
3089 .mr(3)
3090 .nr(3)
3091 .kr(1)
3092 .sr(1)
3093 .m(3)
3094 .n(3)
3095 .k(k)
3096 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3097 }
3098}
3099
3100TEST(F32_PPMM_3X3__SCALAR, k_gt_1_subtile) {
3101 for (size_t k = 2; k < 10; k++) {
3102 for (uint32_t m = 1; m <= 3; m++) {
3103 for (uint32_t n = 1; n <= 3; n++) {
3104 GemmMicrokernelTester()
3105 .mr(3)
3106 .nr(3)
3107 .kr(1)
3108 .sr(1)
3109 .m(m)
3110 .n(n)
3111 .k(k)
3112 .iterations(1)
3113 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3114 }
3115 }
3116 }
3117}
3118
3119TEST(F32_PPMM_3X3__SCALAR, n_gt_3) {
3120 for (uint32_t n = 4; n < 6; n++) {
3121 for (size_t k = 1; k <= 5; k += 2) {
3122 GemmMicrokernelTester()
3123 .mr(3)
3124 .nr(3)
3125 .kr(1)
3126 .sr(1)
3127 .m(3)
3128 .n(3)
3129 .k(k)
3130 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3131 }
3132 }
3133}
3134
3135TEST(F32_PPMM_3X3__SCALAR, n_gt_3_strided_cn) {
3136 for (uint32_t n = 4; n < 6; n++) {
3137 for (size_t k = 1; k <= 5; k += 2) {
3138 GemmMicrokernelTester()
3139 .mr(3)
3140 .nr(3)
3141 .kr(1)
3142 .sr(1)
3143 .m(3)
3144 .n(3)
3145 .k(k)
3146 .cn_stride(5)
3147 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3148 }
3149 }
3150}
3151
3152TEST(F32_PPMM_3X3__SCALAR, n_gt_3_strided_a) {
3153 for (uint32_t n = 4; n < 6; n++) {
3154 for (size_t k = 1; k <= 5; k += 2) {
3155 GemmMicrokernelTester()
3156 .mr(3)
3157 .nr(3)
3158 .kr(1)
3159 .sr(1)
3160 .m(3)
3161 .n(n)
3162 .k(k)
3163 .a_stride(7)
3164 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3165 }
3166 }
3167}
3168
3169TEST(F32_PPMM_3X3__SCALAR, n_gt_3_subtile) {
3170 for (uint32_t n = 4; n < 6; n++) {
3171 for (size_t k = 1; k <= 5; k += 2) {
3172 for (uint32_t m = 1; m <= 3; m++) {
3173 GemmMicrokernelTester()
3174 .mr(3)
3175 .nr(3)
3176 .kr(1)
3177 .sr(1)
3178 .m(m)
3179 .n(n)
3180 .k(k)
3181 .iterations(1)
3182 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3183 }
3184 }
3185 }
3186}
3187
3188TEST(F32_PPMM_3X3__SCALAR, n_div_3) {
3189 for (uint32_t n = 6; n <= 9; n += 3) {
3190 for (size_t k = 1; k <= 5; k += 2) {
3191 GemmMicrokernelTester()
3192 .mr(3)
3193 .nr(3)
3194 .kr(1)
3195 .sr(1)
3196 .m(3)
3197 .n(3)
3198 .k(k)
3199 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3200 }
3201 }
3202}
3203
3204TEST(F32_PPMM_3X3__SCALAR, n_div_3_strided_cn) {
3205 for (uint32_t n = 6; n <= 9; n += 3) {
3206 for (size_t k = 1; k <= 5; k += 2) {
3207 GemmMicrokernelTester()
3208 .mr(3)
3209 .nr(3)
3210 .kr(1)
3211 .sr(1)
3212 .m(3)
3213 .n(n)
3214 .k(k)
3215 .cn_stride(5)
3216 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3217 }
3218 }
3219}
3220
3221TEST(F32_PPMM_3X3__SCALAR, n_div_3_strided_a) {
3222 for (uint32_t n = 6; n <= 9; n += 3) {
3223 for (size_t k = 1; k <= 5; k += 2) {
3224 GemmMicrokernelTester()
3225 .mr(3)
3226 .nr(3)
3227 .kr(1)
3228 .sr(1)
3229 .m(3)
3230 .n(n)
3231 .k(k)
3232 .a_stride(7)
3233 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3234 }
3235 }
3236}
3237
3238TEST(F32_PPMM_3X3__SCALAR, n_div_3_subtile) {
3239 for (uint32_t n = 6; n <= 9; n += 3) {
3240 for (size_t k = 1; k <= 5; k += 2) {
3241 for (uint32_t m = 1; m <= 3; m++) {
3242 GemmMicrokernelTester()
3243 .mr(3)
3244 .nr(3)
3245 .kr(1)
3246 .sr(1)
3247 .m(m)
3248 .n(n)
3249 .k(k)
3250 .iterations(1)
3251 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3252 }
3253 }
3254 }
3255}
3256
3257TEST(F32_PPMM_3X3__SCALAR, strided_cm_subtile) {
3258 for (size_t k = 1; k <= 5; k += 2) {
3259 for (uint32_t m = 1; m <= 3; m++) {
3260 for (uint32_t n = 1; n <= 3; n++) {
3261 GemmMicrokernelTester()
3262 .mr(3)
3263 .nr(3)
3264 .kr(1)
3265 .sr(1)
3266 .m(m)
3267 .n(n)
3268 .k(k)
3269 .cm_stride(5)
3270 .iterations(1)
3271 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3272 }
3273 }
3274 }
3275}
3276
3277TEST(F32_PPMM_3X3__SCALAR, qmin) {
3278 GemmMicrokernelTester()
3279 .mr(3)
3280 .nr(3)
3281 .kr(1)
3282 .sr(1)
3283 .m(3)
3284 .n(3)
3285 .k(1)
3286 .qmin(128)
3287 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3288}
3289
3290TEST(F32_PPMM_3X3__SCALAR, qmax) {
3291 GemmMicrokernelTester()
3292 .mr(3)
3293 .nr(3)
3294 .kr(1)
3295 .sr(1)
3296 .m(3)
3297 .n(3)
3298 .k(1)
3299 .qmax(128)
3300 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3301}
3302
3303TEST(F32_PPMM_3X3__SCALAR, strided_cm) {
3304 GemmMicrokernelTester()
3305 .mr(3)
3306 .nr(3)
3307 .kr(1)
3308 .sr(1)
3309 .m(3)
3310 .n(3)
3311 .k(1)
3312 .cm_stride(5)
3313 .Test(xnn_f32_ppmm_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
3314}