// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/q8-gemm.yaml
//   Generator: tools/generate-gemm-test.py


#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/ppmm.h>
#include "gemm-microkernel-tester.h"

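// Editorial note (not part of the generated output): GemmMicrokernelTester, defined in
// gemm-microkernel-tester.h, is a fluent test harness. As used below, mr()/nr() describe
// the microkernel's output tile, kr()/sr() its weight-packing layout, and m()/n()/k() the
// problem size actually exercised. a_stride(), cm_stride() and cn_stride() request
// non-contiguous input/output layouts, a_zero_point()/b_zero_point() set quantization
// zero points, qmin()/qmax() request output clamping, and iterations() bounds the number
// of randomized repetitions. The authoritative semantics live in the tester header.
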
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(Q8_GEMM_4X8__NEON, k_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .cn_stride(11)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, k_eq_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .a_stride(11)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 4; m++) {
      for (uint32_t n = 1; n <= 8; n++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_lt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_gt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(19)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_div_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_stride(83)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(8)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(8)
          .k(k)
          .cn_stride(11)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_gt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(8)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_div_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_4x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X8__NEON, qmin) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .qmin(128)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, qmax) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .qmax(128)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, strided_cm) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(8)
      .cm_stride(11)
      .Test(xnn_q8_gemm_ukernel_4x8__neon);
  }

  TEST(Q8_GEMM_4X8__NEON, no_a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, no_b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }

  TEST(Q8_GEMM_4X8__NEON, no_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x8__neon);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(Q8_GEMM_8X8__NEON, k_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .cn_stride(11)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, k_eq_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .a_stride(11)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 8; m++) {
      for (uint32_t n = 1; n <= 8; n++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 8; m++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_lt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .a_stride(11)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t m = 1; m <= 8; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_gt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .a_stride(19)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t m = 1; m <= 8; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_div_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .a_stride(83)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t m = 1; m <= 8; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(8)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(8)
          .k(k)
          .cn_stride(11)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_gt_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 8; m++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(8)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_div_8_strided_a) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(8)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_8x8__neon);
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 8; m++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        for (uint32_t n = 1; n <= 8; n++) {
          GemmMicrokernelTester()
            .mr(8)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_8x8__neon);
        }
      }
    }
  }

  TEST(Q8_GEMM_8X8__NEON, qmin) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .qmin(128)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, qmax) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .qmax(128)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, strided_cm) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(8)
      .cm_stride(11)
      .Test(xnn_q8_gemm_ukernel_8x8__neon);
  }

  TEST(Q8_GEMM_8X8__NEON, no_a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .a_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, no_b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }

  TEST(Q8_GEMM_8X8__NEON, no_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_8x8__neon);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .cn_stride(7)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .a_stride(11)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t m = 1; m <= 2; m++) {
      for (uint32_t n = 1; n <= 4; n++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8_subtile_m) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(4)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8_subtile_n) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 1; n <= 4; n++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_lt_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_stride(11)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_lt_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t m = 1; m <= 2; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_gt_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_stride(19)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_gt_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t m = 1; m <= 2; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_div_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_stride(83)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, k_div_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t m = 1; m <= 2; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_gt_4) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(4)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_gt_4_strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(4)
          .k(k)
          .cn_stride(7)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_gt_4_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_gt_4_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_div_4) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(4)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_div_4_strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_div_4_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, n_div_4_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, strided_cm_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, qmin) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .qmin(128)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, qmax) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .qmax(128)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, strided_cm) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(8)
      .cm_stride(7)
      .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
  }

  TEST(Q8_GEMM_2X4C8__SSE2, no_a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, no_b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }

  TEST(Q8_GEMM_2X4C8__SSE2, no_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .cn_stride(7)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .a_stride(11)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t m = 1; m <= 4; m++) {
      for (uint32_t n = 1; n <= 4; n++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8_subtile_m) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(4)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8_subtile_n) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 1; n <= 4; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_lt_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_stride(11)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_lt_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_gt_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_stride(19)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_gt_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_div_8_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_stride(83)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, k_div_8_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_gt_4) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(4)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_gt_4_strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(4)
          .k(k)
          .cn_stride(7)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_gt_4_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_gt_4_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_div_4) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(4)
          .k(k)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_div_4_strided_cn) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_div_4_strided_a) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, n_div_4_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, strided_cm_subtile) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 4; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
        }
      }
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, qmin) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .qmin(128)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, qmax) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .qmax(128)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, strided_cm) {
    TEST_REQUIRES_X86_SSE2;
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(8)
      .cm_stride(7)
      .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
  }

  TEST(Q8_GEMM_4X4C2__SSE2, no_a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, no_b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }

  TEST(Q8_GEMM_4X4C2__SSE2, no_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

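// Editorial note (not part of the generated output): the scalar tests below pass
// GemmMicrokernelTester::Variant::Scalar, which presumably directs the tester to use
// portable (non-SIMD) requantization parameters; they also omit TEST_REQUIRES_* guards
// and the architecture #if blocks, since the plain C microkernel runs on every target.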
TEST(Q8_GEMM_2X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .a_stride(3)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t m = 1; m <= 2; m++) {
    for (uint32_t n = 1; n <= 2; n++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_stride(11)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t m = 1; m <= 2; m++) {
      for (uint32_t n = 1; n <= 2; n++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .cn_stride(5)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t m = 1; m <= 2; m++) {
      for (uint32_t n = 1; n <= 2; n++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}

TEST(Q8_GEMM_2X2__SCALAR, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
}

TEST(Q8_GEMM_2X2__SCALAR, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_GEMM_2X2__SCALAR, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_q8_gemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}