blob: e10829c49b461bcb0ec46a99ff77735259c5e42e [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/q8-igemm.yaml
// Generator: tools/generate-gemm-test.py
12
13
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <gtest/gtest.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070026 TEST(Q8_IGEMM_4X8__NEON, k_eq_8) {
27 TEST_REQUIRES_ARM_NEON;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(8)
36 .Test(xnn_q8_igemm_ukernel_4x8__neon);
37 }
38
39 TEST(Q8_IGEMM_4X8__NEON, strided_cn) {
40 TEST_REQUIRES_ARM_NEON;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(8)
49 .cn_stride(11)
50 .Test(xnn_q8_igemm_ukernel_4x8__neon);
51 }
52
53 TEST(Q8_IGEMM_4X8__NEON, k_eq_8_subtile) {
54 TEST_REQUIRES_ARM_NEON;
55 for (uint32_t m = 1; m <= 4; m++) {
56 for (uint32_t n = 1; n <= 8; n++) {
57 GemmMicrokernelTester()
58 .mr(4)
59 .nr(8)
60 .kr(1)
61 .sr(1)
62 .m(m)
63 .n(n)
64 .k(8)
65 .iterations(1)
66 .Test(xnn_q8_igemm_ukernel_4x8__neon);
67 }
68 }
69 }
70
71 TEST(Q8_IGEMM_4X8__NEON, k_eq_8_subtile_m) {
72 TEST_REQUIRES_ARM_NEON;
73 for (uint32_t m = 1; m <= 4; m++) {
74 GemmMicrokernelTester()
75 .mr(4)
76 .nr(8)
77 .kr(1)
78 .sr(1)
79 .m(m)
80 .n(8)
81 .k(8)
82 .iterations(1)
83 .Test(xnn_q8_igemm_ukernel_4x8__neon);
84 }
85 }
86
87 TEST(Q8_IGEMM_4X8__NEON, k_eq_8_subtile_n) {
88 TEST_REQUIRES_ARM_NEON;
89 for (uint32_t n = 1; n <= 8; n++) {
90 GemmMicrokernelTester()
91 .mr(4)
92 .nr(8)
93 .kr(1)
94 .sr(1)
95 .m(4)
96 .n(n)
97 .k(8)
98 .iterations(1)
99 .Test(xnn_q8_igemm_ukernel_4x8__neon);
100 }
101 }
102
103 TEST(Q8_IGEMM_4X8__NEON, k_lt_8) {
104 TEST_REQUIRES_ARM_NEON;
105 for (size_t k = 1; k < 8; k++) {
106 GemmMicrokernelTester()
107 .mr(4)
108 .nr(8)
109 .kr(1)
110 .sr(1)
111 .m(4)
112 .n(8)
113 .k(k)
114 .Test(xnn_q8_igemm_ukernel_4x8__neon);
115 }
116 }
117
118 TEST(Q8_IGEMM_4X8__NEON, k_lt_8_subtile) {
119 TEST_REQUIRES_ARM_NEON;
120 for (size_t k = 1; k < 8; k++) {
121 for (uint32_t m = 1; m <= 4; m++) {
122 for (uint32_t n = 1; n <= 8; n++) {
123 GemmMicrokernelTester()
124 .mr(4)
125 .nr(8)
126 .kr(1)
127 .sr(1)
128 .m(m)
129 .n(n)
130 .k(k)
131 .iterations(1)
132 .Test(xnn_q8_igemm_ukernel_4x8__neon);
133 }
134 }
135 }
136 }
137
138 TEST(Q8_IGEMM_4X8__NEON, k_gt_8) {
139 TEST_REQUIRES_ARM_NEON;
140 for (size_t k = 9; k < 16; k++) {
141 GemmMicrokernelTester()
142 .mr(4)
143 .nr(8)
144 .kr(1)
145 .sr(1)
146 .m(4)
147 .n(8)
148 .k(k)
149 .Test(xnn_q8_igemm_ukernel_4x8__neon);
150 }
151 }
152
153 TEST(Q8_IGEMM_4X8__NEON, k_gt_8_subtile) {
154 TEST_REQUIRES_ARM_NEON;
155 for (size_t k = 9; k < 16; k++) {
156 for (uint32_t m = 1; m <= 4; m++) {
157 for (uint32_t n = 1; n <= 8; n++) {
158 GemmMicrokernelTester()
159 .mr(4)
160 .nr(8)
161 .kr(1)
162 .sr(1)
163 .m(m)
164 .n(n)
165 .k(k)
166 .iterations(1)
167 .Test(xnn_q8_igemm_ukernel_4x8__neon);
168 }
169 }
170 }
171 }
172
173 TEST(Q8_IGEMM_4X8__NEON, k_div_8) {
174 TEST_REQUIRES_ARM_NEON;
175 for (size_t k = 16; k <= 80; k += 8) {
176 GemmMicrokernelTester()
177 .mr(4)
178 .nr(8)
179 .kr(1)
180 .sr(1)
181 .m(4)
182 .n(8)
183 .k(k)
184 .Test(xnn_q8_igemm_ukernel_4x8__neon);
185 }
186 }
187
188 TEST(Q8_IGEMM_4X8__NEON, k_div_8_subtile) {
189 TEST_REQUIRES_ARM_NEON;
190 for (size_t k = 16; k <= 80; k += 8) {
191 for (uint32_t m = 1; m <= 4; m++) {
192 for (uint32_t n = 1; n <= 8; n++) {
193 GemmMicrokernelTester()
194 .mr(4)
195 .nr(8)
196 .kr(1)
197 .sr(1)
198 .m(m)
199 .n(n)
200 .k(k)
201 .iterations(1)
202 .Test(xnn_q8_igemm_ukernel_4x8__neon);
203 }
204 }
205 }
206 }
207
208 TEST(Q8_IGEMM_4X8__NEON, n_gt_8) {
209 TEST_REQUIRES_ARM_NEON;
210 for (uint32_t n = 9; n < 16; n++) {
211 for (size_t k = 1; k <= 40; k += 9) {
212 GemmMicrokernelTester()
213 .mr(4)
214 .nr(8)
215 .kr(1)
216 .sr(1)
217 .m(4)
218 .n(8)
219 .k(k)
220 .Test(xnn_q8_igemm_ukernel_4x8__neon);
221 }
222 }
223 }
224
225 TEST(Q8_IGEMM_4X8__NEON, n_gt_8_strided_cn) {
226 TEST_REQUIRES_ARM_NEON;
227 for (uint32_t n = 9; n < 16; n++) {
228 for (size_t k = 1; k <= 40; k += 9) {
229 GemmMicrokernelTester()
230 .mr(4)
231 .nr(8)
232 .kr(1)
233 .sr(1)
234 .m(4)
235 .n(8)
236 .k(k)
237 .cn_stride(11)
238 .Test(xnn_q8_igemm_ukernel_4x8__neon);
239 }
240 }
241 }
242
243 TEST(Q8_IGEMM_4X8__NEON, n_gt_8_subtile) {
244 TEST_REQUIRES_ARM_NEON;
245 for (uint32_t n = 9; n < 16; n++) {
246 for (size_t k = 1; k <= 40; k += 9) {
247 for (uint32_t m = 1; m <= 4; m++) {
248 GemmMicrokernelTester()
249 .mr(4)
250 .nr(8)
251 .kr(1)
252 .sr(1)
253 .m(m)
254 .n(n)
255 .k(k)
256 .iterations(1)
257 .Test(xnn_q8_igemm_ukernel_4x8__neon);
258 }
259 }
260 }
261 }
262
263 TEST(Q8_IGEMM_4X8__NEON, n_div_8) {
264 TEST_REQUIRES_ARM_NEON;
265 for (uint32_t n = 16; n <= 24; n += 8) {
266 for (size_t k = 1; k <= 40; k += 9) {
267 GemmMicrokernelTester()
268 .mr(4)
269 .nr(8)
270 .kr(1)
271 .sr(1)
272 .m(4)
273 .n(8)
274 .k(k)
275 .Test(xnn_q8_igemm_ukernel_4x8__neon);
276 }
277 }
278 }
279
280 TEST(Q8_IGEMM_4X8__NEON, n_div_8_strided_cn) {
281 TEST_REQUIRES_ARM_NEON;
282 for (uint32_t n = 16; n <= 24; n += 8) {
283 for (size_t k = 1; k <= 40; k += 9) {
284 GemmMicrokernelTester()
285 .mr(4)
286 .nr(8)
287 .kr(1)
288 .sr(1)
289 .m(4)
290 .n(n)
291 .k(k)
292 .cn_stride(11)
293 .Test(xnn_q8_igemm_ukernel_4x8__neon);
294 }
295 }
296 }
297
298 TEST(Q8_IGEMM_4X8__NEON, n_div_8_subtile) {
299 TEST_REQUIRES_ARM_NEON;
300 for (uint32_t n = 16; n <= 24; n += 8) {
301 for (size_t k = 1; k <= 40; k += 9) {
302 for (uint32_t m = 1; m <= 4; m++) {
303 GemmMicrokernelTester()
304 .mr(4)
305 .nr(8)
306 .kr(1)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .iterations(1)
312 .Test(xnn_q8_igemm_ukernel_4x8__neon);
313 }
314 }
315 }
316 }
317
318 TEST(Q8_IGEMM_4X8__NEON, small_kernel) {
319 TEST_REQUIRES_ARM_NEON;
320 for (size_t k = 1; k <= 40; k += 9) {
321 GemmMicrokernelTester()
322 .mr(4)
323 .nr(8)
324 .kr(1)
325 .sr(1)
326 .m(4)
327 .n(8)
328 .k(k)
329 .ks(3)
330 .Test(xnn_q8_igemm_ukernel_4x8__neon);
331 }
332 }
333
334 TEST(Q8_IGEMM_4X8__NEON, small_kernel_subtile) {
335 TEST_REQUIRES_ARM_NEON;
336 for (size_t k = 1; k <= 40; k += 9) {
337 for (uint32_t m = 1; m <= 4; m++) {
338 for (uint32_t n = 1; n <= 8; n++) {
339 GemmMicrokernelTester()
340 .mr(4)
341 .nr(8)
342 .kr(1)
343 .sr(1)
344 .m(m)
345 .n(n)
346 .k(k)
347 .ks(3)
348 .iterations(1)
349 .Test(xnn_q8_igemm_ukernel_4x8__neon);
350 }
351 }
352 }
353 }
354
355 TEST(Q8_IGEMM_4X8__NEON, n_gt_8_small_kernel) {
356 TEST_REQUIRES_ARM_NEON;
357 for (uint32_t n = 9; n < 16; n++) {
358 for (size_t k = 1; k <= 40; k += 9) {
359 GemmMicrokernelTester()
360 .mr(4)
361 .nr(8)
362 .kr(1)
363 .sr(1)
364 .m(4)
365 .n(8)
366 .k(k)
367 .ks(3)
368 .Test(xnn_q8_igemm_ukernel_4x8__neon);
369 }
370 }
371 }
372
373 TEST(Q8_IGEMM_4X8__NEON, n_div_8_small_kernel) {
374 TEST_REQUIRES_ARM_NEON;
375 for (uint32_t n = 16; n <= 24; n += 8) {
376 for (size_t k = 1; k <= 40; k += 9) {
377 GemmMicrokernelTester()
378 .mr(4)
379 .nr(8)
380 .kr(1)
381 .sr(1)
382 .m(4)
383 .n(8)
384 .k(k)
385 .ks(3)
386 .Test(xnn_q8_igemm_ukernel_4x8__neon);
387 }
388 }
389 }
390
391 TEST(Q8_IGEMM_4X8__NEON, strided_cm_subtile) {
392 TEST_REQUIRES_ARM_NEON;
393 for (size_t k = 1; k <= 40; k += 9) {
394 for (uint32_t m = 1; m <= 4; m++) {
395 for (uint32_t n = 1; n <= 8; n++) {
396 GemmMicrokernelTester()
397 .mr(4)
398 .nr(8)
399 .kr(1)
400 .sr(1)
401 .m(m)
402 .n(n)
403 .k(k)
404 .cm_stride(11)
405 .iterations(1)
406 .Test(xnn_q8_igemm_ukernel_4x8__neon);
407 }
408 }
409 }
410 }
411
412 TEST(Q8_IGEMM_4X8__NEON, a_offset) {
413 TEST_REQUIRES_ARM_NEON;
414 for (size_t k = 1; k <= 40; k += 9) {
415 GemmMicrokernelTester()
416 .mr(4)
417 .nr(8)
418 .kr(1)
419 .sr(1)
420 .m(4)
421 .n(8)
422 .k(k)
423 .ks(3)
424 .a_offset(163)
425 .Test(xnn_q8_igemm_ukernel_4x8__neon);
426 }
427 }
428
429 TEST(Q8_IGEMM_4X8__NEON, zero) {
430 TEST_REQUIRES_ARM_NEON;
431 for (uint32_t mz = 0; mz < 4; mz++) {
432 for (size_t k = 1; k <= 40; k += 9) {
433 GemmMicrokernelTester()
434 .mr(4)
435 .nr(8)
436 .kr(1)
437 .sr(1)
438 .m(4)
439 .n(8)
440 .k(k)
441 .ks(3)
442 .a_offset(163)
443 .zero_index(mz)
444 .Test(xnn_q8_igemm_ukernel_4x8__neon);
445 }
446 }
447 }
448
449 TEST(Q8_IGEMM_4X8__NEON, qmin) {
450 TEST_REQUIRES_ARM_NEON;
451 GemmMicrokernelTester()
452 .mr(4)
453 .nr(8)
454 .kr(1)
455 .sr(1)
456 .m(4)
457 .n(8)
458 .k(8)
459 .qmin(128)
460 .Test(xnn_q8_igemm_ukernel_4x8__neon);
461 }
462
463 TEST(Q8_IGEMM_4X8__NEON, qmax) {
464 TEST_REQUIRES_ARM_NEON;
465 GemmMicrokernelTester()
466 .mr(4)
467 .nr(8)
468 .kr(1)
469 .sr(1)
470 .m(4)
471 .n(8)
472 .k(8)
473 .qmax(128)
474 .Test(xnn_q8_igemm_ukernel_4x8__neon);
475 }
476
477 TEST(Q8_IGEMM_4X8__NEON, strided_cm) {
478 TEST_REQUIRES_ARM_NEON;
479 GemmMicrokernelTester()
480 .mr(4)
481 .nr(8)
482 .kr(1)
483 .sr(1)
484 .m(4)
485 .n(8)
486 .k(8)
487 .cm_stride(11)
488 .Test(xnn_q8_igemm_ukernel_4x8__neon);
489 }
490
491 TEST(Q8_IGEMM_4X8__NEON, no_a_zero_point) {
492 TEST_REQUIRES_ARM_NEON;
493 for (size_t k = 1; k <= 40; k += 9) {
494 GemmMicrokernelTester()
495 .mr(4)
496 .nr(8)
497 .kr(1)
498 .sr(1)
499 .m(4)
500 .n(8)
501 .k(k)
502 .a_zero_point(0)
503 .Test(xnn_q8_igemm_ukernel_4x8__neon);
504 }
505 }
506
507 TEST(Q8_IGEMM_4X8__NEON, no_b_zero_point) {
508 TEST_REQUIRES_ARM_NEON;
509 for (size_t k = 1; k <= 40; k += 9) {
510 GemmMicrokernelTester()
511 .mr(4)
512 .nr(8)
513 .kr(1)
514 .sr(1)
515 .m(4)
516 .n(8)
517 .k(k)
518 .b_zero_point(0)
519 .Test(xnn_q8_igemm_ukernel_4x8__neon);
520 }
521 }
522
523 TEST(Q8_IGEMM_4X8__NEON, no_zero_point) {
524 TEST_REQUIRES_ARM_NEON;
525 for (size_t k = 1; k <= 40; k += 9) {
526 GemmMicrokernelTester()
527 .mr(4)
528 .nr(8)
529 .kr(1)
530 .sr(1)
531 .m(4)
532 .n(8)
533 .k(k)
534 .a_zero_point(0)
535 .b_zero_point(0)
536 .Test(xnn_q8_igemm_ukernel_4x8__neon);
537 }
538 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700539#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700540
541
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700542#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700543 TEST(Q8_IGEMM_8X8__NEON, k_eq_8) {
544 TEST_REQUIRES_ARM_NEON;
545 GemmMicrokernelTester()
546 .mr(8)
547 .nr(8)
548 .kr(1)
549 .sr(1)
550 .m(8)
551 .n(8)
552 .k(8)
553 .Test(xnn_q8_igemm_ukernel_8x8__neon);
554 }
555
556 TEST(Q8_IGEMM_8X8__NEON, strided_cn) {
557 TEST_REQUIRES_ARM_NEON;
558 GemmMicrokernelTester()
559 .mr(8)
560 .nr(8)
561 .kr(1)
562 .sr(1)
563 .m(8)
564 .n(8)
565 .k(8)
566 .cn_stride(11)
567 .Test(xnn_q8_igemm_ukernel_8x8__neon);
568 }
569
570 TEST(Q8_IGEMM_8X8__NEON, k_eq_8_subtile) {
571 TEST_REQUIRES_ARM_NEON;
572 for (uint32_t m = 1; m <= 8; m++) {
573 for (uint32_t n = 1; n <= 8; n++) {
574 GemmMicrokernelTester()
575 .mr(8)
576 .nr(8)
577 .kr(1)
578 .sr(1)
579 .m(m)
580 .n(n)
581 .k(8)
582 .iterations(1)
583 .Test(xnn_q8_igemm_ukernel_8x8__neon);
584 }
585 }
586 }
587
588 TEST(Q8_IGEMM_8X8__NEON, k_eq_8_subtile_m) {
589 TEST_REQUIRES_ARM_NEON;
590 for (uint32_t m = 1; m <= 8; m++) {
591 GemmMicrokernelTester()
592 .mr(8)
593 .nr(8)
594 .kr(1)
595 .sr(1)
596 .m(m)
597 .n(8)
598 .k(8)
599 .iterations(1)
600 .Test(xnn_q8_igemm_ukernel_8x8__neon);
601 }
602 }
603
604 TEST(Q8_IGEMM_8X8__NEON, k_eq_8_subtile_n) {
605 TEST_REQUIRES_ARM_NEON;
606 for (uint32_t n = 1; n <= 8; n++) {
607 GemmMicrokernelTester()
608 .mr(8)
609 .nr(8)
610 .kr(1)
611 .sr(1)
612 .m(8)
613 .n(n)
614 .k(8)
615 .iterations(1)
616 .Test(xnn_q8_igemm_ukernel_8x8__neon);
617 }
618 }
619
620 TEST(Q8_IGEMM_8X8__NEON, k_lt_8) {
621 TEST_REQUIRES_ARM_NEON;
622 for (size_t k = 1; k < 8; k++) {
623 GemmMicrokernelTester()
624 .mr(8)
625 .nr(8)
626 .kr(1)
627 .sr(1)
628 .m(8)
629 .n(8)
630 .k(k)
631 .Test(xnn_q8_igemm_ukernel_8x8__neon);
632 }
633 }
634
635 TEST(Q8_IGEMM_8X8__NEON, k_lt_8_subtile) {
636 TEST_REQUIRES_ARM_NEON;
637 for (size_t k = 1; k < 8; k++) {
638 for (uint32_t m = 1; m <= 8; m++) {
639 for (uint32_t n = 1; n <= 8; n++) {
640 GemmMicrokernelTester()
641 .mr(8)
642 .nr(8)
643 .kr(1)
644 .sr(1)
645 .m(m)
646 .n(n)
647 .k(k)
648 .iterations(1)
649 .Test(xnn_q8_igemm_ukernel_8x8__neon);
650 }
651 }
652 }
653 }
654
655 TEST(Q8_IGEMM_8X8__NEON, k_gt_8) {
656 TEST_REQUIRES_ARM_NEON;
657 for (size_t k = 9; k < 16; k++) {
658 GemmMicrokernelTester()
659 .mr(8)
660 .nr(8)
661 .kr(1)
662 .sr(1)
663 .m(8)
664 .n(8)
665 .k(k)
666 .Test(xnn_q8_igemm_ukernel_8x8__neon);
667 }
668 }
669
670 TEST(Q8_IGEMM_8X8__NEON, k_gt_8_subtile) {
671 TEST_REQUIRES_ARM_NEON;
672 for (size_t k = 9; k < 16; k++) {
673 for (uint32_t m = 1; m <= 8; m++) {
674 for (uint32_t n = 1; n <= 8; n++) {
675 GemmMicrokernelTester()
676 .mr(8)
677 .nr(8)
678 .kr(1)
679 .sr(1)
680 .m(m)
681 .n(n)
682 .k(k)
683 .iterations(1)
684 .Test(xnn_q8_igemm_ukernel_8x8__neon);
685 }
686 }
687 }
688 }
689
690 TEST(Q8_IGEMM_8X8__NEON, k_div_8) {
691 TEST_REQUIRES_ARM_NEON;
692 for (size_t k = 16; k <= 80; k += 8) {
693 GemmMicrokernelTester()
694 .mr(8)
695 .nr(8)
696 .kr(1)
697 .sr(1)
698 .m(8)
699 .n(8)
700 .k(k)
701 .Test(xnn_q8_igemm_ukernel_8x8__neon);
702 }
703 }
704
705 TEST(Q8_IGEMM_8X8__NEON, k_div_8_subtile) {
706 TEST_REQUIRES_ARM_NEON;
707 for (size_t k = 16; k <= 80; k += 8) {
708 for (uint32_t m = 1; m <= 8; m++) {
709 for (uint32_t n = 1; n <= 8; n++) {
710 GemmMicrokernelTester()
711 .mr(8)
712 .nr(8)
713 .kr(1)
714 .sr(1)
715 .m(m)
716 .n(n)
717 .k(k)
718 .iterations(1)
719 .Test(xnn_q8_igemm_ukernel_8x8__neon);
720 }
721 }
722 }
723 }
724
725 TEST(Q8_IGEMM_8X8__NEON, n_gt_8) {
726 TEST_REQUIRES_ARM_NEON;
727 for (uint32_t n = 9; n < 16; n++) {
728 for (size_t k = 1; k <= 40; k += 9) {
729 GemmMicrokernelTester()
730 .mr(8)
731 .nr(8)
732 .kr(1)
733 .sr(1)
734 .m(8)
735 .n(8)
736 .k(k)
737 .Test(xnn_q8_igemm_ukernel_8x8__neon);
738 }
739 }
740 }
741
742 TEST(Q8_IGEMM_8X8__NEON, n_gt_8_strided_cn) {
743 TEST_REQUIRES_ARM_NEON;
744 for (uint32_t n = 9; n < 16; n++) {
745 for (size_t k = 1; k <= 40; k += 9) {
746 GemmMicrokernelTester()
747 .mr(8)
748 .nr(8)
749 .kr(1)
750 .sr(1)
751 .m(8)
752 .n(8)
753 .k(k)
754 .cn_stride(11)
755 .Test(xnn_q8_igemm_ukernel_8x8__neon);
756 }
757 }
758 }
759
760 TEST(Q8_IGEMM_8X8__NEON, n_gt_8_subtile) {
761 TEST_REQUIRES_ARM_NEON;
762 for (uint32_t n = 9; n < 16; n++) {
763 for (size_t k = 1; k <= 40; k += 9) {
764 for (uint32_t m = 1; m <= 8; m++) {
765 GemmMicrokernelTester()
766 .mr(8)
767 .nr(8)
768 .kr(1)
769 .sr(1)
770 .m(m)
771 .n(n)
772 .k(k)
773 .iterations(1)
774 .Test(xnn_q8_igemm_ukernel_8x8__neon);
775 }
776 }
777 }
778 }
779
780 TEST(Q8_IGEMM_8X8__NEON, n_div_8) {
781 TEST_REQUIRES_ARM_NEON;
782 for (uint32_t n = 16; n <= 24; n += 8) {
783 for (size_t k = 1; k <= 40; k += 9) {
784 GemmMicrokernelTester()
785 .mr(8)
786 .nr(8)
787 .kr(1)
788 .sr(1)
789 .m(8)
790 .n(8)
791 .k(k)
792 .Test(xnn_q8_igemm_ukernel_8x8__neon);
793 }
794 }
795 }
796
797 TEST(Q8_IGEMM_8X8__NEON, n_div_8_strided_cn) {
798 TEST_REQUIRES_ARM_NEON;
799 for (uint32_t n = 16; n <= 24; n += 8) {
800 for (size_t k = 1; k <= 40; k += 9) {
801 GemmMicrokernelTester()
802 .mr(8)
803 .nr(8)
804 .kr(1)
805 .sr(1)
806 .m(8)
807 .n(n)
808 .k(k)
809 .cn_stride(11)
810 .Test(xnn_q8_igemm_ukernel_8x8__neon);
811 }
812 }
813 }
814
815 TEST(Q8_IGEMM_8X8__NEON, n_div_8_subtile) {
816 TEST_REQUIRES_ARM_NEON;
817 for (uint32_t n = 16; n <= 24; n += 8) {
818 for (size_t k = 1; k <= 40; k += 9) {
819 for (uint32_t m = 1; m <= 8; m++) {
820 GemmMicrokernelTester()
821 .mr(8)
822 .nr(8)
823 .kr(1)
824 .sr(1)
825 .m(m)
826 .n(n)
827 .k(k)
828 .iterations(1)
829 .Test(xnn_q8_igemm_ukernel_8x8__neon);
830 }
831 }
832 }
833 }
834
835 TEST(Q8_IGEMM_8X8__NEON, small_kernel) {
836 TEST_REQUIRES_ARM_NEON;
837 for (size_t k = 1; k <= 40; k += 9) {
838 GemmMicrokernelTester()
839 .mr(8)
840 .nr(8)
841 .kr(1)
842 .sr(1)
843 .m(8)
844 .n(8)
845 .k(k)
846 .ks(3)
847 .Test(xnn_q8_igemm_ukernel_8x8__neon);
848 }
849 }
850
851 TEST(Q8_IGEMM_8X8__NEON, small_kernel_subtile) {
852 TEST_REQUIRES_ARM_NEON;
853 for (size_t k = 1; k <= 40; k += 9) {
854 for (uint32_t m = 1; m <= 8; m++) {
855 for (uint32_t n = 1; n <= 8; n++) {
856 GemmMicrokernelTester()
857 .mr(8)
858 .nr(8)
859 .kr(1)
860 .sr(1)
861 .m(m)
862 .n(n)
863 .k(k)
864 .ks(3)
865 .iterations(1)
866 .Test(xnn_q8_igemm_ukernel_8x8__neon);
867 }
868 }
869 }
870 }
871
872 TEST(Q8_IGEMM_8X8__NEON, n_gt_8_small_kernel) {
873 TEST_REQUIRES_ARM_NEON;
874 for (uint32_t n = 9; n < 16; n++) {
875 for (size_t k = 1; k <= 40; k += 9) {
876 GemmMicrokernelTester()
877 .mr(8)
878 .nr(8)
879 .kr(1)
880 .sr(1)
881 .m(8)
882 .n(8)
883 .k(k)
884 .ks(3)
885 .Test(xnn_q8_igemm_ukernel_8x8__neon);
886 }
887 }
888 }
889
890 TEST(Q8_IGEMM_8X8__NEON, n_div_8_small_kernel) {
891 TEST_REQUIRES_ARM_NEON;
892 for (uint32_t n = 16; n <= 24; n += 8) {
893 for (size_t k = 1; k <= 40; k += 9) {
894 GemmMicrokernelTester()
895 .mr(8)
896 .nr(8)
897 .kr(1)
898 .sr(1)
899 .m(8)
900 .n(8)
901 .k(k)
902 .ks(3)
903 .Test(xnn_q8_igemm_ukernel_8x8__neon);
904 }
905 }
906 }
907
908 TEST(Q8_IGEMM_8X8__NEON, strided_cm_subtile) {
909 TEST_REQUIRES_ARM_NEON;
910 for (size_t k = 1; k <= 40; k += 9) {
911 for (uint32_t m = 1; m <= 8; m++) {
912 for (uint32_t n = 1; n <= 8; n++) {
913 GemmMicrokernelTester()
914 .mr(8)
915 .nr(8)
916 .kr(1)
917 .sr(1)
918 .m(m)
919 .n(n)
920 .k(k)
921 .cm_stride(11)
922 .iterations(1)
923 .Test(xnn_q8_igemm_ukernel_8x8__neon);
924 }
925 }
926 }
927 }
928
929 TEST(Q8_IGEMM_8X8__NEON, a_offset) {
930 TEST_REQUIRES_ARM_NEON;
931 for (size_t k = 1; k <= 40; k += 9) {
932 GemmMicrokernelTester()
933 .mr(8)
934 .nr(8)
935 .kr(1)
936 .sr(1)
937 .m(8)
938 .n(8)
939 .k(k)
940 .ks(3)
941 .a_offset(331)
942 .Test(xnn_q8_igemm_ukernel_8x8__neon);
943 }
944 }
945
946 TEST(Q8_IGEMM_8X8__NEON, zero) {
947 TEST_REQUIRES_ARM_NEON;
948 for (uint32_t mz = 0; mz < 8; mz++) {
949 for (size_t k = 1; k <= 40; k += 9) {
950 GemmMicrokernelTester()
951 .mr(8)
952 .nr(8)
953 .kr(1)
954 .sr(1)
955 .m(8)
956 .n(8)
957 .k(k)
958 .ks(3)
959 .a_offset(331)
960 .zero_index(mz)
961 .Test(xnn_q8_igemm_ukernel_8x8__neon);
962 }
963 }
964 }
965
966 TEST(Q8_IGEMM_8X8__NEON, qmin) {
967 TEST_REQUIRES_ARM_NEON;
968 GemmMicrokernelTester()
969 .mr(8)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(8)
974 .n(8)
975 .k(8)
976 .qmin(128)
977 .Test(xnn_q8_igemm_ukernel_8x8__neon);
978 }
979
980 TEST(Q8_IGEMM_8X8__NEON, qmax) {
981 TEST_REQUIRES_ARM_NEON;
982 GemmMicrokernelTester()
983 .mr(8)
984 .nr(8)
985 .kr(1)
986 .sr(1)
987 .m(8)
988 .n(8)
989 .k(8)
990 .qmax(128)
991 .Test(xnn_q8_igemm_ukernel_8x8__neon);
992 }
993
994 TEST(Q8_IGEMM_8X8__NEON, strided_cm) {
995 TEST_REQUIRES_ARM_NEON;
996 GemmMicrokernelTester()
997 .mr(8)
998 .nr(8)
999 .kr(1)
1000 .sr(1)
1001 .m(8)
1002 .n(8)
1003 .k(8)
1004 .cm_stride(11)
1005 .Test(xnn_q8_igemm_ukernel_8x8__neon);
1006 }
1007
1008 TEST(Q8_IGEMM_8X8__NEON, no_a_zero_point) {
1009 TEST_REQUIRES_ARM_NEON;
1010 for (size_t k = 1; k <= 40; k += 9) {
1011 GemmMicrokernelTester()
1012 .mr(8)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(8)
1017 .n(8)
1018 .k(k)
1019 .a_zero_point(0)
1020 .Test(xnn_q8_igemm_ukernel_8x8__neon);
1021 }
1022 }
1023
1024 TEST(Q8_IGEMM_8X8__NEON, no_b_zero_point) {
1025 TEST_REQUIRES_ARM_NEON;
1026 for (size_t k = 1; k <= 40; k += 9) {
1027 GemmMicrokernelTester()
1028 .mr(8)
1029 .nr(8)
1030 .kr(1)
1031 .sr(1)
1032 .m(8)
1033 .n(8)
1034 .k(k)
1035 .b_zero_point(0)
1036 .Test(xnn_q8_igemm_ukernel_8x8__neon);
1037 }
1038 }
1039
1040 TEST(Q8_IGEMM_8X8__NEON, no_zero_point) {
1041 TEST_REQUIRES_ARM_NEON;
1042 for (size_t k = 1; k <= 40; k += 9) {
1043 GemmMicrokernelTester()
1044 .mr(8)
1045 .nr(8)
1046 .kr(1)
1047 .sr(1)
1048 .m(8)
1049 .n(8)
1050 .k(k)
1051 .a_zero_point(0)
1052 .b_zero_point(0)
1053 .Test(xnn_q8_igemm_ukernel_8x8__neon);
1054 }
1055 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001056#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001057
1058
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001059#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001060 TEST(Q8_IGEMM_4X4C2__SSE2, k_eq_8) {
1061 TEST_REQUIRES_X86_SSE2;
1062 GemmMicrokernelTester()
1063 .mr(4)
1064 .nr(4)
1065 .kr(2)
1066 .sr(1)
1067 .m(4)
1068 .n(4)
1069 .k(8)
1070 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1071 }
1072
1073 TEST(Q8_IGEMM_4X4C2__SSE2, strided_cn) {
1074 TEST_REQUIRES_X86_SSE2;
1075 GemmMicrokernelTester()
1076 .mr(4)
1077 .nr(4)
1078 .kr(2)
1079 .sr(1)
1080 .m(4)
1081 .n(4)
1082 .k(8)
1083 .cn_stride(7)
1084 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1085 }
1086
1087 TEST(Q8_IGEMM_4X4C2__SSE2, k_eq_8_subtile) {
1088 TEST_REQUIRES_X86_SSE2;
1089 for (uint32_t m = 1; m <= 4; m++) {
1090 for (uint32_t n = 1; n <= 4; n++) {
1091 GemmMicrokernelTester()
1092 .mr(4)
1093 .nr(4)
1094 .kr(2)
1095 .sr(1)
1096 .m(m)
1097 .n(n)
1098 .k(8)
1099 .iterations(1)
1100 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1101 }
1102 }
1103 }
1104
1105 TEST(Q8_IGEMM_4X4C2__SSE2, k_eq_8_subtile_m) {
1106 TEST_REQUIRES_X86_SSE2;
1107 for (uint32_t m = 1; m <= 4; m++) {
1108 GemmMicrokernelTester()
1109 .mr(4)
1110 .nr(4)
1111 .kr(2)
1112 .sr(1)
1113 .m(m)
1114 .n(4)
1115 .k(8)
1116 .iterations(1)
1117 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1118 }
1119 }
1120
1121 TEST(Q8_IGEMM_4X4C2__SSE2, k_eq_8_subtile_n) {
1122 TEST_REQUIRES_X86_SSE2;
1123 for (uint32_t n = 1; n <= 4; n++) {
1124 GemmMicrokernelTester()
1125 .mr(4)
1126 .nr(4)
1127 .kr(2)
1128 .sr(1)
1129 .m(4)
1130 .n(n)
1131 .k(8)
1132 .iterations(1)
1133 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1134 }
1135 }
1136
1137 TEST(Q8_IGEMM_4X4C2__SSE2, k_lt_8) {
1138 TEST_REQUIRES_X86_SSE2;
1139 for (size_t k = 1; k < 8; k++) {
1140 GemmMicrokernelTester()
1141 .mr(4)
1142 .nr(4)
1143 .kr(2)
1144 .sr(1)
1145 .m(4)
1146 .n(4)
1147 .k(k)
1148 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1149 }
1150 }
1151
1152 TEST(Q8_IGEMM_4X4C2__SSE2, k_lt_8_subtile) {
1153 TEST_REQUIRES_X86_SSE2;
1154 for (size_t k = 1; k < 8; k++) {
1155 for (uint32_t m = 1; m <= 4; m++) {
1156 for (uint32_t n = 1; n <= 4; n++) {
1157 GemmMicrokernelTester()
1158 .mr(4)
1159 .nr(4)
1160 .kr(2)
1161 .sr(1)
1162 .m(m)
1163 .n(n)
1164 .k(k)
1165 .iterations(1)
1166 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1167 }
1168 }
1169 }
1170 }
1171
1172 TEST(Q8_IGEMM_4X4C2__SSE2, k_gt_8) {
1173 TEST_REQUIRES_X86_SSE2;
1174 for (size_t k = 9; k < 16; k++) {
1175 GemmMicrokernelTester()
1176 .mr(4)
1177 .nr(4)
1178 .kr(2)
1179 .sr(1)
1180 .m(4)
1181 .n(4)
1182 .k(k)
1183 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1184 }
1185 }
1186
1187 TEST(Q8_IGEMM_4X4C2__SSE2, k_gt_8_subtile) {
1188 TEST_REQUIRES_X86_SSE2;
1189 for (size_t k = 9; k < 16; k++) {
1190 for (uint32_t m = 1; m <= 4; m++) {
1191 for (uint32_t n = 1; n <= 4; n++) {
1192 GemmMicrokernelTester()
1193 .mr(4)
1194 .nr(4)
1195 .kr(2)
1196 .sr(1)
1197 .m(m)
1198 .n(n)
1199 .k(k)
1200 .iterations(1)
1201 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1202 }
1203 }
1204 }
1205 }
1206
1207 TEST(Q8_IGEMM_4X4C2__SSE2, k_div_8) {
1208 TEST_REQUIRES_X86_SSE2;
1209 for (size_t k = 16; k <= 80; k += 8) {
1210 GemmMicrokernelTester()
1211 .mr(4)
1212 .nr(4)
1213 .kr(2)
1214 .sr(1)
1215 .m(4)
1216 .n(4)
1217 .k(k)
1218 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1219 }
1220 }
1221
1222 TEST(Q8_IGEMM_4X4C2__SSE2, k_div_8_subtile) {
1223 TEST_REQUIRES_X86_SSE2;
1224 for (size_t k = 16; k <= 80; k += 8) {
1225 for (uint32_t m = 1; m <= 4; m++) {
1226 for (uint32_t n = 1; n <= 4; n++) {
1227 GemmMicrokernelTester()
1228 .mr(4)
1229 .nr(4)
1230 .kr(2)
1231 .sr(1)
1232 .m(m)
1233 .n(n)
1234 .k(k)
1235 .iterations(1)
1236 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1237 }
1238 }
1239 }
1240 }
1241
1242 TEST(Q8_IGEMM_4X4C2__SSE2, n_gt_4) {
1243 TEST_REQUIRES_X86_SSE2;
1244 for (uint32_t n = 5; n < 8; n++) {
1245 for (size_t k = 1; k <= 40; k += 9) {
1246 GemmMicrokernelTester()
1247 .mr(4)
1248 .nr(4)
1249 .kr(2)
1250 .sr(1)
1251 .m(4)
1252 .n(4)
1253 .k(k)
1254 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1255 }
1256 }
1257 }
1258
1259 TEST(Q8_IGEMM_4X4C2__SSE2, n_gt_4_strided_cn) {
1260 TEST_REQUIRES_X86_SSE2;
1261 for (uint32_t n = 5; n < 8; n++) {
1262 for (size_t k = 1; k <= 40; k += 9) {
1263 GemmMicrokernelTester()
1264 .mr(4)
1265 .nr(4)
1266 .kr(2)
1267 .sr(1)
1268 .m(4)
1269 .n(4)
1270 .k(k)
1271 .cn_stride(7)
1272 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1273 }
1274 }
1275 }
1276
1277 TEST(Q8_IGEMM_4X4C2__SSE2, n_gt_4_subtile) {
1278 TEST_REQUIRES_X86_SSE2;
1279 for (uint32_t n = 5; n < 8; n++) {
1280 for (size_t k = 1; k <= 40; k += 9) {
1281 for (uint32_t m = 1; m <= 4; m++) {
1282 GemmMicrokernelTester()
1283 .mr(4)
1284 .nr(4)
1285 .kr(2)
1286 .sr(1)
1287 .m(m)
1288 .n(n)
1289 .k(k)
1290 .iterations(1)
1291 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1292 }
1293 }
1294 }
1295 }
1296
1297 TEST(Q8_IGEMM_4X4C2__SSE2, n_div_4) {
1298 TEST_REQUIRES_X86_SSE2;
1299 for (uint32_t n = 8; n <= 12; n += 4) {
1300 for (size_t k = 1; k <= 40; k += 9) {
1301 GemmMicrokernelTester()
1302 .mr(4)
1303 .nr(4)
1304 .kr(2)
1305 .sr(1)
1306 .m(4)
1307 .n(4)
1308 .k(k)
1309 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1310 }
1311 }
1312 }
1313
1314 TEST(Q8_IGEMM_4X4C2__SSE2, n_div_4_strided_cn) {
1315 TEST_REQUIRES_X86_SSE2;
1316 for (uint32_t n = 8; n <= 12; n += 4) {
1317 for (size_t k = 1; k <= 40; k += 9) {
1318 GemmMicrokernelTester()
1319 .mr(4)
1320 .nr(4)
1321 .kr(2)
1322 .sr(1)
1323 .m(4)
1324 .n(n)
1325 .k(k)
1326 .cn_stride(7)
1327 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1328 }
1329 }
1330 }
1331
1332 TEST(Q8_IGEMM_4X4C2__SSE2, n_div_4_subtile) {
1333 TEST_REQUIRES_X86_SSE2;
1334 for (uint32_t n = 8; n <= 12; n += 4) {
1335 for (size_t k = 1; k <= 40; k += 9) {
1336 for (uint32_t m = 1; m <= 4; m++) {
1337 GemmMicrokernelTester()
1338 .mr(4)
1339 .nr(4)
1340 .kr(2)
1341 .sr(1)
1342 .m(m)
1343 .n(n)
1344 .k(k)
1345 .iterations(1)
1346 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1347 }
1348 }
1349 }
1350 }
1351
1352 TEST(Q8_IGEMM_4X4C2__SSE2, small_kernel) {
1353 TEST_REQUIRES_X86_SSE2;
1354 for (size_t k = 1; k <= 40; k += 9) {
1355 GemmMicrokernelTester()
1356 .mr(4)
1357 .nr(4)
1358 .kr(2)
1359 .sr(1)
1360 .m(4)
1361 .n(4)
1362 .k(k)
1363 .ks(3)
1364 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1365 }
1366 }
1367
1368 TEST(Q8_IGEMM_4X4C2__SSE2, small_kernel_subtile) {
1369 TEST_REQUIRES_X86_SSE2;
1370 for (size_t k = 1; k <= 40; k += 9) {
1371 for (uint32_t m = 1; m <= 4; m++) {
1372 for (uint32_t n = 1; n <= 4; n++) {
1373 GemmMicrokernelTester()
1374 .mr(4)
1375 .nr(4)
1376 .kr(2)
1377 .sr(1)
1378 .m(m)
1379 .n(n)
1380 .k(k)
1381 .ks(3)
1382 .iterations(1)
1383 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1384 }
1385 }
1386 }
1387 }
1388
1389 TEST(Q8_IGEMM_4X4C2__SSE2, n_gt_4_small_kernel) {
1390 TEST_REQUIRES_X86_SSE2;
1391 for (uint32_t n = 5; n < 8; n++) {
1392 for (size_t k = 1; k <= 40; k += 9) {
1393 GemmMicrokernelTester()
1394 .mr(4)
1395 .nr(4)
1396 .kr(2)
1397 .sr(1)
1398 .m(4)
1399 .n(4)
1400 .k(k)
1401 .ks(3)
1402 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1403 }
1404 }
1405 }
1406
1407 TEST(Q8_IGEMM_4X4C2__SSE2, n_div_4_small_kernel) {
1408 TEST_REQUIRES_X86_SSE2;
1409 for (uint32_t n = 8; n <= 12; n += 4) {
1410 for (size_t k = 1; k <= 40; k += 9) {
1411 GemmMicrokernelTester()
1412 .mr(4)
1413 .nr(4)
1414 .kr(2)
1415 .sr(1)
1416 .m(4)
1417 .n(4)
1418 .k(k)
1419 .ks(3)
1420 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1421 }
1422 }
1423 }
1424
1425 TEST(Q8_IGEMM_4X4C2__SSE2, strided_cm_subtile) {
1426 TEST_REQUIRES_X86_SSE2;
1427 for (size_t k = 1; k <= 40; k += 9) {
1428 for (uint32_t m = 1; m <= 4; m++) {
1429 for (uint32_t n = 1; n <= 4; n++) {
1430 GemmMicrokernelTester()
1431 .mr(4)
1432 .nr(4)
1433 .kr(2)
1434 .sr(1)
1435 .m(m)
1436 .n(n)
1437 .k(k)
1438 .cm_stride(7)
1439 .iterations(1)
1440 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1441 }
1442 }
1443 }
1444 }
1445
1446 TEST(Q8_IGEMM_4X4C2__SSE2, a_offset) {
1447 TEST_REQUIRES_X86_SSE2;
1448 for (size_t k = 1; k <= 40; k += 9) {
1449 GemmMicrokernelTester()
1450 .mr(4)
1451 .nr(4)
1452 .kr(2)
1453 .sr(1)
1454 .m(4)
1455 .n(4)
1456 .k(k)
1457 .ks(3)
1458 .a_offset(163)
1459 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1460 }
1461 }
1462
1463 TEST(Q8_IGEMM_4X4C2__SSE2, zero) {
1464 TEST_REQUIRES_X86_SSE2;
1465 for (uint32_t mz = 0; mz < 4; mz++) {
1466 for (size_t k = 1; k <= 40; k += 9) {
1467 GemmMicrokernelTester()
1468 .mr(4)
1469 .nr(4)
1470 .kr(2)
1471 .sr(1)
1472 .m(4)
1473 .n(4)
1474 .k(k)
1475 .ks(3)
1476 .a_offset(163)
1477 .zero_index(mz)
1478 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1479 }
1480 }
1481 }
1482
1483 TEST(Q8_IGEMM_4X4C2__SSE2, qmin) {
1484 TEST_REQUIRES_X86_SSE2;
1485 GemmMicrokernelTester()
1486 .mr(4)
1487 .nr(4)
1488 .kr(2)
1489 .sr(1)
1490 .m(4)
1491 .n(4)
1492 .k(8)
1493 .qmin(128)
1494 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1495 }
1496
1497 TEST(Q8_IGEMM_4X4C2__SSE2, qmax) {
1498 TEST_REQUIRES_X86_SSE2;
1499 GemmMicrokernelTester()
1500 .mr(4)
1501 .nr(4)
1502 .kr(2)
1503 .sr(1)
1504 .m(4)
1505 .n(4)
1506 .k(8)
1507 .qmax(128)
1508 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1509 }
1510
1511 TEST(Q8_IGEMM_4X4C2__SSE2, strided_cm) {
1512 TEST_REQUIRES_X86_SSE2;
1513 GemmMicrokernelTester()
1514 .mr(4)
1515 .nr(4)
1516 .kr(2)
1517 .sr(1)
1518 .m(4)
1519 .n(4)
1520 .k(8)
1521 .cm_stride(7)
1522 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1523 }
1524
1525 TEST(Q8_IGEMM_4X4C2__SSE2, no_a_zero_point) {
1526 TEST_REQUIRES_X86_SSE2;
1527 for (size_t k = 1; k <= 40; k += 9) {
1528 GemmMicrokernelTester()
1529 .mr(4)
1530 .nr(4)
1531 .kr(2)
1532 .sr(1)
1533 .m(4)
1534 .n(4)
1535 .k(k)
1536 .a_zero_point(0)
1537 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1538 }
1539 }
1540
1541 TEST(Q8_IGEMM_4X4C2__SSE2, no_b_zero_point) {
1542 TEST_REQUIRES_X86_SSE2;
1543 for (size_t k = 1; k <= 40; k += 9) {
1544 GemmMicrokernelTester()
1545 .mr(4)
1546 .nr(4)
1547 .kr(2)
1548 .sr(1)
1549 .m(4)
1550 .n(4)
1551 .k(k)
1552 .b_zero_point(0)
1553 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1554 }
1555 }
1556
1557 TEST(Q8_IGEMM_4X4C2__SSE2, no_zero_point) {
1558 TEST_REQUIRES_X86_SSE2;
1559 for (size_t k = 1; k <= 40; k += 9) {
1560 GemmMicrokernelTester()
1561 .mr(4)
1562 .nr(4)
1563 .kr(2)
1564 .sr(1)
1565 .m(4)
1566 .n(4)
1567 .k(k)
1568 .a_zero_point(0)
1569 .b_zero_point(0)
1570 .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
1571 }
1572 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001573#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001574
1575
1576TEST(Q8_IGEMM_2X2__SCALAR, k_eq_1) {
1577 GemmMicrokernelTester()
1578 .mr(2)
1579 .nr(2)
1580 .kr(1)
1581 .sr(1)
1582 .m(2)
1583 .n(2)
1584 .k(1)
1585 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1586}
1587
1588TEST(Q8_IGEMM_2X2__SCALAR, strided_cn) {
1589 GemmMicrokernelTester()
1590 .mr(2)
1591 .nr(2)
1592 .kr(1)
1593 .sr(1)
1594 .m(2)
1595 .n(2)
1596 .k(1)
1597 .cn_stride(5)
1598 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1599}
1600
1601TEST(Q8_IGEMM_2X2__SCALAR, k_eq_1_subtile) {
1602 for (uint32_t m = 1; m <= 2; m++) {
1603 for (uint32_t n = 1; n <= 2; n++) {
1604 GemmMicrokernelTester()
1605 .mr(2)
1606 .nr(2)
1607 .kr(1)
1608 .sr(1)
1609 .m(m)
1610 .n(n)
1611 .k(1)
1612 .iterations(1)
1613 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1614 }
1615 }
1616}
1617
1618TEST(Q8_IGEMM_2X2__SCALAR, k_eq_1_subtile_m) {
1619 for (uint32_t m = 1; m <= 2; m++) {
1620 GemmMicrokernelTester()
1621 .mr(2)
1622 .nr(2)
1623 .kr(1)
1624 .sr(1)
1625 .m(m)
1626 .n(2)
1627 .k(1)
1628 .iterations(1)
1629 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1630 }
1631}
1632
1633TEST(Q8_IGEMM_2X2__SCALAR, k_eq_1_subtile_n) {
1634 for (uint32_t n = 1; n <= 2; n++) {
1635 GemmMicrokernelTester()
1636 .mr(2)
1637 .nr(2)
1638 .kr(1)
1639 .sr(1)
1640 .m(2)
1641 .n(n)
1642 .k(1)
1643 .iterations(1)
1644 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1645 }
1646}
1647
1648TEST(Q8_IGEMM_2X2__SCALAR, k_gt_1) {
1649 for (size_t k = 2; k < 10; k++) {
1650 GemmMicrokernelTester()
1651 .mr(2)
1652 .nr(2)
1653 .kr(1)
1654 .sr(1)
1655 .m(2)
1656 .n(2)
1657 .k(k)
1658 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1659 }
1660}
1661
1662TEST(Q8_IGEMM_2X2__SCALAR, k_gt_1_subtile) {
1663 for (size_t k = 2; k < 10; k++) {
1664 for (uint32_t m = 1; m <= 2; m++) {
1665 for (uint32_t n = 1; n <= 2; n++) {
1666 GemmMicrokernelTester()
1667 .mr(2)
1668 .nr(2)
1669 .kr(1)
1670 .sr(1)
1671 .m(m)
1672 .n(n)
1673 .k(k)
1674 .iterations(1)
1675 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1676 }
1677 }
1678 }
1679}
1680
1681TEST(Q8_IGEMM_2X2__SCALAR, n_gt_2) {
1682 for (uint32_t n = 3; n < 4; n++) {
1683 for (size_t k = 1; k <= 5; k += 2) {
1684 GemmMicrokernelTester()
1685 .mr(2)
1686 .nr(2)
1687 .kr(1)
1688 .sr(1)
1689 .m(2)
1690 .n(2)
1691 .k(k)
1692 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1693 }
1694 }
1695}
1696
1697TEST(Q8_IGEMM_2X2__SCALAR, n_gt_2_strided_cn) {
1698 for (uint32_t n = 3; n < 4; n++) {
1699 for (size_t k = 1; k <= 5; k += 2) {
1700 GemmMicrokernelTester()
1701 .mr(2)
1702 .nr(2)
1703 .kr(1)
1704 .sr(1)
1705 .m(2)
1706 .n(2)
1707 .k(k)
1708 .cn_stride(5)
1709 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1710 }
1711 }
1712}
1713
1714TEST(Q8_IGEMM_2X2__SCALAR, n_gt_2_subtile) {
1715 for (uint32_t n = 3; n < 4; n++) {
1716 for (size_t k = 1; k <= 5; k += 2) {
1717 for (uint32_t m = 1; m <= 2; m++) {
1718 GemmMicrokernelTester()
1719 .mr(2)
1720 .nr(2)
1721 .kr(1)
1722 .sr(1)
1723 .m(m)
1724 .n(n)
1725 .k(k)
1726 .iterations(1)
1727 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1728 }
1729 }
1730 }
1731}
1732
1733TEST(Q8_IGEMM_2X2__SCALAR, n_div_2) {
1734 for (uint32_t n = 4; n <= 6; n += 2) {
1735 for (size_t k = 1; k <= 5; k += 2) {
1736 GemmMicrokernelTester()
1737 .mr(2)
1738 .nr(2)
1739 .kr(1)
1740 .sr(1)
1741 .m(2)
1742 .n(2)
1743 .k(k)
1744 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1745 }
1746 }
1747}
1748
1749TEST(Q8_IGEMM_2X2__SCALAR, n_div_2_strided_cn) {
1750 for (uint32_t n = 4; n <= 6; n += 2) {
1751 for (size_t k = 1; k <= 5; k += 2) {
1752 GemmMicrokernelTester()
1753 .mr(2)
1754 .nr(2)
1755 .kr(1)
1756 .sr(1)
1757 .m(2)
1758 .n(n)
1759 .k(k)
1760 .cn_stride(5)
1761 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1762 }
1763 }
1764}
1765
1766TEST(Q8_IGEMM_2X2__SCALAR, n_div_2_subtile) {
1767 for (uint32_t n = 4; n <= 6; n += 2) {
1768 for (size_t k = 1; k <= 5; k += 2) {
1769 for (uint32_t m = 1; m <= 2; m++) {
1770 GemmMicrokernelTester()
1771 .mr(2)
1772 .nr(2)
1773 .kr(1)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
1779 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1780 }
1781 }
1782 }
1783}
1784
1785TEST(Q8_IGEMM_2X2__SCALAR, small_kernel) {
1786 for (size_t k = 1; k <= 5; k += 2) {
1787 GemmMicrokernelTester()
1788 .mr(2)
1789 .nr(2)
1790 .kr(1)
1791 .sr(1)
1792 .m(2)
1793 .n(2)
1794 .k(k)
1795 .ks(3)
1796 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1797 }
1798}
1799
1800TEST(Q8_IGEMM_2X2__SCALAR, small_kernel_subtile) {
1801 for (size_t k = 1; k <= 5; k += 2) {
1802 for (uint32_t m = 1; m <= 2; m++) {
1803 for (uint32_t n = 1; n <= 2; n++) {
1804 GemmMicrokernelTester()
1805 .mr(2)
1806 .nr(2)
1807 .kr(1)
1808 .sr(1)
1809 .m(m)
1810 .n(n)
1811 .k(k)
1812 .ks(3)
1813 .iterations(1)
1814 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1815 }
1816 }
1817 }
1818}
1819
1820TEST(Q8_IGEMM_2X2__SCALAR, n_gt_2_small_kernel) {
1821 for (uint32_t n = 3; n < 4; n++) {
1822 for (size_t k = 1; k <= 5; k += 2) {
1823 GemmMicrokernelTester()
1824 .mr(2)
1825 .nr(2)
1826 .kr(1)
1827 .sr(1)
1828 .m(2)
1829 .n(2)
1830 .k(k)
1831 .ks(3)
1832 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1833 }
1834 }
1835}
1836
1837TEST(Q8_IGEMM_2X2__SCALAR, n_div_2_small_kernel) {
1838 for (uint32_t n = 4; n <= 6; n += 2) {
1839 for (size_t k = 1; k <= 5; k += 2) {
1840 GemmMicrokernelTester()
1841 .mr(2)
1842 .nr(2)
1843 .kr(1)
1844 .sr(1)
1845 .m(2)
1846 .n(2)
1847 .k(k)
1848 .ks(3)
1849 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1850 }
1851 }
1852}
1853
1854TEST(Q8_IGEMM_2X2__SCALAR, strided_cm_subtile) {
1855 for (size_t k = 1; k <= 5; k += 2) {
1856 for (uint32_t m = 1; m <= 2; m++) {
1857 for (uint32_t n = 1; n <= 2; n++) {
1858 GemmMicrokernelTester()
1859 .mr(2)
1860 .nr(2)
1861 .kr(1)
1862 .sr(1)
1863 .m(m)
1864 .n(n)
1865 .k(k)
1866 .cm_stride(5)
1867 .iterations(1)
1868 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1869 }
1870 }
1871 }
1872}
1873
1874TEST(Q8_IGEMM_2X2__SCALAR, a_offset) {
1875 for (size_t k = 1; k <= 5; k += 2) {
1876 GemmMicrokernelTester()
1877 .mr(2)
1878 .nr(2)
1879 .kr(1)
1880 .sr(1)
1881 .m(2)
1882 .n(2)
1883 .k(k)
1884 .ks(3)
1885 .a_offset(13)
1886 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1887 }
1888}
1889
1890TEST(Q8_IGEMM_2X2__SCALAR, zero) {
1891 for (uint32_t mz = 0; mz < 2; mz++) {
1892 for (size_t k = 1; k <= 5; k += 2) {
1893 GemmMicrokernelTester()
1894 .mr(2)
1895 .nr(2)
1896 .kr(1)
1897 .sr(1)
1898 .m(2)
1899 .n(2)
1900 .k(k)
1901 .ks(3)
1902 .a_offset(13)
1903 .zero_index(mz)
1904 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1905 }
1906 }
1907}
1908
1909TEST(Q8_IGEMM_2X2__SCALAR, qmin) {
1910 GemmMicrokernelTester()
1911 .mr(2)
1912 .nr(2)
1913 .kr(1)
1914 .sr(1)
1915 .m(2)
1916 .n(2)
1917 .k(1)
1918 .qmin(128)
1919 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1920}
1921
1922TEST(Q8_IGEMM_2X2__SCALAR, qmax) {
1923 GemmMicrokernelTester()
1924 .mr(2)
1925 .nr(2)
1926 .kr(1)
1927 .sr(1)
1928 .m(2)
1929 .n(2)
1930 .k(1)
1931 .qmax(128)
1932 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1933}
1934
1935TEST(Q8_IGEMM_2X2__SCALAR, strided_cm) {
1936 GemmMicrokernelTester()
1937 .mr(2)
1938 .nr(2)
1939 .kr(1)
1940 .sr(1)
1941 .m(2)
1942 .n(2)
1943 .k(1)
1944 .cm_stride(5)
1945 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1946}
1947
1948TEST(Q8_IGEMM_2X2__SCALAR, no_a_zero_point) {
1949 for (size_t k = 1; k <= 5; k += 2) {
1950 GemmMicrokernelTester()
1951 .mr(2)
1952 .nr(2)
1953 .kr(1)
1954 .sr(1)
1955 .m(2)
1956 .n(2)
1957 .k(k)
1958 .a_zero_point(0)
1959 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1960 }
1961}
1962
1963TEST(Q8_IGEMM_2X2__SCALAR, no_b_zero_point) {
1964 for (size_t k = 1; k <= 5; k += 2) {
1965 GemmMicrokernelTester()
1966 .mr(2)
1967 .nr(2)
1968 .kr(1)
1969 .sr(1)
1970 .m(2)
1971 .n(2)
1972 .k(k)
1973 .b_zero_point(0)
1974 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1975 }
1976}
1977
1978TEST(Q8_IGEMM_2X2__SCALAR, no_zero_point) {
1979 for (size_t k = 1; k <= 5; k += 2) {
1980 GemmMicrokernelTester()
1981 .mr(2)
1982 .nr(2)
1983 .kr(1)
1984 .sr(1)
1985 .m(2)
1986 .n(2)
1987 .k(k)
1988 .a_zero_point(0)
1989 .b_zero_point(0)
1990 .Test(xnn_q8_igemm_ukernel_2x2__scalar, GemmMicrokernelTester::Variant::Scalar);
1991 }
1992}