blob: c7a9983027b1aefbc2674f1edaa4c3d886683275 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/x8-transpose.yaml
// Generator: tools/generate-transpose-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/transpose.h>
17#include "transpose-microkernel-tester.h"
18
19
Alan Kelly667e0f12022-01-14 09:37:59 -080020TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
21 TransposeMicrokernelTester()
22 .input_stride(2)
23 .output_stride(1)
24 .block_width(2)
25 .block_height(1)
26 .iterations(1)
27 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
28}
29
30TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
31 for(size_t i = 1; i <= 2; ++i){
32 for(size_t j = 1; j <= 4; ++j){
33 TransposeMicrokernelTester()
34 .input_stride(j)
35 .output_stride(i)
36 .block_width(j)
37 .block_height(i)
38 .iterations(1)
39 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
40 }
41 }
42}
43
44TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
45 TransposeMicrokernelTester()
46 .input_stride(4)
47 .output_stride(1)
48 .block_width(4)
49 .block_height(1)
50 .iterations(1)
51 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
52}
53
54TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
55 for(size_t i = 3; i < 4; ++i){
56 TransposeMicrokernelTester()
57 .input_stride(i)
58 .output_stride(1)
59 .block_width(i)
60 .block_height(1)
61 .iterations(1)
62 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
63 }
64}
65
66TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
67 for(size_t i = 3; i < 4; ++i){
68 TransposeMicrokernelTester()
69 .input_stride(i)
70 .output_stride(2)
71 .block_width(i)
72 .block_height(2)
73 .iterations(1)
74 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
75 }
76}
77
78TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
79 TransposeMicrokernelTester()
80 .input_stride(2)
81 .output_stride(2)
82 .block_width(2)
83 .block_height(2)
84 .iterations(1)
85 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
86}
87
88TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2){
89 for(size_t i = 2; i < 2; ++i){
90 TransposeMicrokernelTester()
91 .input_stride(2)
92 .output_stride(i)
93 .block_width(2)
94 .block_height(i)
95 .iterations(1)
96 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
97 }
98}
99
100TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4){
101 for(size_t i = 2; i < 2; ++i){
102 TransposeMicrokernelTester()
103 .input_stride(4)
104 .output_stride(i)
105 .block_width(4)
106 .block_height(i)
107 .iterations(1)
108 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
109 }
110}
111
112TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
113 for(size_t i = 2; i < 2; ++i){
114 for(size_t j = 3; j < 4; ++j){
115 TransposeMicrokernelTester()
116 .input_stride(j)
117 .output_stride(i)
118 .block_width(j)
119 .block_height(i)
120 .iterations(1)
121 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
122 }
123 }
124}
125
126TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
127 TransposeMicrokernelTester()
128 .input_stride(4)
129 .output_stride(1)
130 .block_width(2)
131 .block_height(1)
132 .iterations(1)
133 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
134}
135
136TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
137 TransposeMicrokernelTester()
138 .input_stride(2)
139 .output_stride(2)
140 .block_width(2)
141 .block_height(1)
142 .iterations(1)
143 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
144}
145
146TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
147 TransposeMicrokernelTester()
148 .input_stride(4)
149 .output_stride(2)
150 .block_width(2)
151 .block_height(1)
152 .iterations(1)
153 .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
154}
155
156TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4) {
157 TransposeMicrokernelTester()
158 .input_stride(4)
159 .output_stride(1)
160 .block_width(4)
161 .block_height(1)
162 .iterations(1)
163 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
164}
165
166TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_2_bw_1_8) {
167 for(size_t i = 1; i <= 2; ++i){
168 for(size_t j = 1; j <= 8; ++j){
169 TransposeMicrokernelTester()
170 .input_stride(j)
171 .output_stride(i)
172 .block_width(j)
173 .block_height(i)
174 .iterations(1)
175 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
176 }
177 }
178}
179
180TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_8) {
181 TransposeMicrokernelTester()
182 .input_stride(8)
183 .output_stride(1)
184 .block_width(8)
185 .block_height(1)
186 .iterations(1)
187 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
188}
189
190TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_5_8) {
191 for(size_t i = 5; i < 8; ++i){
192 TransposeMicrokernelTester()
193 .input_stride(i)
194 .output_stride(1)
195 .block_width(i)
196 .block_height(1)
197 .iterations(1)
198 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
199 }
200}
201
202TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_5_8) {
203 for(size_t i = 5; i < 8; ++i){
204 TransposeMicrokernelTester()
205 .input_stride(i)
206 .output_stride(2)
207 .block_width(i)
208 .block_height(2)
209 .iterations(1)
210 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
211 }
212}
213
214TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_4) {
215 TransposeMicrokernelTester()
216 .input_stride(4)
217 .output_stride(2)
218 .block_width(4)
219 .block_height(2)
220 .iterations(1)
221 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
222}
223
224TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_4){
225 for(size_t i = 2; i < 2; ++i){
226 TransposeMicrokernelTester()
227 .input_stride(4)
228 .output_stride(i)
229 .block_width(4)
230 .block_height(i)
231 .iterations(1)
232 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
233 }
234}
235
236TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_8){
237 for(size_t i = 2; i < 2; ++i){
238 TransposeMicrokernelTester()
239 .input_stride(8)
240 .output_stride(i)
241 .block_width(8)
242 .block_height(i)
243 .iterations(1)
244 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
245 }
246}
247
248TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_5_8) {
249 for(size_t i = 2; i < 2; ++i){
250 for(size_t j = 5; j < 8; ++j){
251 TransposeMicrokernelTester()
252 .input_stride(j)
253 .output_stride(i)
254 .block_width(j)
255 .block_height(i)
256 .iterations(1)
257 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
258 }
259 }
260}
261
262TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8) {
263 TransposeMicrokernelTester()
264 .input_stride(8)
265 .output_stride(1)
266 .block_width(4)
267 .block_height(1)
268 .iterations(1)
269 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
270}
271
272TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_os_2) {
273 TransposeMicrokernelTester()
274 .input_stride(4)
275 .output_stride(2)
276 .block_width(4)
277 .block_height(1)
278 .iterations(1)
279 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
280}
281
282TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8_os_2) {
283 TransposeMicrokernelTester()
284 .input_stride(8)
285 .output_stride(2)
286 .block_width(4)
287 .block_height(1)
288 .iterations(1)
289 .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
290}
291
292TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
293 TransposeMicrokernelTester()
294 .input_stride(1)
295 .output_stride(2)
296 .block_width(1)
297 .block_height(2)
298 .iterations(1)
299 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
300}
301
302TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
303 for(size_t i = 1; i <= 4; ++i){
304 for(size_t j = 1; j <= 2; ++j){
305 TransposeMicrokernelTester()
306 .input_stride(j)
307 .output_stride(i)
308 .block_width(j)
309 .block_height(i)
310 .iterations(1)
311 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
312 }
313 }
314}
315
316TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
317 TransposeMicrokernelTester()
318 .input_stride(2)
319 .output_stride(2)
320 .block_width(2)
321 .block_height(2)
322 .iterations(1)
323 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
324}
325
326TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
327 for(size_t i = 2; i < 2; ++i){
328 TransposeMicrokernelTester()
329 .input_stride(i)
330 .output_stride(2)
331 .block_width(i)
332 .block_height(2)
333 .iterations(1)
334 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
335 }
336}
337
338TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
339 for(size_t i = 2; i < 2; ++i){
340 TransposeMicrokernelTester()
341 .input_stride(i)
342 .output_stride(4)
343 .block_width(i)
344 .block_height(4)
345 .iterations(1)
346 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
347 }
348}
349
350TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
351 TransposeMicrokernelTester()
352 .input_stride(1)
353 .output_stride(4)
354 .block_width(1)
355 .block_height(4)
356 .iterations(1)
357 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
358}
359
360TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1){
361 for(size_t i = 3; i < 4; ++i){
362 TransposeMicrokernelTester()
363 .input_stride(1)
364 .output_stride(i)
365 .block_width(1)
366 .block_height(i)
367 .iterations(1)
368 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
369 }
370}
371
372TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2){
373 for(size_t i = 3; i < 4; ++i){
374 TransposeMicrokernelTester()
375 .input_stride(2)
376 .output_stride(i)
377 .block_width(2)
378 .block_height(i)
379 .iterations(1)
380 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
381 }
382}
383
384TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
385 for(size_t i = 3; i < 4; ++i){
386 for(size_t j = 2; j < 2; ++j){
387 TransposeMicrokernelTester()
388 .input_stride(j)
389 .output_stride(i)
390 .block_width(j)
391 .block_height(i)
392 .iterations(1)
393 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
394 }
395 }
396}
397
398TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
399 TransposeMicrokernelTester()
400 .input_stride(2)
401 .output_stride(2)
402 .block_width(1)
403 .block_height(2)
404 .iterations(1)
405 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
406}
407
408TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
409 TransposeMicrokernelTester()
410 .input_stride(1)
411 .output_stride(4)
412 .block_width(1)
413 .block_height(2)
414 .iterations(1)
415 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
416}
417
418TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
419 TransposeMicrokernelTester()
420 .input_stride(2)
421 .output_stride(4)
422 .block_width(1)
423 .block_height(2)
424 .iterations(1)
425 .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
426}
427
428TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
429 TransposeMicrokernelTester()
430 .input_stride(2)
431 .output_stride(2)
432 .block_width(2)
433 .block_height(2)
434 .iterations(1)
435 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
436}
437
438TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
439 for(size_t i = 1; i <= 4; ++i){
440 for(size_t j = 1; j <= 4; ++j){
441 TransposeMicrokernelTester()
442 .input_stride(j)
443 .output_stride(i)
444 .block_width(j)
445 .block_height(i)
446 .iterations(1)
447 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
448 }
449 }
450}
451
452TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
453 TransposeMicrokernelTester()
454 .input_stride(4)
455 .output_stride(2)
456 .block_width(4)
457 .block_height(2)
458 .iterations(1)
459 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
460}
461
462TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
463 for(size_t i = 3; i < 4; ++i){
464 TransposeMicrokernelTester()
465 .input_stride(i)
466 .output_stride(2)
467 .block_width(i)
468 .block_height(2)
469 .iterations(1)
470 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
471 }
472}
473
474TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
475 for(size_t i = 3; i < 4; ++i){
476 TransposeMicrokernelTester()
477 .input_stride(i)
478 .output_stride(4)
479 .block_width(i)
480 .block_height(4)
481 .iterations(1)
482 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
483 }
484}
485
486TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
487 TransposeMicrokernelTester()
488 .input_stride(2)
489 .output_stride(4)
490 .block_width(2)
491 .block_height(4)
492 .iterations(1)
493 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
494}
495
496TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2){
497 for(size_t i = 3; i < 4; ++i){
498 TransposeMicrokernelTester()
499 .input_stride(2)
500 .output_stride(i)
501 .block_width(2)
502 .block_height(i)
503 .iterations(1)
504 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
505 }
506}
507
508TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4){
509 for(size_t i = 3; i < 4; ++i){
510 TransposeMicrokernelTester()
511 .input_stride(4)
512 .output_stride(i)
513 .block_width(4)
514 .block_height(i)
515 .iterations(1)
516 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
517 }
518}
519
520TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
521 for(size_t i = 3; i < 4; ++i){
522 for(size_t j = 3; j < 4; ++j){
523 TransposeMicrokernelTester()
524 .input_stride(j)
525 .output_stride(i)
526 .block_width(j)
527 .block_height(i)
528 .iterations(1)
529 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
530 }
531 }
532}
533
534TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
535 TransposeMicrokernelTester()
536 .input_stride(4)
537 .output_stride(2)
538 .block_width(2)
539 .block_height(2)
540 .iterations(1)
541 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
542}
543
544TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
545 TransposeMicrokernelTester()
546 .input_stride(2)
547 .output_stride(4)
548 .block_width(2)
549 .block_height(2)
550 .iterations(1)
551 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
552}
553
554TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
555 TransposeMicrokernelTester()
556 .input_stride(4)
557 .output_stride(4)
558 .block_width(2)
559 .block_height(2)
560 .iterations(1)
561 .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
562}
563
564TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4) {
565 TransposeMicrokernelTester()
566 .input_stride(4)
567 .output_stride(2)
568 .block_width(4)
569 .block_height(2)
570 .iterations(1)
571 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
572}
573
574TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_1_4_bw_1_8) {
575 for(size_t i = 1; i <= 4; ++i){
576 for(size_t j = 1; j <= 8; ++j){
577 TransposeMicrokernelTester()
578 .input_stride(j)
579 .output_stride(i)
580 .block_width(j)
581 .block_height(i)
582 .iterations(1)
583 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
584 }
585 }
586}
587
588TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_8) {
589 TransposeMicrokernelTester()
590 .input_stride(8)
591 .output_stride(2)
592 .block_width(8)
593 .block_height(2)
594 .iterations(1)
595 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
596}
597
598TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_5_8) {
599 for(size_t i = 5; i < 8; ++i){
600 TransposeMicrokernelTester()
601 .input_stride(i)
602 .output_stride(2)
603 .block_width(i)
604 .block_height(2)
605 .iterations(1)
606 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
607 }
608}
609
610TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_5_8) {
611 for(size_t i = 5; i < 8; ++i){
612 TransposeMicrokernelTester()
613 .input_stride(i)
614 .output_stride(4)
615 .block_width(i)
616 .block_height(4)
617 .iterations(1)
618 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
619 }
620}
621
622TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_4) {
623 TransposeMicrokernelTester()
624 .input_stride(4)
625 .output_stride(4)
626 .block_width(4)
627 .block_height(4)
628 .iterations(1)
629 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
630}
631
632TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_4){
633 for(size_t i = 3; i < 4; ++i){
634 TransposeMicrokernelTester()
635 .input_stride(4)
636 .output_stride(i)
637 .block_width(4)
638 .block_height(i)
639 .iterations(1)
640 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
641 }
642}
643
644TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_8){
645 for(size_t i = 3; i < 4; ++i){
646 TransposeMicrokernelTester()
647 .input_stride(8)
648 .output_stride(i)
649 .block_width(8)
650 .block_height(i)
651 .iterations(1)
652 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
653 }
654}
655
656TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_5_8) {
657 for(size_t i = 3; i < 4; ++i){
658 for(size_t j = 5; j < 8; ++j){
659 TransposeMicrokernelTester()
660 .input_stride(j)
661 .output_stride(i)
662 .block_width(j)
663 .block_height(i)
664 .iterations(1)
665 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
666 }
667 }
668}
669
670TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8) {
671 TransposeMicrokernelTester()
672 .input_stride(8)
673 .output_stride(2)
674 .block_width(4)
675 .block_height(2)
676 .iterations(1)
677 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
678}
679
680TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_os_4) {
681 TransposeMicrokernelTester()
682 .input_stride(4)
683 .output_stride(4)
684 .block_width(4)
685 .block_height(2)
686 .iterations(1)
687 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
688}
689
690TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8_os_4) {
691 TransposeMicrokernelTester()
692 .input_stride(8)
693 .output_stride(4)
694 .block_width(4)
695 .block_height(2)
696 .iterations(1)
697 .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
698}
699
700TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
701 TransposeMicrokernelTester()
702 .input_stride(1)
703 .output_stride(4)
704 .block_width(1)
705 .block_height(4)
706 .iterations(1)
707 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
708}
709
710TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
711 for(size_t i = 1; i <= 8; ++i){
712 for(size_t j = 1; j <= 2; ++j){
713 TransposeMicrokernelTester()
714 .input_stride(j)
715 .output_stride(i)
716 .block_width(j)
717 .block_height(i)
718 .iterations(1)
719 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
720 }
721 }
722}
723
724TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
725 TransposeMicrokernelTester()
726 .input_stride(2)
727 .output_stride(4)
728 .block_width(2)
729 .block_height(4)
730 .iterations(1)
731 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
732}
733
734TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
735 for(size_t i = 2; i < 2; ++i){
736 TransposeMicrokernelTester()
737 .input_stride(i)
738 .output_stride(4)
739 .block_width(i)
740 .block_height(4)
741 .iterations(1)
742 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
743 }
744}
745
746TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
747 for(size_t i = 2; i < 2; ++i){
748 TransposeMicrokernelTester()
749 .input_stride(i)
750 .output_stride(8)
751 .block_width(i)
752 .block_height(8)
753 .iterations(1)
754 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
755 }
756}
757
758TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
759 TransposeMicrokernelTester()
760 .input_stride(1)
761 .output_stride(8)
762 .block_width(1)
763 .block_height(8)
764 .iterations(1)
765 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
766}
767
768TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1){
769 for(size_t i = 5; i < 8; ++i){
770 TransposeMicrokernelTester()
771 .input_stride(1)
772 .output_stride(i)
773 .block_width(1)
774 .block_height(i)
775 .iterations(1)
776 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
777 }
778}
779
780TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2){
781 for(size_t i = 5; i < 8; ++i){
782 TransposeMicrokernelTester()
783 .input_stride(2)
784 .output_stride(i)
785 .block_width(2)
786 .block_height(i)
787 .iterations(1)
788 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
789 }
790}
791
792TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
793 for(size_t i = 5; i < 8; ++i){
794 for(size_t j = 2; j < 2; ++j){
795 TransposeMicrokernelTester()
796 .input_stride(j)
797 .output_stride(i)
798 .block_width(j)
799 .block_height(i)
800 .iterations(1)
801 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
802 }
803 }
804}
805
806TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
807 TransposeMicrokernelTester()
808 .input_stride(2)
809 .output_stride(4)
810 .block_width(1)
811 .block_height(4)
812 .iterations(1)
813 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
814}
815
816TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
817 TransposeMicrokernelTester()
818 .input_stride(1)
819 .output_stride(8)
820 .block_width(1)
821 .block_height(4)
822 .iterations(1)
823 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
824}
825
826TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
827 TransposeMicrokernelTester()
828 .input_stride(2)
829 .output_stride(8)
830 .block_width(1)
831 .block_height(4)
832 .iterations(1)
833 .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
834}
835
836TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
837 TransposeMicrokernelTester()
838 .input_stride(2)
839 .output_stride(4)
840 .block_width(2)
841 .block_height(4)
842 .iterations(1)
843 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
844}
845
846TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
847 for(size_t i = 1; i <= 8; ++i){
848 for(size_t j = 1; j <= 4; ++j){
849 TransposeMicrokernelTester()
850 .input_stride(j)
851 .output_stride(i)
852 .block_width(j)
853 .block_height(i)
854 .iterations(1)
855 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
856 }
857 }
858}
859
860TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
861 TransposeMicrokernelTester()
862 .input_stride(4)
863 .output_stride(4)
864 .block_width(4)
865 .block_height(4)
866 .iterations(1)
867 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
868}
869
870TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
871 for(size_t i = 3; i < 4; ++i){
872 TransposeMicrokernelTester()
873 .input_stride(i)
874 .output_stride(4)
875 .block_width(i)
876 .block_height(4)
877 .iterations(1)
878 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
879 }
880}
881
882TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
883 for(size_t i = 3; i < 4; ++i){
884 TransposeMicrokernelTester()
885 .input_stride(i)
886 .output_stride(8)
887 .block_width(i)
888 .block_height(8)
889 .iterations(1)
890 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
891 }
892}
893
894TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
895 TransposeMicrokernelTester()
896 .input_stride(2)
897 .output_stride(8)
898 .block_width(2)
899 .block_height(8)
900 .iterations(1)
901 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
902}
903
904TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2){
905 for(size_t i = 5; i < 8; ++i){
906 TransposeMicrokernelTester()
907 .input_stride(2)
908 .output_stride(i)
909 .block_width(2)
910 .block_height(i)
911 .iterations(1)
912 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
913 }
914}
915
916TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4){
917 for(size_t i = 5; i < 8; ++i){
918 TransposeMicrokernelTester()
919 .input_stride(4)
920 .output_stride(i)
921 .block_width(4)
922 .block_height(i)
923 .iterations(1)
924 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
925 }
926}
927
928TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
929 for(size_t i = 5; i < 8; ++i){
930 for(size_t j = 3; j < 4; ++j){
931 TransposeMicrokernelTester()
932 .input_stride(j)
933 .output_stride(i)
934 .block_width(j)
935 .block_height(i)
936 .iterations(1)
937 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
938 }
939 }
940}
941
942TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
943 TransposeMicrokernelTester()
944 .input_stride(4)
945 .output_stride(4)
946 .block_width(2)
947 .block_height(4)
948 .iterations(1)
949 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
950}
951
952TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
953 TransposeMicrokernelTester()
954 .input_stride(2)
955 .output_stride(8)
956 .block_width(2)
957 .block_height(4)
958 .iterations(1)
959 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
960}
961
962TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
963 TransposeMicrokernelTester()
964 .input_stride(4)
965 .output_stride(8)
966 .block_width(2)
967 .block_height(4)
968 .iterations(1)
969 .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
970}
971
972TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4) {
973 TransposeMicrokernelTester()
974 .input_stride(4)
975 .output_stride(4)
976 .block_width(4)
977 .block_height(4)
978 .iterations(1)
979 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
980}
981
982TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_1_8_bw_1_8) {
983 for(size_t i = 1; i <= 8; ++i){
984 for(size_t j = 1; j <= 8; ++j){
985 TransposeMicrokernelTester()
986 .input_stride(j)
987 .output_stride(i)
988 .block_width(j)
989 .block_height(i)
990 .iterations(1)
991 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
992 }
993 }
994}
995
996TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_8) {
997 TransposeMicrokernelTester()
998 .input_stride(8)
999 .output_stride(4)
1000 .block_width(8)
1001 .block_height(4)
1002 .iterations(1)
1003 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1004}
1005
1006TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_5_8) {
1007 for(size_t i = 5; i < 8; ++i){
1008 TransposeMicrokernelTester()
1009 .input_stride(i)
1010 .output_stride(4)
1011 .block_width(i)
1012 .block_height(4)
1013 .iterations(1)
1014 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1015 }
1016}
1017
1018TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_5_8) {
1019 for(size_t i = 5; i < 8; ++i){
1020 TransposeMicrokernelTester()
1021 .input_stride(i)
1022 .output_stride(8)
1023 .block_width(i)
1024 .block_height(8)
1025 .iterations(1)
1026 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1027 }
1028}
1029
1030TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_4) {
1031 TransposeMicrokernelTester()
1032 .input_stride(4)
1033 .output_stride(8)
1034 .block_width(4)
1035 .block_height(8)
1036 .iterations(1)
1037 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1038}
1039
1040TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_4){
1041 for(size_t i = 5; i < 8; ++i){
1042 TransposeMicrokernelTester()
1043 .input_stride(4)
1044 .output_stride(i)
1045 .block_width(4)
1046 .block_height(i)
1047 .iterations(1)
1048 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1049 }
1050}
1051
1052TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_8){
1053 for(size_t i = 5; i < 8; ++i){
1054 TransposeMicrokernelTester()
1055 .input_stride(8)
1056 .output_stride(i)
1057 .block_width(8)
1058 .block_height(i)
1059 .iterations(1)
1060 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1061 }
1062}
1063
1064TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_5_8) {
1065 for(size_t i = 5; i < 8; ++i){
1066 for(size_t j = 5; j < 8; ++j){
1067 TransposeMicrokernelTester()
1068 .input_stride(j)
1069 .output_stride(i)
1070 .block_width(j)
1071 .block_height(i)
1072 .iterations(1)
1073 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1074 }
1075 }
1076}
1077
1078TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8) {
1079 TransposeMicrokernelTester()
1080 .input_stride(8)
1081 .output_stride(4)
1082 .block_width(4)
1083 .block_height(4)
1084 .iterations(1)
1085 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1086}
1087
1088TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_os_8) {
1089 TransposeMicrokernelTester()
1090 .input_stride(4)
1091 .output_stride(8)
1092 .block_width(4)
1093 .block_height(4)
1094 .iterations(1)
1095 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1096}
1097
1098TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8_os_8) {
1099 TransposeMicrokernelTester()
1100 .input_stride(8)
1101 .output_stride(8)
1102 .block_width(4)
1103 .block_height(4)
1104 .iterations(1)
1105 .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
1106}
1107
Alan Kelly5da6d382022-01-14 03:19:43 -08001108#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyf2b233b2022-01-31 02:53:57 -08001109 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001110 TEST_REQUIRES_X86_SSE2;
1111 TransposeMicrokernelTester()
1112 .input_stride(16)
1113 .output_stride(16)
1114 .block_width(16)
1115 .block_height(16)
1116 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001117 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001118 }
Alan Kellycd21b022022-01-14 01:44:59 -08001119
Alan Kellyf2b233b2022-01-31 02:53:57 -08001120 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_1_32_bw_1_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001121 TEST_REQUIRES_X86_SSE2;
1122 for(size_t i = 1; i <= 32; ++i){
1123 for(size_t j = 1; j <= 32; ++j){
1124 TransposeMicrokernelTester()
1125 .input_stride(j)
1126 .output_stride(i)
1127 .block_width(j)
1128 .block_height(i)
1129 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001130 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001131 }
Alan Kellycd21b022022-01-14 01:44:59 -08001132 }
1133 }
Alan Kellycd21b022022-01-14 01:44:59 -08001134
Alan Kellyf2b233b2022-01-31 02:53:57 -08001135 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001136 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001137 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001138 .input_stride(32)
1139 .output_stride(16)
1140 .block_width(32)
1141 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001142 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001143 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001144 }
Alan Kellycd21b022022-01-14 01:44:59 -08001145
Alan Kellyf2b233b2022-01-31 02:53:57 -08001146 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_17_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001147 TEST_REQUIRES_X86_SSE2;
1148 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001149 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001150 .input_stride(i)
1151 .output_stride(16)
1152 .block_width(i)
1153 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001154 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001155 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001156 }
1157 }
Alan Kellycd21b022022-01-14 01:44:59 -08001158
Alan Kellyf2b233b2022-01-31 02:53:57 -08001159 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_32_bw_17_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001160 TEST_REQUIRES_X86_SSE2;
1161 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001162 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001163 .input_stride(i)
1164 .output_stride(32)
1165 .block_width(i)
1166 .block_height(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001167 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001168 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001169 }
1170 }
Alan Kellycd21b022022-01-14 01:44:59 -08001171
Alan Kellyf2b233b2022-01-31 02:53:57 -08001172 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_32_bw_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001173 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001174 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001175 .input_stride(16)
1176 .output_stride(32)
1177 .block_width(16)
1178 .block_height(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001179 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001180 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001181 }
Alan Kellycd21b022022-01-14 01:44:59 -08001182
Alan Kellyf2b233b2022-01-31 02:53:57 -08001183 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_16){
Alan Kelly5da6d382022-01-14 03:19:43 -08001184 TEST_REQUIRES_X86_SSE2;
1185 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001186 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001187 .input_stride(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001188 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001189 .block_width(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001190 .block_height(i)
1191 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001192 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001193 }
1194 }
Alan Kellycd21b022022-01-14 01:44:59 -08001195
Alan Kellyf2b233b2022-01-31 02:53:57 -08001196 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_32){
Alan Kelly5da6d382022-01-14 03:19:43 -08001197 TEST_REQUIRES_X86_SSE2;
1198 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001199 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001200 .input_stride(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001201 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001202 .block_width(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001203 .block_height(i)
1204 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001205 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001206 }
1207 }
Alan Kellycd21b022022-01-14 01:44:59 -08001208
Alan Kellyf2b233b2022-01-31 02:53:57 -08001209 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_17_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001210 TEST_REQUIRES_X86_SSE2;
1211 for(size_t i = 17; i < 32; ++i){
1212 for(size_t j = 17; j < 32; ++j){
1213 TransposeMicrokernelTester()
1214 .input_stride(j)
1215 .output_stride(i)
1216 .block_width(j)
1217 .block_height(i)
1218 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001219 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001220 }
1221 }
Alan Kellycd21b022022-01-14 01:44:59 -08001222 }
Alan Kellycd21b022022-01-14 01:44:59 -08001223
Alan Kellyf2b233b2022-01-31 02:53:57 -08001224 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_is_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001225 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001226 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001227 .input_stride(32)
1228 .output_stride(16)
1229 .block_width(16)
1230 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001231 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001232 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001233 }
Alan Kellycd21b022022-01-14 01:44:59 -08001234
Alan Kellyf2b233b2022-01-31 02:53:57 -08001235 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_os_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001236 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001237 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001238 .input_stride(16)
1239 .output_stride(32)
1240 .block_width(16)
1241 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001242 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001243 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001244 }
Alan Kellycd21b022022-01-14 01:44:59 -08001245
Alan Kellyf2b233b2022-01-31 02:53:57 -08001246 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_is_32_os_32) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001247 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001248 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001249 .input_stride(32)
1250 .output_stride(32)
1251 .block_width(16)
1252 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001253 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001254 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001255 }
Alan Kelly5da6d382022-01-14 03:19:43 -08001256#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellycd21b022022-01-14 01:44:59 -08001257
Alan Kelly5da6d382022-01-14 03:19:43 -08001258
1259#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1260 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16) {
1261 TEST_REQUIRES_X86_SSE2;
1262 TransposeMicrokernelTester()
1263 .input_stride(16)
1264 .output_stride(16)
1265 .block_width(16)
1266 .block_height(16)
1267 .iterations(1)
1268 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1269 }
1270
1271 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_1_32_bw_1_32) {
1272 TEST_REQUIRES_X86_SSE2;
1273 for(size_t i = 1; i <= 32; ++i){
1274 for(size_t j = 1; j <= 32; ++j){
1275 TransposeMicrokernelTester()
1276 .input_stride(j)
1277 .output_stride(i)
1278 .block_width(j)
1279 .block_height(i)
1280 .iterations(1)
1281 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1282 }
1283 }
1284 }
1285
1286 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_32) {
1287 TEST_REQUIRES_X86_SSE2;
1288 TransposeMicrokernelTester()
1289 .input_stride(32)
1290 .output_stride(16)
1291 .block_width(32)
1292 .block_height(16)
1293 .iterations(1)
1294 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1295 }
1296
1297 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_17_32) {
1298 TEST_REQUIRES_X86_SSE2;
1299 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001300 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001301 .input_stride(i)
1302 .output_stride(16)
1303 .block_width(i)
1304 .block_height(16)
1305 .iterations(1)
1306 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1307 }
1308 }
1309
1310 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_32_bw_17_32) {
1311 TEST_REQUIRES_X86_SSE2;
1312 for(size_t i = 17; i < 32; ++i){
1313 TransposeMicrokernelTester()
1314 .input_stride(i)
1315 .output_stride(32)
1316 .block_width(i)
1317 .block_height(32)
1318 .iterations(1)
1319 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1320 }
1321 }
1322
1323 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_32_bw_16) {
1324 TEST_REQUIRES_X86_SSE2;
1325 TransposeMicrokernelTester()
1326 .input_stride(16)
1327 .output_stride(32)
1328 .block_width(16)
1329 .block_height(32)
1330 .iterations(1)
1331 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1332 }
1333
1334 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_16){
1335 TEST_REQUIRES_X86_SSE2;
1336 for(size_t i = 17; i < 32; ++i){
1337 TransposeMicrokernelTester()
1338 .input_stride(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001339 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001340 .block_width(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001341 .block_height(i)
1342 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001343 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001344 }
1345 }
Alan Kellycd21b022022-01-14 01:44:59 -08001346
Alan Kelly5da6d382022-01-14 03:19:43 -08001347 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_32){
1348 TEST_REQUIRES_X86_SSE2;
1349 for(size_t i = 17; i < 32; ++i){
Alan Kellycd21b022022-01-14 01:44:59 -08001350 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001351 .input_stride(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001352 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001353 .block_width(32)
Alan Kellycd21b022022-01-14 01:44:59 -08001354 .block_height(i)
1355 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001356 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001357 }
1358 }
Alan Kellycd21b022022-01-14 01:44:59 -08001359
Alan Kelly5da6d382022-01-14 03:19:43 -08001360 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_17_32) {
1361 TEST_REQUIRES_X86_SSE2;
1362 for(size_t i = 17; i < 32; ++i){
1363 for(size_t j = 17; j < 32; ++j){
1364 TransposeMicrokernelTester()
1365 .input_stride(j)
1366 .output_stride(i)
1367 .block_width(j)
1368 .block_height(i)
1369 .iterations(1)
1370 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
1371 }
Alan Kellycd21b022022-01-14 01:44:59 -08001372 }
1373 }
Alan Kellycd21b022022-01-14 01:44:59 -08001374
Alan Kelly5da6d382022-01-14 03:19:43 -08001375 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_is_32) {
1376 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001377 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001378 .input_stride(32)
1379 .output_stride(16)
1380 .block_width(16)
1381 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001382 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001383 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001384 }
Alan Kellycd21b022022-01-14 01:44:59 -08001385
Alan Kelly5da6d382022-01-14 03:19:43 -08001386 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_os_32) {
1387 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001388 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001389 .input_stride(16)
1390 .output_stride(32)
1391 .block_width(16)
1392 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001393 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001394 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001395 }
Alan Kellycd21b022022-01-14 01:44:59 -08001396
Alan Kelly5da6d382022-01-14 03:19:43 -08001397 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_is_32_os_32) {
1398 TEST_REQUIRES_X86_SSE2;
Alan Kellycd21b022022-01-14 01:44:59 -08001399 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001400 .input_stride(32)
1401 .output_stride(32)
1402 .block_width(16)
1403 .block_height(16)
Alan Kellycd21b022022-01-14 01:44:59 -08001404 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001405 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
Alan Kellycd21b022022-01-14 01:44:59 -08001406 }
Alan Kelly5da6d382022-01-14 03:19:43 -08001407#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellycfd947d2022-02-02 00:18:46 -08001408
1409
1410#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1411 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16) {
1412 TEST_REQUIRES_ARM_NEON;
1413 TransposeMicrokernelTester()
1414 .input_stride(16)
1415 .output_stride(16)
1416 .block_width(16)
1417 .block_height(16)
1418 .iterations(1)
1419 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1420 }
1421
1422 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_1_32_bw_1_32) {
1423 TEST_REQUIRES_ARM_NEON;
1424 for(size_t i = 1; i <= 32; ++i){
1425 for(size_t j = 1; j <= 32; ++j){
1426 TransposeMicrokernelTester()
1427 .input_stride(j)
1428 .output_stride(i)
1429 .block_width(j)
1430 .block_height(i)
1431 .iterations(1)
1432 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1433 }
1434 }
1435 }
1436
1437 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_32) {
1438 TEST_REQUIRES_ARM_NEON;
1439 TransposeMicrokernelTester()
1440 .input_stride(32)
1441 .output_stride(16)
1442 .block_width(32)
1443 .block_height(16)
1444 .iterations(1)
1445 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1446 }
1447
1448 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_17_32) {
1449 TEST_REQUIRES_ARM_NEON;
1450 for(size_t i = 17; i < 32; ++i){
1451 TransposeMicrokernelTester()
1452 .input_stride(i)
1453 .output_stride(16)
1454 .block_width(i)
1455 .block_height(16)
1456 .iterations(1)
1457 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1458 }
1459 }
1460
1461 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_32_bw_17_32) {
1462 TEST_REQUIRES_ARM_NEON;
1463 for(size_t i = 17; i < 32; ++i){
1464 TransposeMicrokernelTester()
1465 .input_stride(i)
1466 .output_stride(32)
1467 .block_width(i)
1468 .block_height(32)
1469 .iterations(1)
1470 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1471 }
1472 }
1473
1474 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_32_bw_16) {
1475 TEST_REQUIRES_ARM_NEON;
1476 TransposeMicrokernelTester()
1477 .input_stride(16)
1478 .output_stride(32)
1479 .block_width(16)
1480 .block_height(32)
1481 .iterations(1)
1482 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1483 }
1484
1485 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_16){
1486 TEST_REQUIRES_ARM_NEON;
1487 for(size_t i = 17; i < 32; ++i){
1488 TransposeMicrokernelTester()
1489 .input_stride(16)
1490 .output_stride(i)
1491 .block_width(16)
1492 .block_height(i)
1493 .iterations(1)
1494 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1495 }
1496 }
1497
1498 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_32){
1499 TEST_REQUIRES_ARM_NEON;
1500 for(size_t i = 17; i < 32; ++i){
1501 TransposeMicrokernelTester()
1502 .input_stride(32)
1503 .output_stride(i)
1504 .block_width(32)
1505 .block_height(i)
1506 .iterations(1)
1507 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1508 }
1509 }
1510
1511 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_17_32) {
1512 TEST_REQUIRES_ARM_NEON;
1513 for(size_t i = 17; i < 32; ++i){
1514 for(size_t j = 17; j < 32; ++j){
1515 TransposeMicrokernelTester()
1516 .input_stride(j)
1517 .output_stride(i)
1518 .block_width(j)
1519 .block_height(i)
1520 .iterations(1)
1521 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1522 }
1523 }
1524 }
1525
1526 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_is_32) {
1527 TEST_REQUIRES_ARM_NEON;
1528 TransposeMicrokernelTester()
1529 .input_stride(32)
1530 .output_stride(16)
1531 .block_width(16)
1532 .block_height(16)
1533 .iterations(1)
1534 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1535 }
1536
1537 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_os_32) {
1538 TEST_REQUIRES_ARM_NEON;
1539 TransposeMicrokernelTester()
1540 .input_stride(16)
1541 .output_stride(32)
1542 .block_width(16)
1543 .block_height(16)
1544 .iterations(1)
1545 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1546 }
1547
1548 TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
1549 TEST_REQUIRES_ARM_NEON;
1550 TransposeMicrokernelTester()
1551 .input_stride(32)
1552 .output_stride(32)
1553 .block_width(16)
1554 .block_height(16)
1555 .iterations(1)
1556 .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
1557 }
1558#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1559
1560
1561#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1562 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16) {
1563 TEST_REQUIRES_ARM_NEON;
1564 TransposeMicrokernelTester()
1565 .input_stride(16)
1566 .output_stride(16)
1567 .block_width(16)
1568 .block_height(16)
1569 .iterations(1)
1570 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1571 }
1572
1573 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_1_32_bw_1_32) {
1574 TEST_REQUIRES_ARM_NEON;
1575 for(size_t i = 1; i <= 32; ++i){
1576 for(size_t j = 1; j <= 32; ++j){
1577 TransposeMicrokernelTester()
1578 .input_stride(j)
1579 .output_stride(i)
1580 .block_width(j)
1581 .block_height(i)
1582 .iterations(1)
1583 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1584 }
1585 }
1586 }
1587
1588 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_32) {
1589 TEST_REQUIRES_ARM_NEON;
1590 TransposeMicrokernelTester()
1591 .input_stride(32)
1592 .output_stride(16)
1593 .block_width(32)
1594 .block_height(16)
1595 .iterations(1)
1596 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1597 }
1598
1599 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_17_32) {
1600 TEST_REQUIRES_ARM_NEON;
1601 for(size_t i = 17; i < 32; ++i){
1602 TransposeMicrokernelTester()
1603 .input_stride(i)
1604 .output_stride(16)
1605 .block_width(i)
1606 .block_height(16)
1607 .iterations(1)
1608 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1609 }
1610 }
1611
1612 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_32_bw_17_32) {
1613 TEST_REQUIRES_ARM_NEON;
1614 for(size_t i = 17; i < 32; ++i){
1615 TransposeMicrokernelTester()
1616 .input_stride(i)
1617 .output_stride(32)
1618 .block_width(i)
1619 .block_height(32)
1620 .iterations(1)
1621 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1622 }
1623 }
1624
1625 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_32_bw_16) {
1626 TEST_REQUIRES_ARM_NEON;
1627 TransposeMicrokernelTester()
1628 .input_stride(16)
1629 .output_stride(32)
1630 .block_width(16)
1631 .block_height(32)
1632 .iterations(1)
1633 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1634 }
1635
1636 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_16){
1637 TEST_REQUIRES_ARM_NEON;
1638 for(size_t i = 17; i < 32; ++i){
1639 TransposeMicrokernelTester()
1640 .input_stride(16)
1641 .output_stride(i)
1642 .block_width(16)
1643 .block_height(i)
1644 .iterations(1)
1645 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1646 }
1647 }
1648
1649 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_32){
1650 TEST_REQUIRES_ARM_NEON;
1651 for(size_t i = 17; i < 32; ++i){
1652 TransposeMicrokernelTester()
1653 .input_stride(32)
1654 .output_stride(i)
1655 .block_width(32)
1656 .block_height(i)
1657 .iterations(1)
1658 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1659 }
1660 }
1661
1662 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_17_32) {
1663 TEST_REQUIRES_ARM_NEON;
1664 for(size_t i = 17; i < 32; ++i){
1665 for(size_t j = 17; j < 32; ++j){
1666 TransposeMicrokernelTester()
1667 .input_stride(j)
1668 .output_stride(i)
1669 .block_width(j)
1670 .block_height(i)
1671 .iterations(1)
1672 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1673 }
1674 }
1675 }
1676
1677 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_is_32) {
1678 TEST_REQUIRES_ARM_NEON;
1679 TransposeMicrokernelTester()
1680 .input_stride(32)
1681 .output_stride(16)
1682 .block_width(16)
1683 .block_height(16)
1684 .iterations(1)
1685 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1686 }
1687
1688 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_os_32) {
1689 TEST_REQUIRES_ARM_NEON;
1690 TransposeMicrokernelTester()
1691 .input_stride(16)
1692 .output_stride(32)
1693 .block_width(16)
1694 .block_height(16)
1695 .iterations(1)
1696 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1697 }
1698
1699 TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
1700 TEST_REQUIRES_ARM_NEON;
1701 TransposeMicrokernelTester()
1702 .input_stride(32)
1703 .output_stride(32)
1704 .block_width(16)
1705 .block_height(16)
1706 .iterations(1)
1707 .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
1708 }
1709#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1710
1711
1712#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1713 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16) {
1714 TEST_REQUIRES_ARM_NEON;
1715 TransposeMicrokernelTester()
1716 .input_stride(16)
1717 .output_stride(16)
1718 .block_width(16)
1719 .block_height(16)
1720 .iterations(1)
1721 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1722 }
1723
1724 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_1_32_bw_1_32) {
1725 TEST_REQUIRES_ARM_NEON;
1726 for(size_t i = 1; i <= 32; ++i){
1727 for(size_t j = 1; j <= 32; ++j){
1728 TransposeMicrokernelTester()
1729 .input_stride(j)
1730 .output_stride(i)
1731 .block_width(j)
1732 .block_height(i)
1733 .iterations(1)
1734 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1735 }
1736 }
1737 }
1738
1739 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_32) {
1740 TEST_REQUIRES_ARM_NEON;
1741 TransposeMicrokernelTester()
1742 .input_stride(32)
1743 .output_stride(16)
1744 .block_width(32)
1745 .block_height(16)
1746 .iterations(1)
1747 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1748 }
1749
1750 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_17_32) {
1751 TEST_REQUIRES_ARM_NEON;
1752 for(size_t i = 17; i < 32; ++i){
1753 TransposeMicrokernelTester()
1754 .input_stride(i)
1755 .output_stride(16)
1756 .block_width(i)
1757 .block_height(16)
1758 .iterations(1)
1759 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1760 }
1761 }
1762
1763 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_32_bw_17_32) {
1764 TEST_REQUIRES_ARM_NEON;
1765 for(size_t i = 17; i < 32; ++i){
1766 TransposeMicrokernelTester()
1767 .input_stride(i)
1768 .output_stride(32)
1769 .block_width(i)
1770 .block_height(32)
1771 .iterations(1)
1772 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1773 }
1774 }
1775
1776 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_32_bw_16) {
1777 TEST_REQUIRES_ARM_NEON;
1778 TransposeMicrokernelTester()
1779 .input_stride(16)
1780 .output_stride(32)
1781 .block_width(16)
1782 .block_height(32)
1783 .iterations(1)
1784 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1785 }
1786
1787 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_16){
1788 TEST_REQUIRES_ARM_NEON;
1789 for(size_t i = 17; i < 32; ++i){
1790 TransposeMicrokernelTester()
1791 .input_stride(16)
1792 .output_stride(i)
1793 .block_width(16)
1794 .block_height(i)
1795 .iterations(1)
1796 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1797 }
1798 }
1799
1800 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_32){
1801 TEST_REQUIRES_ARM_NEON;
1802 for(size_t i = 17; i < 32; ++i){
1803 TransposeMicrokernelTester()
1804 .input_stride(32)
1805 .output_stride(i)
1806 .block_width(32)
1807 .block_height(i)
1808 .iterations(1)
1809 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1810 }
1811 }
1812
1813 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_17_32) {
1814 TEST_REQUIRES_ARM_NEON;
1815 for(size_t i = 17; i < 32; ++i){
1816 for(size_t j = 17; j < 32; ++j){
1817 TransposeMicrokernelTester()
1818 .input_stride(j)
1819 .output_stride(i)
1820 .block_width(j)
1821 .block_height(i)
1822 .iterations(1)
1823 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1824 }
1825 }
1826 }
1827
1828 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_is_32) {
1829 TEST_REQUIRES_ARM_NEON;
1830 TransposeMicrokernelTester()
1831 .input_stride(32)
1832 .output_stride(16)
1833 .block_width(16)
1834 .block_height(16)
1835 .iterations(1)
1836 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1837 }
1838
1839 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_os_32) {
1840 TEST_REQUIRES_ARM_NEON;
1841 TransposeMicrokernelTester()
1842 .input_stride(16)
1843 .output_stride(32)
1844 .block_width(16)
1845 .block_height(16)
1846 .iterations(1)
1847 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1848 }
1849
1850 TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
1851 TEST_REQUIRES_ARM_NEON;
1852 TransposeMicrokernelTester()
1853 .input_stride(32)
1854 .output_stride(32)
1855 .block_width(16)
1856 .block_height(16)
1857 .iterations(1)
1858 .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
1859 }
1860#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64