blob: d311078c3a94e51ac91e937f818dd2db15653c10 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x16-transpose.yaml
//   Generator: tools/generate-transpose-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/transpose.h>
17#include "transpose-microkernel-tester.h"
18
19
Alan Kelly84aae412022-01-14 01:41:06 -080020TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
21 TransposeMicrokernelTester()
22 .input_stride(2)
23 .output_stride(1)
24 .block_width(2)
25 .block_height(1)
26 .iterations(1)
27 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
28}
29
30TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
31 for(size_t i = 1; i <= 2; ++i){
32 for(size_t j = 1; j <= 4; ++j){
33 TransposeMicrokernelTester()
34 .input_stride(j)
35 .output_stride(i)
36 .block_width(j)
37 .block_height(i)
38 .iterations(1)
39 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
40 }
41 }
42}
43
44TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
45 TransposeMicrokernelTester()
46 .input_stride(4)
47 .output_stride(1)
48 .block_width(4)
49 .block_height(1)
50 .iterations(1)
51 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
52}
53
54TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
55 for(size_t i = 3; i < 4; ++i){
56 TransposeMicrokernelTester()
57 .input_stride(i)
58 .output_stride(1)
59 .block_width(i)
60 .block_height(1)
61 .iterations(1)
62 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
63 }
64}
65
66TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
67 for(size_t i = 3; i < 4; ++i){
68 TransposeMicrokernelTester()
69 .input_stride(i)
70 .output_stride(2)
71 .block_width(i)
72 .block_height(2)
73 .iterations(1)
74 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
75 }
76}
77
78TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
79 TransposeMicrokernelTester()
80 .input_stride(2)
81 .output_stride(2)
82 .block_width(2)
83 .block_height(2)
84 .iterations(1)
85 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
86}
87
88TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2){
89 for(size_t i = 2; i < 2; ++i){
90 TransposeMicrokernelTester()
91 .input_stride(2)
92 .output_stride(i)
93 .block_width(2)
94 .block_height(i)
95 .iterations(1)
96 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
97 }
98}
99
100TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4){
101 for(size_t i = 2; i < 2; ++i){
102 TransposeMicrokernelTester()
103 .input_stride(4)
104 .output_stride(i)
105 .block_width(4)
106 .block_height(i)
107 .iterations(1)
108 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
109 }
110}
111
112TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
113 for(size_t i = 2; i < 2; ++i){
114 for(size_t j = 3; j < 4; ++j){
115 TransposeMicrokernelTester()
116 .input_stride(j)
117 .output_stride(i)
118 .block_width(j)
119 .block_height(i)
120 .iterations(1)
121 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
122 }
123 }
124}
125
126TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
127 TransposeMicrokernelTester()
128 .input_stride(4)
129 .output_stride(1)
130 .block_width(2)
131 .block_height(1)
132 .iterations(1)
133 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
134}
135
136TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
137 TransposeMicrokernelTester()
138 .input_stride(2)
139 .output_stride(2)
140 .block_width(2)
141 .block_height(1)
142 .iterations(1)
143 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
144}
145
146TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
147 TransposeMicrokernelTester()
148 .input_stride(4)
149 .output_stride(2)
150 .block_width(2)
151 .block_height(1)
152 .iterations(1)
153 .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
154}
155
156TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4) {
157 TransposeMicrokernelTester()
158 .input_stride(4)
159 .output_stride(1)
160 .block_width(4)
161 .block_height(1)
162 .iterations(1)
163 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
164}
165
166TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_2_bw_1_8) {
167 for(size_t i = 1; i <= 2; ++i){
168 for(size_t j = 1; j <= 8; ++j){
169 TransposeMicrokernelTester()
170 .input_stride(j)
171 .output_stride(i)
172 .block_width(j)
173 .block_height(i)
174 .iterations(1)
175 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
176 }
177 }
178}
179
180TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_8) {
181 TransposeMicrokernelTester()
182 .input_stride(8)
183 .output_stride(1)
184 .block_width(8)
185 .block_height(1)
186 .iterations(1)
187 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
188}
189
190TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_5_8) {
191 for(size_t i = 5; i < 8; ++i){
192 TransposeMicrokernelTester()
193 .input_stride(i)
194 .output_stride(1)
195 .block_width(i)
196 .block_height(1)
197 .iterations(1)
198 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
199 }
200}
201
202TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_5_8) {
203 for(size_t i = 5; i < 8; ++i){
204 TransposeMicrokernelTester()
205 .input_stride(i)
206 .output_stride(2)
207 .block_width(i)
208 .block_height(2)
209 .iterations(1)
210 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
211 }
212}
213
214TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_4) {
215 TransposeMicrokernelTester()
216 .input_stride(4)
217 .output_stride(2)
218 .block_width(4)
219 .block_height(2)
220 .iterations(1)
221 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
222}
223
224TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_4){
225 for(size_t i = 2; i < 2; ++i){
226 TransposeMicrokernelTester()
227 .input_stride(4)
228 .output_stride(i)
229 .block_width(4)
230 .block_height(i)
231 .iterations(1)
232 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
233 }
234}
235
236TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_8){
237 for(size_t i = 2; i < 2; ++i){
238 TransposeMicrokernelTester()
239 .input_stride(8)
240 .output_stride(i)
241 .block_width(8)
242 .block_height(i)
243 .iterations(1)
244 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
245 }
246}
247
248TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_5_8) {
249 for(size_t i = 2; i < 2; ++i){
250 for(size_t j = 5; j < 8; ++j){
251 TransposeMicrokernelTester()
252 .input_stride(j)
253 .output_stride(i)
254 .block_width(j)
255 .block_height(i)
256 .iterations(1)
257 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
258 }
259 }
260}
261
262TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8) {
263 TransposeMicrokernelTester()
264 .input_stride(8)
265 .output_stride(1)
266 .block_width(4)
267 .block_height(1)
268 .iterations(1)
269 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
270}
271
272TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_os_2) {
273 TransposeMicrokernelTester()
274 .input_stride(4)
275 .output_stride(2)
276 .block_width(4)
277 .block_height(1)
278 .iterations(1)
279 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
280}
281
282TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8_os_2) {
283 TransposeMicrokernelTester()
284 .input_stride(8)
285 .output_stride(2)
286 .block_width(4)
287 .block_height(1)
288 .iterations(1)
289 .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
290}
291
292TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
293 TransposeMicrokernelTester()
294 .input_stride(1)
295 .output_stride(2)
296 .block_width(1)
297 .block_height(2)
298 .iterations(1)
299 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
300}
301
302TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
303 for(size_t i = 1; i <= 4; ++i){
304 for(size_t j = 1; j <= 2; ++j){
305 TransposeMicrokernelTester()
306 .input_stride(j)
307 .output_stride(i)
308 .block_width(j)
309 .block_height(i)
310 .iterations(1)
311 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
312 }
313 }
314}
315
316TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
317 TransposeMicrokernelTester()
318 .input_stride(2)
319 .output_stride(2)
320 .block_width(2)
321 .block_height(2)
322 .iterations(1)
323 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
324}
325
326TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
327 for(size_t i = 2; i < 2; ++i){
328 TransposeMicrokernelTester()
329 .input_stride(i)
330 .output_stride(2)
331 .block_width(i)
332 .block_height(2)
333 .iterations(1)
334 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
335 }
336}
337
338TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
339 for(size_t i = 2; i < 2; ++i){
340 TransposeMicrokernelTester()
341 .input_stride(i)
342 .output_stride(4)
343 .block_width(i)
344 .block_height(4)
345 .iterations(1)
346 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
347 }
348}
349
350TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
351 TransposeMicrokernelTester()
352 .input_stride(1)
353 .output_stride(4)
354 .block_width(1)
355 .block_height(4)
356 .iterations(1)
357 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
358}
359
360TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1){
361 for(size_t i = 3; i < 4; ++i){
362 TransposeMicrokernelTester()
363 .input_stride(1)
364 .output_stride(i)
365 .block_width(1)
366 .block_height(i)
367 .iterations(1)
368 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
369 }
370}
371
372TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2){
373 for(size_t i = 3; i < 4; ++i){
374 TransposeMicrokernelTester()
375 .input_stride(2)
376 .output_stride(i)
377 .block_width(2)
378 .block_height(i)
379 .iterations(1)
380 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
381 }
382}
383
384TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
385 for(size_t i = 3; i < 4; ++i){
386 for(size_t j = 2; j < 2; ++j){
387 TransposeMicrokernelTester()
388 .input_stride(j)
389 .output_stride(i)
390 .block_width(j)
391 .block_height(i)
392 .iterations(1)
393 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
394 }
395 }
396}
397
398TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
399 TransposeMicrokernelTester()
400 .input_stride(2)
401 .output_stride(2)
402 .block_width(1)
403 .block_height(2)
404 .iterations(1)
405 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
406}
407
408TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
409 TransposeMicrokernelTester()
410 .input_stride(1)
411 .output_stride(4)
412 .block_width(1)
413 .block_height(2)
414 .iterations(1)
415 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
416}
417
418TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
419 TransposeMicrokernelTester()
420 .input_stride(2)
421 .output_stride(4)
422 .block_width(1)
423 .block_height(2)
424 .iterations(1)
425 .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
426}
427
428TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
429 TransposeMicrokernelTester()
430 .input_stride(2)
431 .output_stride(2)
432 .block_width(2)
433 .block_height(2)
434 .iterations(1)
435 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
436}
437
438TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
439 for(size_t i = 1; i <= 4; ++i){
440 for(size_t j = 1; j <= 4; ++j){
441 TransposeMicrokernelTester()
442 .input_stride(j)
443 .output_stride(i)
444 .block_width(j)
445 .block_height(i)
446 .iterations(1)
447 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
448 }
449 }
450}
451
452TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
453 TransposeMicrokernelTester()
454 .input_stride(4)
455 .output_stride(2)
456 .block_width(4)
457 .block_height(2)
458 .iterations(1)
459 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
460}
461
462TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
463 for(size_t i = 3; i < 4; ++i){
464 TransposeMicrokernelTester()
465 .input_stride(i)
466 .output_stride(2)
467 .block_width(i)
468 .block_height(2)
469 .iterations(1)
470 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
471 }
472}
473
474TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
475 for(size_t i = 3; i < 4; ++i){
476 TransposeMicrokernelTester()
477 .input_stride(i)
478 .output_stride(4)
479 .block_width(i)
480 .block_height(4)
481 .iterations(1)
482 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
483 }
484}
485
486TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
487 TransposeMicrokernelTester()
488 .input_stride(2)
489 .output_stride(4)
490 .block_width(2)
491 .block_height(4)
492 .iterations(1)
493 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
494}
495
496TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2){
497 for(size_t i = 3; i < 4; ++i){
498 TransposeMicrokernelTester()
499 .input_stride(2)
500 .output_stride(i)
501 .block_width(2)
502 .block_height(i)
503 .iterations(1)
504 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
505 }
506}
507
508TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4){
509 for(size_t i = 3; i < 4; ++i){
510 TransposeMicrokernelTester()
511 .input_stride(4)
512 .output_stride(i)
513 .block_width(4)
514 .block_height(i)
515 .iterations(1)
516 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
517 }
518}
519
520TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
521 for(size_t i = 3; i < 4; ++i){
522 for(size_t j = 3; j < 4; ++j){
523 TransposeMicrokernelTester()
524 .input_stride(j)
525 .output_stride(i)
526 .block_width(j)
527 .block_height(i)
528 .iterations(1)
529 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
530 }
531 }
532}
533
534TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
535 TransposeMicrokernelTester()
536 .input_stride(4)
537 .output_stride(2)
538 .block_width(2)
539 .block_height(2)
540 .iterations(1)
541 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
542}
543
544TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
545 TransposeMicrokernelTester()
546 .input_stride(2)
547 .output_stride(4)
548 .block_width(2)
549 .block_height(2)
550 .iterations(1)
551 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
552}
553
554TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
555 TransposeMicrokernelTester()
556 .input_stride(4)
557 .output_stride(4)
558 .block_width(2)
559 .block_height(2)
560 .iterations(1)
561 .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
562}
563
564TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4) {
565 TransposeMicrokernelTester()
566 .input_stride(4)
567 .output_stride(2)
568 .block_width(4)
569 .block_height(2)
570 .iterations(1)
571 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
572}
573
574TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_1_4_bw_1_8) {
575 for(size_t i = 1; i <= 4; ++i){
576 for(size_t j = 1; j <= 8; ++j){
577 TransposeMicrokernelTester()
578 .input_stride(j)
579 .output_stride(i)
580 .block_width(j)
581 .block_height(i)
582 .iterations(1)
583 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
584 }
585 }
586}
587
588TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_8) {
589 TransposeMicrokernelTester()
590 .input_stride(8)
591 .output_stride(2)
592 .block_width(8)
593 .block_height(2)
594 .iterations(1)
595 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
596}
597
598TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_5_8) {
599 for(size_t i = 5; i < 8; ++i){
600 TransposeMicrokernelTester()
601 .input_stride(i)
602 .output_stride(2)
603 .block_width(i)
604 .block_height(2)
605 .iterations(1)
606 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
607 }
608}
609
610TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_5_8) {
611 for(size_t i = 5; i < 8; ++i){
612 TransposeMicrokernelTester()
613 .input_stride(i)
614 .output_stride(4)
615 .block_width(i)
616 .block_height(4)
617 .iterations(1)
618 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
619 }
620}
621
622TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_4) {
623 TransposeMicrokernelTester()
624 .input_stride(4)
625 .output_stride(4)
626 .block_width(4)
627 .block_height(4)
628 .iterations(1)
629 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
630}
631
632TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_4){
633 for(size_t i = 3; i < 4; ++i){
634 TransposeMicrokernelTester()
635 .input_stride(4)
636 .output_stride(i)
637 .block_width(4)
638 .block_height(i)
639 .iterations(1)
640 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
641 }
642}
643
644TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_8){
645 for(size_t i = 3; i < 4; ++i){
646 TransposeMicrokernelTester()
647 .input_stride(8)
648 .output_stride(i)
649 .block_width(8)
650 .block_height(i)
651 .iterations(1)
652 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
653 }
654}
655
656TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_5_8) {
657 for(size_t i = 3; i < 4; ++i){
658 for(size_t j = 5; j < 8; ++j){
659 TransposeMicrokernelTester()
660 .input_stride(j)
661 .output_stride(i)
662 .block_width(j)
663 .block_height(i)
664 .iterations(1)
665 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
666 }
667 }
668}
669
670TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8) {
671 TransposeMicrokernelTester()
672 .input_stride(8)
673 .output_stride(2)
674 .block_width(4)
675 .block_height(2)
676 .iterations(1)
677 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
678}
679
680TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_os_4) {
681 TransposeMicrokernelTester()
682 .input_stride(4)
683 .output_stride(4)
684 .block_width(4)
685 .block_height(2)
686 .iterations(1)
687 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
688}
689
690TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8_os_4) {
691 TransposeMicrokernelTester()
692 .input_stride(8)
693 .output_stride(4)
694 .block_width(4)
695 .block_height(2)
696 .iterations(1)
697 .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
698}
699
700TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
701 TransposeMicrokernelTester()
702 .input_stride(1)
703 .output_stride(4)
704 .block_width(1)
705 .block_height(4)
706 .iterations(1)
707 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
708}
709
710TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
711 for(size_t i = 1; i <= 8; ++i){
712 for(size_t j = 1; j <= 2; ++j){
713 TransposeMicrokernelTester()
714 .input_stride(j)
715 .output_stride(i)
716 .block_width(j)
717 .block_height(i)
718 .iterations(1)
719 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
720 }
721 }
722}
723
724TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
725 TransposeMicrokernelTester()
726 .input_stride(2)
727 .output_stride(4)
728 .block_width(2)
729 .block_height(4)
730 .iterations(1)
731 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
732}
733
734TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
735 for(size_t i = 2; i < 2; ++i){
736 TransposeMicrokernelTester()
737 .input_stride(i)
738 .output_stride(4)
739 .block_width(i)
740 .block_height(4)
741 .iterations(1)
742 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
743 }
744}
745
746TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
747 for(size_t i = 2; i < 2; ++i){
748 TransposeMicrokernelTester()
749 .input_stride(i)
750 .output_stride(8)
751 .block_width(i)
752 .block_height(8)
753 .iterations(1)
754 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
755 }
756}
757
758TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
759 TransposeMicrokernelTester()
760 .input_stride(1)
761 .output_stride(8)
762 .block_width(1)
763 .block_height(8)
764 .iterations(1)
765 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
766}
767
768TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1){
769 for(size_t i = 5; i < 8; ++i){
770 TransposeMicrokernelTester()
771 .input_stride(1)
772 .output_stride(i)
773 .block_width(1)
774 .block_height(i)
775 .iterations(1)
776 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
777 }
778}
779
780TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2){
781 for(size_t i = 5; i < 8; ++i){
782 TransposeMicrokernelTester()
783 .input_stride(2)
784 .output_stride(i)
785 .block_width(2)
786 .block_height(i)
787 .iterations(1)
788 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
789 }
790}
791
792TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
793 for(size_t i = 5; i < 8; ++i){
794 for(size_t j = 2; j < 2; ++j){
795 TransposeMicrokernelTester()
796 .input_stride(j)
797 .output_stride(i)
798 .block_width(j)
799 .block_height(i)
800 .iterations(1)
801 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
802 }
803 }
804}
805
806TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
807 TransposeMicrokernelTester()
808 .input_stride(2)
809 .output_stride(4)
810 .block_width(1)
811 .block_height(4)
812 .iterations(1)
813 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
814}
815
816TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
817 TransposeMicrokernelTester()
818 .input_stride(1)
819 .output_stride(8)
820 .block_width(1)
821 .block_height(4)
822 .iterations(1)
823 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
824}
825
826TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
827 TransposeMicrokernelTester()
828 .input_stride(2)
829 .output_stride(8)
830 .block_width(1)
831 .block_height(4)
832 .iterations(1)
833 .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
834}
835
836TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
837 TransposeMicrokernelTester()
838 .input_stride(2)
839 .output_stride(4)
840 .block_width(2)
841 .block_height(4)
842 .iterations(1)
843 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
844}
845
846TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
847 for(size_t i = 1; i <= 8; ++i){
848 for(size_t j = 1; j <= 4; ++j){
849 TransposeMicrokernelTester()
850 .input_stride(j)
851 .output_stride(i)
852 .block_width(j)
853 .block_height(i)
854 .iterations(1)
855 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
856 }
857 }
858}
859
860TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
861 TransposeMicrokernelTester()
862 .input_stride(4)
863 .output_stride(4)
864 .block_width(4)
865 .block_height(4)
866 .iterations(1)
867 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
868}
869
870TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
871 for(size_t i = 3; i < 4; ++i){
872 TransposeMicrokernelTester()
873 .input_stride(i)
874 .output_stride(4)
875 .block_width(i)
876 .block_height(4)
877 .iterations(1)
878 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
879 }
880}
881
882TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
883 for(size_t i = 3; i < 4; ++i){
884 TransposeMicrokernelTester()
885 .input_stride(i)
886 .output_stride(8)
887 .block_width(i)
888 .block_height(8)
889 .iterations(1)
890 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
891 }
892}
893
894TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
895 TransposeMicrokernelTester()
896 .input_stride(2)
897 .output_stride(8)
898 .block_width(2)
899 .block_height(8)
900 .iterations(1)
901 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
902}
903
904TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2){
905 for(size_t i = 5; i < 8; ++i){
906 TransposeMicrokernelTester()
907 .input_stride(2)
908 .output_stride(i)
909 .block_width(2)
910 .block_height(i)
911 .iterations(1)
912 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
913 }
914}
915
916TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4){
917 for(size_t i = 5; i < 8; ++i){
918 TransposeMicrokernelTester()
919 .input_stride(4)
920 .output_stride(i)
921 .block_width(4)
922 .block_height(i)
923 .iterations(1)
924 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
925 }
926}
927
928TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
929 for(size_t i = 5; i < 8; ++i){
930 for(size_t j = 3; j < 4; ++j){
931 TransposeMicrokernelTester()
932 .input_stride(j)
933 .output_stride(i)
934 .block_width(j)
935 .block_height(i)
936 .iterations(1)
937 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
938 }
939 }
940}
941
942TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
943 TransposeMicrokernelTester()
944 .input_stride(4)
945 .output_stride(4)
946 .block_width(2)
947 .block_height(4)
948 .iterations(1)
949 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
950}
951
952TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
953 TransposeMicrokernelTester()
954 .input_stride(2)
955 .output_stride(8)
956 .block_width(2)
957 .block_height(4)
958 .iterations(1)
959 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
960}
961
962TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
963 TransposeMicrokernelTester()
964 .input_stride(4)
965 .output_stride(8)
966 .block_width(2)
967 .block_height(4)
968 .iterations(1)
969 .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
970}
971
972TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4) {
973 TransposeMicrokernelTester()
974 .input_stride(4)
975 .output_stride(4)
976 .block_width(4)
977 .block_height(4)
978 .iterations(1)
979 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
980}
981
982TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_1_8_bw_1_8) {
983 for(size_t i = 1; i <= 8; ++i){
984 for(size_t j = 1; j <= 8; ++j){
985 TransposeMicrokernelTester()
986 .input_stride(j)
987 .output_stride(i)
988 .block_width(j)
989 .block_height(i)
990 .iterations(1)
991 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
992 }
993 }
994}
995
996TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_8) {
997 TransposeMicrokernelTester()
998 .input_stride(8)
999 .output_stride(4)
1000 .block_width(8)
1001 .block_height(4)
1002 .iterations(1)
1003 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1004}
1005
1006TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_5_8) {
1007 for(size_t i = 5; i < 8; ++i){
1008 TransposeMicrokernelTester()
1009 .input_stride(i)
1010 .output_stride(4)
1011 .block_width(i)
1012 .block_height(4)
1013 .iterations(1)
1014 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1015 }
1016}
1017
1018TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_5_8) {
1019 for(size_t i = 5; i < 8; ++i){
1020 TransposeMicrokernelTester()
1021 .input_stride(i)
1022 .output_stride(8)
1023 .block_width(i)
1024 .block_height(8)
1025 .iterations(1)
1026 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1027 }
1028}
1029
1030TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_4) {
1031 TransposeMicrokernelTester()
1032 .input_stride(4)
1033 .output_stride(8)
1034 .block_width(4)
1035 .block_height(8)
1036 .iterations(1)
1037 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1038}
1039
1040TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_4){
1041 for(size_t i = 5; i < 8; ++i){
1042 TransposeMicrokernelTester()
1043 .input_stride(4)
1044 .output_stride(i)
1045 .block_width(4)
1046 .block_height(i)
1047 .iterations(1)
1048 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1049 }
1050}
1051
1052TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_8){
1053 for(size_t i = 5; i < 8; ++i){
1054 TransposeMicrokernelTester()
1055 .input_stride(8)
1056 .output_stride(i)
1057 .block_width(8)
1058 .block_height(i)
1059 .iterations(1)
1060 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1061 }
1062}
1063
1064TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_5_8) {
1065 for(size_t i = 5; i < 8; ++i){
1066 for(size_t j = 5; j < 8; ++j){
1067 TransposeMicrokernelTester()
1068 .input_stride(j)
1069 .output_stride(i)
1070 .block_width(j)
1071 .block_height(i)
1072 .iterations(1)
1073 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1074 }
1075 }
1076}
1077
1078TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8) {
1079 TransposeMicrokernelTester()
1080 .input_stride(8)
1081 .output_stride(4)
1082 .block_width(4)
1083 .block_height(4)
1084 .iterations(1)
1085 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1086}
1087
1088TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_os_8) {
1089 TransposeMicrokernelTester()
1090 .input_stride(4)
1091 .output_stride(8)
1092 .block_width(4)
1093 .block_height(4)
1094 .iterations(1)
1095 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1096}
1097
1098TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8_os_8) {
1099 TransposeMicrokernelTester()
1100 .input_stride(8)
1101 .output_stride(8)
1102 .block_width(4)
1103 .block_height(4)
1104 .iterations(1)
1105 .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
1106}
1107
Alan Kelly1945f0b2021-12-24 01:26:45 -08001108#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1109 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8) {
1110 TEST_REQUIRES_X86_SSE2;
1111 TransposeMicrokernelTester()
1112 .input_stride(8)
1113 .output_stride(4)
1114 .block_width(8)
1115 .block_height(4)
1116 .iterations(1)
1117 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1118 }
1119
1120 TEST(X16_TRANSPOSE__4X8_SSE2, bh_1_8_bw_1_16) {
1121 TEST_REQUIRES_X86_SSE2;
1122 for(size_t i = 1; i <= 8; ++i){
1123 for(size_t j = 1; j <= 16; ++j){
1124 TransposeMicrokernelTester()
1125 .input_stride(j)
1126 .output_stride(i)
1127 .block_width(j)
1128 .block_height(i)
1129 .iterations(1)
1130 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1131 }
1132 }
1133 }
1134
1135 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_16) {
1136 TEST_REQUIRES_X86_SSE2;
1137 TransposeMicrokernelTester()
1138 .input_stride(16)
1139 .output_stride(4)
1140 .block_width(16)
1141 .block_height(4)
1142 .iterations(1)
1143 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1144 }
1145
1146 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_9_16) {
1147 TEST_REQUIRES_X86_SSE2;
1148 for(size_t i = 9; i < 16; ++i){
1149 TransposeMicrokernelTester()
1150 .input_stride(i)
1151 .output_stride(4)
1152 .block_width(i)
1153 .block_height(4)
1154 .iterations(1)
1155 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1156 }
1157 }
1158
1159 TEST(X16_TRANSPOSE__4X8_SSE2, bh_8_bw_9_16) {
1160 TEST_REQUIRES_X86_SSE2;
1161 for(size_t i = 9; i < 16; ++i){
1162 TransposeMicrokernelTester()
1163 .input_stride(i)
1164 .output_stride(8)
1165 .block_width(i)
1166 .block_height(8)
1167 .iterations(1)
1168 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1169 }
1170 }
1171
1172 TEST(X16_TRANSPOSE__4X8_SSE2, bh_8_bw_8) {
1173 TEST_REQUIRES_X86_SSE2;
1174 TransposeMicrokernelTester()
1175 .input_stride(8)
1176 .output_stride(8)
1177 .block_width(8)
1178 .block_height(8)
1179 .iterations(1)
1180 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1181 }
1182
1183 TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_8){
1184 TEST_REQUIRES_X86_SSE2;
1185 for(size_t i = 5; i < 8; ++i){
1186 TransposeMicrokernelTester()
1187 .input_stride(8)
1188 .output_stride(i)
1189 .block_width(8)
1190 .block_height(i)
1191 .iterations(1)
1192 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1193 }
1194 }
1195
1196 TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_16){
1197 TEST_REQUIRES_X86_SSE2;
1198 for(size_t i = 5; i < 8; ++i){
1199 TransposeMicrokernelTester()
1200 .input_stride(16)
1201 .output_stride(i)
1202 .block_width(16)
1203 .block_height(i)
1204 .iterations(1)
1205 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1206 }
1207 }
1208
1209 TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_9_16) {
1210 TEST_REQUIRES_X86_SSE2;
1211 for(size_t i = 5; i < 8; ++i){
1212 for(size_t j = 9; j < 16; ++j){
1213 TransposeMicrokernelTester()
1214 .input_stride(j)
1215 .output_stride(i)
1216 .block_width(j)
1217 .block_height(i)
1218 .iterations(1)
1219 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1220 }
1221 }
1222 }
1223
1224 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_is_16) {
1225 TEST_REQUIRES_X86_SSE2;
1226 TransposeMicrokernelTester()
1227 .input_stride(16)
1228 .output_stride(4)
1229 .block_width(8)
1230 .block_height(4)
1231 .iterations(1)
1232 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1233 }
1234
1235 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_os_8) {
1236 TEST_REQUIRES_X86_SSE2;
1237 TransposeMicrokernelTester()
1238 .input_stride(8)
1239 .output_stride(8)
1240 .block_width(8)
1241 .block_height(4)
1242 .iterations(1)
1243 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1244 }
1245
1246 TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_is_16_os_8) {
1247 TEST_REQUIRES_X86_SSE2;
1248 TransposeMicrokernelTester()
1249 .input_stride(16)
1250 .output_stride(8)
1251 .block_width(8)
1252 .block_height(4)
1253 .iterations(1)
1254 .Test(xnn_x16_transpose_ukernel__4x8_sse2);
1255 }
1256#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kelly5da6d382022-01-14 03:19:43 -08001257
1258
1259#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyf2b233b2022-01-31 02:53:57 -08001260 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001261 TEST_REQUIRES_X86_SSE2;
1262 TransposeMicrokernelTester()
1263 .input_stride(8)
1264 .output_stride(8)
1265 .block_width(8)
1266 .block_height(8)
1267 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001268 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001269 }
1270
Alan Kellyf2b233b2022-01-31 02:53:57 -08001271 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_1_16_bw_1_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001272 TEST_REQUIRES_X86_SSE2;
1273 for(size_t i = 1; i <= 16; ++i){
1274 for(size_t j = 1; j <= 16; ++j){
1275 TransposeMicrokernelTester()
1276 .input_stride(j)
1277 .output_stride(i)
1278 .block_width(j)
1279 .block_height(i)
1280 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001281 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001282 }
1283 }
1284 }
1285
Alan Kellyf2b233b2022-01-31 02:53:57 -08001286 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001287 TEST_REQUIRES_X86_SSE2;
1288 TransposeMicrokernelTester()
1289 .input_stride(16)
1290 .output_stride(8)
1291 .block_width(16)
1292 .block_height(8)
1293 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001294 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001295 }
1296
Alan Kellyf2b233b2022-01-31 02:53:57 -08001297 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001298 TEST_REQUIRES_X86_SSE2;
1299 for(size_t i = 9; i < 16; ++i){
1300 TransposeMicrokernelTester()
1301 .input_stride(i)
1302 .output_stride(8)
1303 .block_width(i)
1304 .block_height(8)
1305 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001306 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001307 }
1308 }
1309
Alan Kellyf2b233b2022-01-31 02:53:57 -08001310 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_16_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001311 TEST_REQUIRES_X86_SSE2;
1312 for(size_t i = 9; i < 16; ++i){
1313 TransposeMicrokernelTester()
1314 .input_stride(i)
1315 .output_stride(16)
1316 .block_width(i)
1317 .block_height(16)
1318 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001319 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001320 }
1321 }
1322
Alan Kellyf2b233b2022-01-31 02:53:57 -08001323 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_16_bw_8) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001324 TEST_REQUIRES_X86_SSE2;
1325 TransposeMicrokernelTester()
1326 .input_stride(8)
1327 .output_stride(16)
1328 .block_width(8)
1329 .block_height(16)
1330 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001331 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001332 }
1333
Alan Kellyf2b233b2022-01-31 02:53:57 -08001334 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_8){
Alan Kelly5da6d382022-01-14 03:19:43 -08001335 TEST_REQUIRES_X86_SSE2;
1336 for(size_t i = 9; i < 16; ++i){
1337 TransposeMicrokernelTester()
1338 .input_stride(8)
1339 .output_stride(i)
1340 .block_width(8)
1341 .block_height(i)
1342 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001343 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001344 }
1345 }
1346
Alan Kellyf2b233b2022-01-31 02:53:57 -08001347 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_16){
Alan Kelly5da6d382022-01-14 03:19:43 -08001348 TEST_REQUIRES_X86_SSE2;
1349 for(size_t i = 9; i < 16; ++i){
1350 TransposeMicrokernelTester()
1351 .input_stride(16)
1352 .output_stride(i)
1353 .block_width(16)
1354 .block_height(i)
1355 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001356 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001357 }
1358 }
1359
Alan Kellyf2b233b2022-01-31 02:53:57 -08001360 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001361 TEST_REQUIRES_X86_SSE2;
1362 for(size_t i = 9; i < 16; ++i){
1363 for(size_t j = 9; j < 16; ++j){
1364 TransposeMicrokernelTester()
1365 .input_stride(j)
1366 .output_stride(i)
1367 .block_width(j)
1368 .block_height(i)
1369 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001370 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001371 }
1372 }
1373 }
1374
Alan Kellyf2b233b2022-01-31 02:53:57 -08001375 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_is_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001376 TEST_REQUIRES_X86_SSE2;
1377 TransposeMicrokernelTester()
1378 .input_stride(16)
1379 .output_stride(8)
1380 .block_width(8)
1381 .block_height(8)
1382 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001383 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001384 }
1385
Alan Kellyf2b233b2022-01-31 02:53:57 -08001386 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_os_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001387 TEST_REQUIRES_X86_SSE2;
1388 TransposeMicrokernelTester()
1389 .input_stride(8)
1390 .output_stride(16)
1391 .block_width(8)
1392 .block_height(8)
1393 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001394 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001395 }
1396
Alan Kellyf2b233b2022-01-31 02:53:57 -08001397 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_is_16_os_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001398 TEST_REQUIRES_X86_SSE2;
1399 TransposeMicrokernelTester()
1400 .input_stride(16)
1401 .output_stride(16)
1402 .block_width(8)
1403 .block_height(8)
1404 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001405 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001406 }
1407#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1408
1409
1410#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1411 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8) {
1412 TEST_REQUIRES_X86_SSE2;
1413 TransposeMicrokernelTester()
1414 .input_stride(8)
1415 .output_stride(8)
1416 .block_width(8)
1417 .block_height(8)
1418 .iterations(1)
1419 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1420 }
1421
1422 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_1_16_bw_1_16) {
1423 TEST_REQUIRES_X86_SSE2;
1424 for(size_t i = 1; i <= 16; ++i){
1425 for(size_t j = 1; j <= 16; ++j){
1426 TransposeMicrokernelTester()
1427 .input_stride(j)
1428 .output_stride(i)
1429 .block_width(j)
1430 .block_height(i)
1431 .iterations(1)
1432 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1433 }
1434 }
1435 }
1436
1437 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_16) {
1438 TEST_REQUIRES_X86_SSE2;
1439 TransposeMicrokernelTester()
1440 .input_stride(16)
1441 .output_stride(8)
1442 .block_width(16)
1443 .block_height(8)
1444 .iterations(1)
1445 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1446 }
1447
1448 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_9_16) {
1449 TEST_REQUIRES_X86_SSE2;
1450 for(size_t i = 9; i < 16; ++i){
1451 TransposeMicrokernelTester()
1452 .input_stride(i)
1453 .output_stride(8)
1454 .block_width(i)
1455 .block_height(8)
1456 .iterations(1)
1457 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1458 }
1459 }
1460
1461 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_16_bw_9_16) {
1462 TEST_REQUIRES_X86_SSE2;
1463 for(size_t i = 9; i < 16; ++i){
1464 TransposeMicrokernelTester()
1465 .input_stride(i)
1466 .output_stride(16)
1467 .block_width(i)
1468 .block_height(16)
1469 .iterations(1)
1470 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1471 }
1472 }
1473
1474 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_16_bw_8) {
1475 TEST_REQUIRES_X86_SSE2;
1476 TransposeMicrokernelTester()
1477 .input_stride(8)
1478 .output_stride(16)
1479 .block_width(8)
1480 .block_height(16)
1481 .iterations(1)
1482 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1483 }
1484
1485 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_8){
1486 TEST_REQUIRES_X86_SSE2;
1487 for(size_t i = 9; i < 16; ++i){
1488 TransposeMicrokernelTester()
1489 .input_stride(8)
1490 .output_stride(i)
1491 .block_width(8)
1492 .block_height(i)
1493 .iterations(1)
1494 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1495 }
1496 }
1497
1498 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_16){
1499 TEST_REQUIRES_X86_SSE2;
1500 for(size_t i = 9; i < 16; ++i){
1501 TransposeMicrokernelTester()
1502 .input_stride(16)
1503 .output_stride(i)
1504 .block_width(16)
1505 .block_height(i)
1506 .iterations(1)
1507 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1508 }
1509 }
1510
1511 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_9_16) {
1512 TEST_REQUIRES_X86_SSE2;
1513 for(size_t i = 9; i < 16; ++i){
1514 for(size_t j = 9; j < 16; ++j){
1515 TransposeMicrokernelTester()
1516 .input_stride(j)
1517 .output_stride(i)
1518 .block_width(j)
1519 .block_height(i)
1520 .iterations(1)
1521 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1522 }
1523 }
1524 }
1525
1526 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_is_16) {
1527 TEST_REQUIRES_X86_SSE2;
1528 TransposeMicrokernelTester()
1529 .input_stride(16)
1530 .output_stride(8)
1531 .block_width(8)
1532 .block_height(8)
1533 .iterations(1)
1534 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1535 }
1536
1537 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_os_16) {
1538 TEST_REQUIRES_X86_SSE2;
1539 TransposeMicrokernelTester()
1540 .input_stride(8)
1541 .output_stride(16)
1542 .block_width(8)
1543 .block_height(8)
1544 .iterations(1)
1545 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1546 }
1547
1548 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_is_16_os_16) {
1549 TEST_REQUIRES_X86_SSE2;
1550 TransposeMicrokernelTester()
1551 .input_stride(16)
1552 .output_stride(16)
1553 .block_width(8)
1554 .block_height(8)
1555 .iterations(1)
1556 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
1557 }
1558#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1559
1560
1561#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyf2b233b2022-01-31 02:53:57 -08001562 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001563 TEST_REQUIRES_X86_SSE2;
1564 TransposeMicrokernelTester()
1565 .input_stride(8)
1566 .output_stride(8)
1567 .block_width(8)
1568 .block_height(8)
1569 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001570 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001571 }
1572
Alan Kellyf2b233b2022-01-31 02:53:57 -08001573 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_1_16_bw_1_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001574 TEST_REQUIRES_X86_SSE2;
1575 for(size_t i = 1; i <= 16; ++i){
1576 for(size_t j = 1; j <= 16; ++j){
1577 TransposeMicrokernelTester()
1578 .input_stride(j)
1579 .output_stride(i)
1580 .block_width(j)
1581 .block_height(i)
1582 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001583 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001584 }
1585 }
1586 }
1587
Alan Kellyf2b233b2022-01-31 02:53:57 -08001588 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001589 TEST_REQUIRES_X86_SSE2;
1590 TransposeMicrokernelTester()
1591 .input_stride(16)
1592 .output_stride(8)
1593 .block_width(16)
1594 .block_height(8)
1595 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001596 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001597 }
1598
Alan Kellyf2b233b2022-01-31 02:53:57 -08001599 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001600 TEST_REQUIRES_X86_SSE2;
1601 for(size_t i = 9; i < 16; ++i){
1602 TransposeMicrokernelTester()
1603 .input_stride(i)
1604 .output_stride(8)
1605 .block_width(i)
1606 .block_height(8)
1607 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001608 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001609 }
1610 }
1611
Alan Kellyf2b233b2022-01-31 02:53:57 -08001612 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_16_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001613 TEST_REQUIRES_X86_SSE2;
1614 for(size_t i = 9; i < 16; ++i){
1615 TransposeMicrokernelTester()
1616 .input_stride(i)
1617 .output_stride(16)
1618 .block_width(i)
1619 .block_height(16)
1620 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001621 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001622 }
1623 }
1624
Alan Kellyf2b233b2022-01-31 02:53:57 -08001625 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_16_bw_8) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001626 TEST_REQUIRES_X86_SSE2;
1627 TransposeMicrokernelTester()
1628 .input_stride(8)
1629 .output_stride(16)
1630 .block_width(8)
1631 .block_height(16)
1632 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001633 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001634 }
1635
Alan Kellyf2b233b2022-01-31 02:53:57 -08001636 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_8){
Alan Kelly5da6d382022-01-14 03:19:43 -08001637 TEST_REQUIRES_X86_SSE2;
1638 for(size_t i = 9; i < 16; ++i){
1639 TransposeMicrokernelTester()
1640 .input_stride(8)
1641 .output_stride(i)
1642 .block_width(8)
1643 .block_height(i)
1644 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001645 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001646 }
1647 }
1648
Alan Kellyf2b233b2022-01-31 02:53:57 -08001649 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_16){
Alan Kelly5da6d382022-01-14 03:19:43 -08001650 TEST_REQUIRES_X86_SSE2;
1651 for(size_t i = 9; i < 16; ++i){
1652 TransposeMicrokernelTester()
1653 .input_stride(16)
1654 .output_stride(i)
1655 .block_width(16)
1656 .block_height(i)
1657 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001658 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001659 }
1660 }
1661
Alan Kellyf2b233b2022-01-31 02:53:57 -08001662 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_9_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001663 TEST_REQUIRES_X86_SSE2;
1664 for(size_t i = 9; i < 16; ++i){
1665 for(size_t j = 9; j < 16; ++j){
1666 TransposeMicrokernelTester()
1667 .input_stride(j)
1668 .output_stride(i)
1669 .block_width(j)
1670 .block_height(i)
1671 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001672 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001673 }
1674 }
1675 }
1676
Alan Kellyf2b233b2022-01-31 02:53:57 -08001677 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_is_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001678 TEST_REQUIRES_X86_SSE2;
1679 TransposeMicrokernelTester()
1680 .input_stride(16)
1681 .output_stride(8)
1682 .block_width(8)
1683 .block_height(8)
1684 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001685 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001686 }
1687
Alan Kellyf2b233b2022-01-31 02:53:57 -08001688 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_os_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001689 TEST_REQUIRES_X86_SSE2;
1690 TransposeMicrokernelTester()
1691 .input_stride(8)
1692 .output_stride(16)
1693 .block_width(8)
1694 .block_height(8)
1695 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001696 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001697 }
1698
Alan Kellyf2b233b2022-01-31 02:53:57 -08001699 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_is_16_os_16) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001700 TEST_REQUIRES_X86_SSE2;
1701 TransposeMicrokernelTester()
1702 .input_stride(16)
1703 .output_stride(16)
1704 .block_width(8)
1705 .block_height(8)
1706 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001707 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001708 }
1709#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1710
1711
1712#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1713 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8) {
1714 TEST_REQUIRES_X86_SSE2;
1715 TransposeMicrokernelTester()
1716 .input_stride(8)
1717 .output_stride(8)
1718 .block_width(8)
1719 .block_height(8)
1720 .iterations(1)
1721 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1722 }
1723
1724 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_1_16_bw_1_16) {
1725 TEST_REQUIRES_X86_SSE2;
1726 for(size_t i = 1; i <= 16; ++i){
1727 for(size_t j = 1; j <= 16; ++j){
1728 TransposeMicrokernelTester()
1729 .input_stride(j)
1730 .output_stride(i)
1731 .block_width(j)
1732 .block_height(i)
1733 .iterations(1)
1734 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1735 }
1736 }
1737 }
1738
1739 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_16) {
1740 TEST_REQUIRES_X86_SSE2;
1741 TransposeMicrokernelTester()
1742 .input_stride(16)
1743 .output_stride(8)
1744 .block_width(16)
1745 .block_height(8)
1746 .iterations(1)
1747 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1748 }
1749
1750 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_9_16) {
1751 TEST_REQUIRES_X86_SSE2;
1752 for(size_t i = 9; i < 16; ++i){
1753 TransposeMicrokernelTester()
1754 .input_stride(i)
1755 .output_stride(8)
1756 .block_width(i)
1757 .block_height(8)
1758 .iterations(1)
1759 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1760 }
1761 }
1762
1763 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_16_bw_9_16) {
1764 TEST_REQUIRES_X86_SSE2;
1765 for(size_t i = 9; i < 16; ++i){
1766 TransposeMicrokernelTester()
1767 .input_stride(i)
1768 .output_stride(16)
1769 .block_width(i)
1770 .block_height(16)
1771 .iterations(1)
1772 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1773 }
1774 }
1775
1776 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_16_bw_8) {
1777 TEST_REQUIRES_X86_SSE2;
1778 TransposeMicrokernelTester()
1779 .input_stride(8)
1780 .output_stride(16)
1781 .block_width(8)
1782 .block_height(16)
1783 .iterations(1)
1784 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1785 }
1786
1787 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_8){
1788 TEST_REQUIRES_X86_SSE2;
1789 for(size_t i = 9; i < 16; ++i){
1790 TransposeMicrokernelTester()
1791 .input_stride(8)
1792 .output_stride(i)
1793 .block_width(8)
1794 .block_height(i)
1795 .iterations(1)
1796 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1797 }
1798 }
1799
1800 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_16){
1801 TEST_REQUIRES_X86_SSE2;
1802 for(size_t i = 9; i < 16; ++i){
1803 TransposeMicrokernelTester()
1804 .input_stride(16)
1805 .output_stride(i)
1806 .block_width(16)
1807 .block_height(i)
1808 .iterations(1)
1809 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1810 }
1811 }
1812
1813 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_9_16) {
1814 TEST_REQUIRES_X86_SSE2;
1815 for(size_t i = 9; i < 16; ++i){
1816 for(size_t j = 9; j < 16; ++j){
1817 TransposeMicrokernelTester()
1818 .input_stride(j)
1819 .output_stride(i)
1820 .block_width(j)
1821 .block_height(i)
1822 .iterations(1)
1823 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1824 }
1825 }
1826 }
1827
1828 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_is_16) {
1829 TEST_REQUIRES_X86_SSE2;
1830 TransposeMicrokernelTester()
1831 .input_stride(16)
1832 .output_stride(8)
1833 .block_width(8)
1834 .block_height(8)
1835 .iterations(1)
1836 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1837 }
1838
1839 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_os_16) {
1840 TEST_REQUIRES_X86_SSE2;
1841 TransposeMicrokernelTester()
1842 .input_stride(8)
1843 .output_stride(16)
1844 .block_width(8)
1845 .block_height(8)
1846 .iterations(1)
1847 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1848 }
1849
1850 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_is_16_os_16) {
1851 TEST_REQUIRES_X86_SSE2;
1852 TransposeMicrokernelTester()
1853 .input_stride(16)
1854 .output_stride(16)
1855 .block_width(8)
1856 .block_height(8)
1857 .iterations(1)
1858 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
1859 }
1860#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1861
1862
1863#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1864 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8) {
1865 TEST_REQUIRES_X86_SSE2;
1866 TransposeMicrokernelTester()
1867 .input_stride(8)
1868 .output_stride(8)
1869 .block_width(8)
1870 .block_height(8)
1871 .iterations(1)
1872 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1873 }
1874
1875 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_1_16_bw_1_16) {
1876 TEST_REQUIRES_X86_SSE2;
1877 for(size_t i = 1; i <= 16; ++i){
1878 for(size_t j = 1; j <= 16; ++j){
1879 TransposeMicrokernelTester()
1880 .input_stride(j)
1881 .output_stride(i)
1882 .block_width(j)
1883 .block_height(i)
1884 .iterations(1)
1885 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1886 }
1887 }
1888 }
1889
1890 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_16) {
1891 TEST_REQUIRES_X86_SSE2;
1892 TransposeMicrokernelTester()
1893 .input_stride(16)
1894 .output_stride(8)
1895 .block_width(16)
1896 .block_height(8)
1897 .iterations(1)
1898 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1899 }
1900
1901 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_9_16) {
1902 TEST_REQUIRES_X86_SSE2;
1903 for(size_t i = 9; i < 16; ++i){
1904 TransposeMicrokernelTester()
1905 .input_stride(i)
1906 .output_stride(8)
1907 .block_width(i)
1908 .block_height(8)
1909 .iterations(1)
1910 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1911 }
1912 }
1913
1914 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_16_bw_9_16) {
1915 TEST_REQUIRES_X86_SSE2;
1916 for(size_t i = 9; i < 16; ++i){
1917 TransposeMicrokernelTester()
1918 .input_stride(i)
1919 .output_stride(16)
1920 .block_width(i)
1921 .block_height(16)
1922 .iterations(1)
1923 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1924 }
1925 }
1926
1927 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_16_bw_8) {
1928 TEST_REQUIRES_X86_SSE2;
1929 TransposeMicrokernelTester()
1930 .input_stride(8)
1931 .output_stride(16)
1932 .block_width(8)
1933 .block_height(16)
1934 .iterations(1)
1935 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1936 }
1937
1938 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_8){
1939 TEST_REQUIRES_X86_SSE2;
1940 for(size_t i = 9; i < 16; ++i){
1941 TransposeMicrokernelTester()
1942 .input_stride(8)
1943 .output_stride(i)
1944 .block_width(8)
1945 .block_height(i)
1946 .iterations(1)
1947 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1948 }
1949 }
1950
1951 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_16){
1952 TEST_REQUIRES_X86_SSE2;
1953 for(size_t i = 9; i < 16; ++i){
1954 TransposeMicrokernelTester()
1955 .input_stride(16)
1956 .output_stride(i)
1957 .block_width(16)
1958 .block_height(i)
1959 .iterations(1)
1960 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1961 }
1962 }
1963
1964 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_9_16) {
1965 TEST_REQUIRES_X86_SSE2;
1966 for(size_t i = 9; i < 16; ++i){
1967 for(size_t j = 9; j < 16; ++j){
1968 TransposeMicrokernelTester()
1969 .input_stride(j)
1970 .output_stride(i)
1971 .block_width(j)
1972 .block_height(i)
1973 .iterations(1)
1974 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1975 }
1976 }
1977 }
1978
1979 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_is_16) {
1980 TEST_REQUIRES_X86_SSE2;
1981 TransposeMicrokernelTester()
1982 .input_stride(16)
1983 .output_stride(8)
1984 .block_width(8)
1985 .block_height(8)
1986 .iterations(1)
1987 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1988 }
1989
1990 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_os_16) {
1991 TEST_REQUIRES_X86_SSE2;
1992 TransposeMicrokernelTester()
1993 .input_stride(8)
1994 .output_stride(16)
1995 .block_width(8)
1996 .block_height(8)
1997 .iterations(1)
1998 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
1999 }
2000
2001 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_is_16_os_16) {
2002 TEST_REQUIRES_X86_SSE2;
2003 TransposeMicrokernelTester()
2004 .input_stride(16)
2005 .output_stride(16)
2006 .block_width(8)
2007 .block_height(8)
2008 .iterations(1)
2009 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
2010 }
2011#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellycfd947d2022-02-02 00:18:46 -08002012
2013
2014#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2015 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8) {
2016 TEST_REQUIRES_ARM_NEON;
2017 TransposeMicrokernelTester()
2018 .input_stride(8)
2019 .output_stride(8)
2020 .block_width(8)
2021 .block_height(8)
2022 .iterations(1)
2023 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2024 }
2025
2026 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_1_16_bw_1_16) {
2027 TEST_REQUIRES_ARM_NEON;
2028 for(size_t i = 1; i <= 16; ++i){
2029 for(size_t j = 1; j <= 16; ++j){
2030 TransposeMicrokernelTester()
2031 .input_stride(j)
2032 .output_stride(i)
2033 .block_width(j)
2034 .block_height(i)
2035 .iterations(1)
2036 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2037 }
2038 }
2039 }
2040
2041 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_16) {
2042 TEST_REQUIRES_ARM_NEON;
2043 TransposeMicrokernelTester()
2044 .input_stride(16)
2045 .output_stride(8)
2046 .block_width(16)
2047 .block_height(8)
2048 .iterations(1)
2049 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2050 }
2051
2052 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_9_16) {
2053 TEST_REQUIRES_ARM_NEON;
2054 for(size_t i = 9; i < 16; ++i){
2055 TransposeMicrokernelTester()
2056 .input_stride(i)
2057 .output_stride(8)
2058 .block_width(i)
2059 .block_height(8)
2060 .iterations(1)
2061 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2062 }
2063 }
2064
2065 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_16_bw_9_16) {
2066 TEST_REQUIRES_ARM_NEON;
2067 for(size_t i = 9; i < 16; ++i){
2068 TransposeMicrokernelTester()
2069 .input_stride(i)
2070 .output_stride(16)
2071 .block_width(i)
2072 .block_height(16)
2073 .iterations(1)
2074 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2075 }
2076 }
2077
2078 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_16_bw_8) {
2079 TEST_REQUIRES_ARM_NEON;
2080 TransposeMicrokernelTester()
2081 .input_stride(8)
2082 .output_stride(16)
2083 .block_width(8)
2084 .block_height(16)
2085 .iterations(1)
2086 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2087 }
2088
2089 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_8){
2090 TEST_REQUIRES_ARM_NEON;
2091 for(size_t i = 9; i < 16; ++i){
2092 TransposeMicrokernelTester()
2093 .input_stride(8)
2094 .output_stride(i)
2095 .block_width(8)
2096 .block_height(i)
2097 .iterations(1)
2098 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2099 }
2100 }
2101
2102 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_16){
2103 TEST_REQUIRES_ARM_NEON;
2104 for(size_t i = 9; i < 16; ++i){
2105 TransposeMicrokernelTester()
2106 .input_stride(16)
2107 .output_stride(i)
2108 .block_width(16)
2109 .block_height(i)
2110 .iterations(1)
2111 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2112 }
2113 }
2114
2115 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_9_16) {
2116 TEST_REQUIRES_ARM_NEON;
2117 for(size_t i = 9; i < 16; ++i){
2118 for(size_t j = 9; j < 16; ++j){
2119 TransposeMicrokernelTester()
2120 .input_stride(j)
2121 .output_stride(i)
2122 .block_width(j)
2123 .block_height(i)
2124 .iterations(1)
2125 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2126 }
2127 }
2128 }
2129
2130 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_is_16) {
2131 TEST_REQUIRES_ARM_NEON;
2132 TransposeMicrokernelTester()
2133 .input_stride(16)
2134 .output_stride(8)
2135 .block_width(8)
2136 .block_height(8)
2137 .iterations(1)
2138 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2139 }
2140
2141 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_os_16) {
2142 TEST_REQUIRES_ARM_NEON;
2143 TransposeMicrokernelTester()
2144 .input_stride(8)
2145 .output_stride(16)
2146 .block_width(8)
2147 .block_height(8)
2148 .iterations(1)
2149 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2150 }
2151
2152 TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2153 TEST_REQUIRES_ARM_NEON;
2154 TransposeMicrokernelTester()
2155 .input_stride(16)
2156 .output_stride(16)
2157 .block_width(8)
2158 .block_height(8)
2159 .iterations(1)
2160 .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
2161 }
2162#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2163
2164
2165#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2166 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8) {
2167 TEST_REQUIRES_ARM_NEON;
2168 TransposeMicrokernelTester()
2169 .input_stride(8)
2170 .output_stride(8)
2171 .block_width(8)
2172 .block_height(8)
2173 .iterations(1)
2174 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2175 }
2176
2177 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_1_16_bw_1_16) {
2178 TEST_REQUIRES_ARM_NEON;
2179 for(size_t i = 1; i <= 16; ++i){
2180 for(size_t j = 1; j <= 16; ++j){
2181 TransposeMicrokernelTester()
2182 .input_stride(j)
2183 .output_stride(i)
2184 .block_width(j)
2185 .block_height(i)
2186 .iterations(1)
2187 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2188 }
2189 }
2190 }
2191
2192 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_16) {
2193 TEST_REQUIRES_ARM_NEON;
2194 TransposeMicrokernelTester()
2195 .input_stride(16)
2196 .output_stride(8)
2197 .block_width(16)
2198 .block_height(8)
2199 .iterations(1)
2200 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2201 }
2202
2203 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_9_16) {
2204 TEST_REQUIRES_ARM_NEON;
2205 for(size_t i = 9; i < 16; ++i){
2206 TransposeMicrokernelTester()
2207 .input_stride(i)
2208 .output_stride(8)
2209 .block_width(i)
2210 .block_height(8)
2211 .iterations(1)
2212 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2213 }
2214 }
2215
2216 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_16_bw_9_16) {
2217 TEST_REQUIRES_ARM_NEON;
2218 for(size_t i = 9; i < 16; ++i){
2219 TransposeMicrokernelTester()
2220 .input_stride(i)
2221 .output_stride(16)
2222 .block_width(i)
2223 .block_height(16)
2224 .iterations(1)
2225 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2226 }
2227 }
2228
2229 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_16_bw_8) {
2230 TEST_REQUIRES_ARM_NEON;
2231 TransposeMicrokernelTester()
2232 .input_stride(8)
2233 .output_stride(16)
2234 .block_width(8)
2235 .block_height(16)
2236 .iterations(1)
2237 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2238 }
2239
2240 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_8){
2241 TEST_REQUIRES_ARM_NEON;
2242 for(size_t i = 9; i < 16; ++i){
2243 TransposeMicrokernelTester()
2244 .input_stride(8)
2245 .output_stride(i)
2246 .block_width(8)
2247 .block_height(i)
2248 .iterations(1)
2249 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2250 }
2251 }
2252
2253 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_16){
2254 TEST_REQUIRES_ARM_NEON;
2255 for(size_t i = 9; i < 16; ++i){
2256 TransposeMicrokernelTester()
2257 .input_stride(16)
2258 .output_stride(i)
2259 .block_width(16)
2260 .block_height(i)
2261 .iterations(1)
2262 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2263 }
2264 }
2265
2266 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_9_16) {
2267 TEST_REQUIRES_ARM_NEON;
2268 for(size_t i = 9; i < 16; ++i){
2269 for(size_t j = 9; j < 16; ++j){
2270 TransposeMicrokernelTester()
2271 .input_stride(j)
2272 .output_stride(i)
2273 .block_width(j)
2274 .block_height(i)
2275 .iterations(1)
2276 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2277 }
2278 }
2279 }
2280
2281 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_is_16) {
2282 TEST_REQUIRES_ARM_NEON;
2283 TransposeMicrokernelTester()
2284 .input_stride(16)
2285 .output_stride(8)
2286 .block_width(8)
2287 .block_height(8)
2288 .iterations(1)
2289 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2290 }
2291
2292 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_os_16) {
2293 TEST_REQUIRES_ARM_NEON;
2294 TransposeMicrokernelTester()
2295 .input_stride(8)
2296 .output_stride(16)
2297 .block_width(8)
2298 .block_height(8)
2299 .iterations(1)
2300 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2301 }
2302
2303 TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2304 TEST_REQUIRES_ARM_NEON;
2305 TransposeMicrokernelTester()
2306 .input_stride(16)
2307 .output_stride(16)
2308 .block_width(8)
2309 .block_height(8)
2310 .iterations(1)
2311 .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
2312 }
2313#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2314
2315
2316#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2317 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8) {
2318 TEST_REQUIRES_ARM_NEON;
2319 TransposeMicrokernelTester()
2320 .input_stride(8)
2321 .output_stride(8)
2322 .block_width(8)
2323 .block_height(8)
2324 .iterations(1)
2325 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2326 }
2327
2328 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_1_16_bw_1_16) {
2329 TEST_REQUIRES_ARM_NEON;
2330 for(size_t i = 1; i <= 16; ++i){
2331 for(size_t j = 1; j <= 16; ++j){
2332 TransposeMicrokernelTester()
2333 .input_stride(j)
2334 .output_stride(i)
2335 .block_width(j)
2336 .block_height(i)
2337 .iterations(1)
2338 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2339 }
2340 }
2341 }
2342
2343 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_16) {
2344 TEST_REQUIRES_ARM_NEON;
2345 TransposeMicrokernelTester()
2346 .input_stride(16)
2347 .output_stride(8)
2348 .block_width(16)
2349 .block_height(8)
2350 .iterations(1)
2351 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2352 }
2353
2354 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_9_16) {
2355 TEST_REQUIRES_ARM_NEON;
2356 for(size_t i = 9; i < 16; ++i){
2357 TransposeMicrokernelTester()
2358 .input_stride(i)
2359 .output_stride(8)
2360 .block_width(i)
2361 .block_height(8)
2362 .iterations(1)
2363 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2364 }
2365 }
2366
2367 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_16_bw_9_16) {
2368 TEST_REQUIRES_ARM_NEON;
2369 for(size_t i = 9; i < 16; ++i){
2370 TransposeMicrokernelTester()
2371 .input_stride(i)
2372 .output_stride(16)
2373 .block_width(i)
2374 .block_height(16)
2375 .iterations(1)
2376 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2377 }
2378 }
2379
2380 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_16_bw_8) {
2381 TEST_REQUIRES_ARM_NEON;
2382 TransposeMicrokernelTester()
2383 .input_stride(8)
2384 .output_stride(16)
2385 .block_width(8)
2386 .block_height(16)
2387 .iterations(1)
2388 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2389 }
2390
2391 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_8){
2392 TEST_REQUIRES_ARM_NEON;
2393 for(size_t i = 9; i < 16; ++i){
2394 TransposeMicrokernelTester()
2395 .input_stride(8)
2396 .output_stride(i)
2397 .block_width(8)
2398 .block_height(i)
2399 .iterations(1)
2400 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2401 }
2402 }
2403
2404 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_16){
2405 TEST_REQUIRES_ARM_NEON;
2406 for(size_t i = 9; i < 16; ++i){
2407 TransposeMicrokernelTester()
2408 .input_stride(16)
2409 .output_stride(i)
2410 .block_width(16)
2411 .block_height(i)
2412 .iterations(1)
2413 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2414 }
2415 }
2416
2417 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_9_16) {
2418 TEST_REQUIRES_ARM_NEON;
2419 for(size_t i = 9; i < 16; ++i){
2420 for(size_t j = 9; j < 16; ++j){
2421 TransposeMicrokernelTester()
2422 .input_stride(j)
2423 .output_stride(i)
2424 .block_width(j)
2425 .block_height(i)
2426 .iterations(1)
2427 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2428 }
2429 }
2430 }
2431
2432 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_is_16) {
2433 TEST_REQUIRES_ARM_NEON;
2434 TransposeMicrokernelTester()
2435 .input_stride(16)
2436 .output_stride(8)
2437 .block_width(8)
2438 .block_height(8)
2439 .iterations(1)
2440 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2441 }
2442
2443 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_os_16) {
2444 TEST_REQUIRES_ARM_NEON;
2445 TransposeMicrokernelTester()
2446 .input_stride(8)
2447 .output_stride(16)
2448 .block_width(8)
2449 .block_height(8)
2450 .iterations(1)
2451 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2452 }
2453
2454 TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2455 TEST_REQUIRES_ARM_NEON;
2456 TransposeMicrokernelTester()
2457 .input_stride(16)
2458 .output_stride(16)
2459 .block_width(8)
2460 .block_height(8)
2461 .iterations(1)
2462 .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
2463 }
2464#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2465
2466
2467#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2468 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8) {
2469 TEST_REQUIRES_ARM_NEON;
2470 TransposeMicrokernelTester()
2471 .input_stride(8)
2472 .output_stride(8)
2473 .block_width(8)
2474 .block_height(8)
2475 .iterations(1)
2476 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2477 }
2478
2479 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_1_16_bw_1_16) {
2480 TEST_REQUIRES_ARM_NEON;
2481 for(size_t i = 1; i <= 16; ++i){
2482 for(size_t j = 1; j <= 16; ++j){
2483 TransposeMicrokernelTester()
2484 .input_stride(j)
2485 .output_stride(i)
2486 .block_width(j)
2487 .block_height(i)
2488 .iterations(1)
2489 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2490 }
2491 }
2492 }
2493
2494 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_16) {
2495 TEST_REQUIRES_ARM_NEON;
2496 TransposeMicrokernelTester()
2497 .input_stride(16)
2498 .output_stride(8)
2499 .block_width(16)
2500 .block_height(8)
2501 .iterations(1)
2502 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2503 }
2504
2505 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_9_16) {
2506 TEST_REQUIRES_ARM_NEON;
2507 for(size_t i = 9; i < 16; ++i){
2508 TransposeMicrokernelTester()
2509 .input_stride(i)
2510 .output_stride(8)
2511 .block_width(i)
2512 .block_height(8)
2513 .iterations(1)
2514 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2515 }
2516 }
2517
2518 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_16_bw_9_16) {
2519 TEST_REQUIRES_ARM_NEON;
2520 for(size_t i = 9; i < 16; ++i){
2521 TransposeMicrokernelTester()
2522 .input_stride(i)
2523 .output_stride(16)
2524 .block_width(i)
2525 .block_height(16)
2526 .iterations(1)
2527 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2528 }
2529 }
2530
2531 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_16_bw_8) {
2532 TEST_REQUIRES_ARM_NEON;
2533 TransposeMicrokernelTester()
2534 .input_stride(8)
2535 .output_stride(16)
2536 .block_width(8)
2537 .block_height(16)
2538 .iterations(1)
2539 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2540 }
2541
2542 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_8){
2543 TEST_REQUIRES_ARM_NEON;
2544 for(size_t i = 9; i < 16; ++i){
2545 TransposeMicrokernelTester()
2546 .input_stride(8)
2547 .output_stride(i)
2548 .block_width(8)
2549 .block_height(i)
2550 .iterations(1)
2551 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2552 }
2553 }
2554
2555 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_16){
2556 TEST_REQUIRES_ARM_NEON;
2557 for(size_t i = 9; i < 16; ++i){
2558 TransposeMicrokernelTester()
2559 .input_stride(16)
2560 .output_stride(i)
2561 .block_width(16)
2562 .block_height(i)
2563 .iterations(1)
2564 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2565 }
2566 }
2567
2568 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_9_16) {
2569 TEST_REQUIRES_ARM_NEON;
2570 for(size_t i = 9; i < 16; ++i){
2571 for(size_t j = 9; j < 16; ++j){
2572 TransposeMicrokernelTester()
2573 .input_stride(j)
2574 .output_stride(i)
2575 .block_width(j)
2576 .block_height(i)
2577 .iterations(1)
2578 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2579 }
2580 }
2581 }
2582
2583 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_is_16) {
2584 TEST_REQUIRES_ARM_NEON;
2585 TransposeMicrokernelTester()
2586 .input_stride(16)
2587 .output_stride(8)
2588 .block_width(8)
2589 .block_height(8)
2590 .iterations(1)
2591 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2592 }
2593
2594 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_os_16) {
2595 TEST_REQUIRES_ARM_NEON;
2596 TransposeMicrokernelTester()
2597 .input_stride(8)
2598 .output_stride(16)
2599 .block_width(8)
2600 .block_height(8)
2601 .iterations(1)
2602 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2603 }
2604
2605 TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2606 TEST_REQUIRES_ARM_NEON;
2607 TransposeMicrokernelTester()
2608 .input_stride(16)
2609 .output_stride(16)
2610 .block_width(8)
2611 .block_height(8)
2612 .iterations(1)
2613 .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
2614 }
2615#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2616
2617
2618#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2619 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8) {
2620 TEST_REQUIRES_ARM_NEON;
2621 TransposeMicrokernelTester()
2622 .input_stride(8)
2623 .output_stride(8)
2624 .block_width(8)
2625 .block_height(8)
2626 .iterations(1)
2627 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2628 }
2629
2630 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_1_16_bw_1_16) {
2631 TEST_REQUIRES_ARM_NEON;
2632 for(size_t i = 1; i <= 16; ++i){
2633 for(size_t j = 1; j <= 16; ++j){
2634 TransposeMicrokernelTester()
2635 .input_stride(j)
2636 .output_stride(i)
2637 .block_width(j)
2638 .block_height(i)
2639 .iterations(1)
2640 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2641 }
2642 }
2643 }
2644
2645 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_16) {
2646 TEST_REQUIRES_ARM_NEON;
2647 TransposeMicrokernelTester()
2648 .input_stride(16)
2649 .output_stride(8)
2650 .block_width(16)
2651 .block_height(8)
2652 .iterations(1)
2653 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2654 }
2655
2656 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_9_16) {
2657 TEST_REQUIRES_ARM_NEON;
2658 for(size_t i = 9; i < 16; ++i){
2659 TransposeMicrokernelTester()
2660 .input_stride(i)
2661 .output_stride(8)
2662 .block_width(i)
2663 .block_height(8)
2664 .iterations(1)
2665 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2666 }
2667 }
2668
2669 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_16_bw_9_16) {
2670 TEST_REQUIRES_ARM_NEON;
2671 for(size_t i = 9; i < 16; ++i){
2672 TransposeMicrokernelTester()
2673 .input_stride(i)
2674 .output_stride(16)
2675 .block_width(i)
2676 .block_height(16)
2677 .iterations(1)
2678 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2679 }
2680 }
2681
2682 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_16_bw_8) {
2683 TEST_REQUIRES_ARM_NEON;
2684 TransposeMicrokernelTester()
2685 .input_stride(8)
2686 .output_stride(16)
2687 .block_width(8)
2688 .block_height(16)
2689 .iterations(1)
2690 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2691 }
2692
2693 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_8){
2694 TEST_REQUIRES_ARM_NEON;
2695 for(size_t i = 9; i < 16; ++i){
2696 TransposeMicrokernelTester()
2697 .input_stride(8)
2698 .output_stride(i)
2699 .block_width(8)
2700 .block_height(i)
2701 .iterations(1)
2702 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2703 }
2704 }
2705
2706 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_16){
2707 TEST_REQUIRES_ARM_NEON;
2708 for(size_t i = 9; i < 16; ++i){
2709 TransposeMicrokernelTester()
2710 .input_stride(16)
2711 .output_stride(i)
2712 .block_width(16)
2713 .block_height(i)
2714 .iterations(1)
2715 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2716 }
2717 }
2718
2719 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_9_16) {
2720 TEST_REQUIRES_ARM_NEON;
2721 for(size_t i = 9; i < 16; ++i){
2722 for(size_t j = 9; j < 16; ++j){
2723 TransposeMicrokernelTester()
2724 .input_stride(j)
2725 .output_stride(i)
2726 .block_width(j)
2727 .block_height(i)
2728 .iterations(1)
2729 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2730 }
2731 }
2732 }
2733
2734 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_is_16) {
2735 TEST_REQUIRES_ARM_NEON;
2736 TransposeMicrokernelTester()
2737 .input_stride(16)
2738 .output_stride(8)
2739 .block_width(8)
2740 .block_height(8)
2741 .iterations(1)
2742 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2743 }
2744
2745 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_os_16) {
2746 TEST_REQUIRES_ARM_NEON;
2747 TransposeMicrokernelTester()
2748 .input_stride(8)
2749 .output_stride(16)
2750 .block_width(8)
2751 .block_height(8)
2752 .iterations(1)
2753 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2754 }
2755
2756 TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2757 TEST_REQUIRES_ARM_NEON;
2758 TransposeMicrokernelTester()
2759 .input_stride(16)
2760 .output_stride(16)
2761 .block_width(8)
2762 .block_height(8)
2763 .iterations(1)
2764 .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
2765 }
2766#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2767
2768
2769#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2770 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8) {
2771 TEST_REQUIRES_ARM_NEON;
2772 TransposeMicrokernelTester()
2773 .input_stride(8)
2774 .output_stride(8)
2775 .block_width(8)
2776 .block_height(8)
2777 .iterations(1)
2778 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2779 }
2780
2781 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_1_16_bw_1_16) {
2782 TEST_REQUIRES_ARM_NEON;
2783 for(size_t i = 1; i <= 16; ++i){
2784 for(size_t j = 1; j <= 16; ++j){
2785 TransposeMicrokernelTester()
2786 .input_stride(j)
2787 .output_stride(i)
2788 .block_width(j)
2789 .block_height(i)
2790 .iterations(1)
2791 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2792 }
2793 }
2794 }
2795
2796 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_16) {
2797 TEST_REQUIRES_ARM_NEON;
2798 TransposeMicrokernelTester()
2799 .input_stride(16)
2800 .output_stride(8)
2801 .block_width(16)
2802 .block_height(8)
2803 .iterations(1)
2804 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2805 }
2806
2807 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_9_16) {
2808 TEST_REQUIRES_ARM_NEON;
2809 for(size_t i = 9; i < 16; ++i){
2810 TransposeMicrokernelTester()
2811 .input_stride(i)
2812 .output_stride(8)
2813 .block_width(i)
2814 .block_height(8)
2815 .iterations(1)
2816 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2817 }
2818 }
2819
2820 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_16_bw_9_16) {
2821 TEST_REQUIRES_ARM_NEON;
2822 for(size_t i = 9; i < 16; ++i){
2823 TransposeMicrokernelTester()
2824 .input_stride(i)
2825 .output_stride(16)
2826 .block_width(i)
2827 .block_height(16)
2828 .iterations(1)
2829 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2830 }
2831 }
2832
2833 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_16_bw_8) {
2834 TEST_REQUIRES_ARM_NEON;
2835 TransposeMicrokernelTester()
2836 .input_stride(8)
2837 .output_stride(16)
2838 .block_width(8)
2839 .block_height(16)
2840 .iterations(1)
2841 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2842 }
2843
2844 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_8){
2845 TEST_REQUIRES_ARM_NEON;
2846 for(size_t i = 9; i < 16; ++i){
2847 TransposeMicrokernelTester()
2848 .input_stride(8)
2849 .output_stride(i)
2850 .block_width(8)
2851 .block_height(i)
2852 .iterations(1)
2853 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2854 }
2855 }
2856
2857 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_16){
2858 TEST_REQUIRES_ARM_NEON;
2859 for(size_t i = 9; i < 16; ++i){
2860 TransposeMicrokernelTester()
2861 .input_stride(16)
2862 .output_stride(i)
2863 .block_width(16)
2864 .block_height(i)
2865 .iterations(1)
2866 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2867 }
2868 }
2869
2870 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_9_16) {
2871 TEST_REQUIRES_ARM_NEON;
2872 for(size_t i = 9; i < 16; ++i){
2873 for(size_t j = 9; j < 16; ++j){
2874 TransposeMicrokernelTester()
2875 .input_stride(j)
2876 .output_stride(i)
2877 .block_width(j)
2878 .block_height(i)
2879 .iterations(1)
2880 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2881 }
2882 }
2883 }
2884
2885 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_is_16) {
2886 TEST_REQUIRES_ARM_NEON;
2887 TransposeMicrokernelTester()
2888 .input_stride(16)
2889 .output_stride(8)
2890 .block_width(8)
2891 .block_height(8)
2892 .iterations(1)
2893 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2894 }
2895
2896 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_os_16) {
2897 TEST_REQUIRES_ARM_NEON;
2898 TransposeMicrokernelTester()
2899 .input_stride(8)
2900 .output_stride(16)
2901 .block_width(8)
2902 .block_height(8)
2903 .iterations(1)
2904 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2905 }
2906
2907 TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
2908 TEST_REQUIRES_ARM_NEON;
2909 TransposeMicrokernelTester()
2910 .input_stride(16)
2911 .output_stride(16)
2912 .block_width(8)
2913 .block_height(8)
2914 .iterations(1)
2915 .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
2916 }
2917#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2918
2919
2920#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2921 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8) {
2922 TEST_REQUIRES_ARM_NEON;
2923 TransposeMicrokernelTester()
2924 .input_stride(8)
2925 .output_stride(8)
2926 .block_width(8)
2927 .block_height(8)
2928 .iterations(1)
2929 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2930 }
2931
2932 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_1_16_bw_1_16) {
2933 TEST_REQUIRES_ARM_NEON;
2934 for(size_t i = 1; i <= 16; ++i){
2935 for(size_t j = 1; j <= 16; ++j){
2936 TransposeMicrokernelTester()
2937 .input_stride(j)
2938 .output_stride(i)
2939 .block_width(j)
2940 .block_height(i)
2941 .iterations(1)
2942 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2943 }
2944 }
2945 }
2946
2947 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_16) {
2948 TEST_REQUIRES_ARM_NEON;
2949 TransposeMicrokernelTester()
2950 .input_stride(16)
2951 .output_stride(8)
2952 .block_width(16)
2953 .block_height(8)
2954 .iterations(1)
2955 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2956 }
2957
2958 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_9_16) {
2959 TEST_REQUIRES_ARM_NEON;
2960 for(size_t i = 9; i < 16; ++i){
2961 TransposeMicrokernelTester()
2962 .input_stride(i)
2963 .output_stride(8)
2964 .block_width(i)
2965 .block_height(8)
2966 .iterations(1)
2967 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2968 }
2969 }
2970
2971 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_16_bw_9_16) {
2972 TEST_REQUIRES_ARM_NEON;
2973 for(size_t i = 9; i < 16; ++i){
2974 TransposeMicrokernelTester()
2975 .input_stride(i)
2976 .output_stride(16)
2977 .block_width(i)
2978 .block_height(16)
2979 .iterations(1)
2980 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2981 }
2982 }
2983
2984 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_16_bw_8) {
2985 TEST_REQUIRES_ARM_NEON;
2986 TransposeMicrokernelTester()
2987 .input_stride(8)
2988 .output_stride(16)
2989 .block_width(8)
2990 .block_height(16)
2991 .iterations(1)
2992 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
2993 }
2994
2995 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_8){
2996 TEST_REQUIRES_ARM_NEON;
2997 for(size_t i = 9; i < 16; ++i){
2998 TransposeMicrokernelTester()
2999 .input_stride(8)
3000 .output_stride(i)
3001 .block_width(8)
3002 .block_height(i)
3003 .iterations(1)
3004 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3005 }
3006 }
3007
3008 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_16){
3009 TEST_REQUIRES_ARM_NEON;
3010 for(size_t i = 9; i < 16; ++i){
3011 TransposeMicrokernelTester()
3012 .input_stride(16)
3013 .output_stride(i)
3014 .block_width(16)
3015 .block_height(i)
3016 .iterations(1)
3017 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3018 }
3019 }
3020
3021 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_9_16) {
3022 TEST_REQUIRES_ARM_NEON;
3023 for(size_t i = 9; i < 16; ++i){
3024 for(size_t j = 9; j < 16; ++j){
3025 TransposeMicrokernelTester()
3026 .input_stride(j)
3027 .output_stride(i)
3028 .block_width(j)
3029 .block_height(i)
3030 .iterations(1)
3031 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3032 }
3033 }
3034 }
3035
3036 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_is_16) {
3037 TEST_REQUIRES_ARM_NEON;
3038 TransposeMicrokernelTester()
3039 .input_stride(16)
3040 .output_stride(8)
3041 .block_width(8)
3042 .block_height(8)
3043 .iterations(1)
3044 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3045 }
3046
3047 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_os_16) {
3048 TEST_REQUIRES_ARM_NEON;
3049 TransposeMicrokernelTester()
3050 .input_stride(8)
3051 .output_stride(16)
3052 .block_width(8)
3053 .block_height(8)
3054 .iterations(1)
3055 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3056 }
3057
3058 TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
3059 TEST_REQUIRES_ARM_NEON;
3060 TransposeMicrokernelTester()
3061 .input_stride(16)
3062 .output_stride(16)
3063 .block_width(8)
3064 .block_height(8)
3065 .iterations(1)
3066 .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
3067 }
3068#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64