blob: 9d52acdf655107c44da3b23e7dbad005ada560a5 [file] [log] [blame]
Alan Kellyd19bde92022-01-14 02:30:28 -08001// Copyright 2021 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5//
6// Auto-generated file. Do not edit!
7// Specification: test/x64-transpose.yaml
8// Generator: tools/generate-transpose-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/transpose.h>
17#include "transpose-microkernel-tester.h"
18
19
Alan Kelly667e0f12022-01-14 09:37:59 -080020TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
21 TransposeMicrokernelTester()
22 .input_stride(2)
23 .output_stride(1)
24 .block_width(2)
25 .block_height(1)
26 .iterations(1)
27 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
28}
29
30TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
31 for(size_t i = 1; i <= 2; ++i){
32 for(size_t j = 1; j <= 4; ++j){
33 TransposeMicrokernelTester()
34 .input_stride(j)
35 .output_stride(i)
36 .block_width(j)
37 .block_height(i)
38 .iterations(1)
39 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
40 }
41 }
42}
43
44TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
45 TransposeMicrokernelTester()
46 .input_stride(4)
47 .output_stride(1)
48 .block_width(4)
49 .block_height(1)
50 .iterations(1)
51 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
52}
53
54TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
55 for(size_t i = 3; i < 4; ++i){
56 TransposeMicrokernelTester()
57 .input_stride(i)
58 .output_stride(1)
59 .block_width(i)
60 .block_height(1)
61 .iterations(1)
62 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
63 }
64}
65
66TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
67 for(size_t i = 3; i < 4; ++i){
68 TransposeMicrokernelTester()
69 .input_stride(i)
70 .output_stride(2)
71 .block_width(i)
72 .block_height(2)
73 .iterations(1)
74 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
75 }
76}
77
78TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
79 TransposeMicrokernelTester()
80 .input_stride(2)
81 .output_stride(2)
82 .block_width(2)
83 .block_height(2)
84 .iterations(1)
85 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
86}
87
88TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2){
89 for(size_t i = 2; i < 2; ++i){
90 TransposeMicrokernelTester()
91 .input_stride(2)
92 .output_stride(i)
93 .block_width(2)
94 .block_height(i)
95 .iterations(1)
96 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
97 }
98}
99
100TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4){
101 for(size_t i = 2; i < 2; ++i){
102 TransposeMicrokernelTester()
103 .input_stride(4)
104 .output_stride(i)
105 .block_width(4)
106 .block_height(i)
107 .iterations(1)
108 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
109 }
110}
111
112TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
113 for(size_t i = 2; i < 2; ++i){
114 for(size_t j = 3; j < 4; ++j){
115 TransposeMicrokernelTester()
116 .input_stride(j)
117 .output_stride(i)
118 .block_width(j)
119 .block_height(i)
120 .iterations(1)
121 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
122 }
123 }
124}
125
126TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
127 TransposeMicrokernelTester()
128 .input_stride(4)
129 .output_stride(1)
130 .block_width(2)
131 .block_height(1)
132 .iterations(1)
133 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
134}
135
136TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
137 TransposeMicrokernelTester()
138 .input_stride(2)
139 .output_stride(2)
140 .block_width(2)
141 .block_height(1)
142 .iterations(1)
143 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
144}
145
146TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
147 TransposeMicrokernelTester()
148 .input_stride(4)
149 .output_stride(2)
150 .block_width(2)
151 .block_height(1)
152 .iterations(1)
153 .Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
154}
155
156TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
157 TransposeMicrokernelTester()
158 .input_stride(1)
159 .output_stride(2)
160 .block_width(1)
161 .block_height(2)
162 .iterations(1)
163 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
164}
165
166TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
167 for(size_t i = 1; i <= 4; ++i){
168 for(size_t j = 1; j <= 2; ++j){
169 TransposeMicrokernelTester()
170 .input_stride(j)
171 .output_stride(i)
172 .block_width(j)
173 .block_height(i)
174 .iterations(1)
175 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
176 }
177 }
178}
179
180TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
181 TransposeMicrokernelTester()
182 .input_stride(2)
183 .output_stride(2)
184 .block_width(2)
185 .block_height(2)
186 .iterations(1)
187 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
188}
189
190TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
191 for(size_t i = 2; i < 2; ++i){
192 TransposeMicrokernelTester()
193 .input_stride(i)
194 .output_stride(2)
195 .block_width(i)
196 .block_height(2)
197 .iterations(1)
198 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
199 }
200}
201
202TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
203 for(size_t i = 2; i < 2; ++i){
204 TransposeMicrokernelTester()
205 .input_stride(i)
206 .output_stride(4)
207 .block_width(i)
208 .block_height(4)
209 .iterations(1)
210 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
211 }
212}
213
214TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
215 TransposeMicrokernelTester()
216 .input_stride(1)
217 .output_stride(4)
218 .block_width(1)
219 .block_height(4)
220 .iterations(1)
221 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
222}
223
224TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1){
225 for(size_t i = 3; i < 4; ++i){
226 TransposeMicrokernelTester()
227 .input_stride(1)
228 .output_stride(i)
229 .block_width(1)
230 .block_height(i)
231 .iterations(1)
232 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
233 }
234}
235
236TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2){
237 for(size_t i = 3; i < 4; ++i){
238 TransposeMicrokernelTester()
239 .input_stride(2)
240 .output_stride(i)
241 .block_width(2)
242 .block_height(i)
243 .iterations(1)
244 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
245 }
246}
247
248TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
249 for(size_t i = 3; i < 4; ++i){
250 for(size_t j = 2; j < 2; ++j){
251 TransposeMicrokernelTester()
252 .input_stride(j)
253 .output_stride(i)
254 .block_width(j)
255 .block_height(i)
256 .iterations(1)
257 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
258 }
259 }
260}
261
262TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
263 TransposeMicrokernelTester()
264 .input_stride(2)
265 .output_stride(2)
266 .block_width(1)
267 .block_height(2)
268 .iterations(1)
269 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
270}
271
272TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
273 TransposeMicrokernelTester()
274 .input_stride(1)
275 .output_stride(4)
276 .block_width(1)
277 .block_height(2)
278 .iterations(1)
279 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
280}
281
282TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
283 TransposeMicrokernelTester()
284 .input_stride(2)
285 .output_stride(4)
286 .block_width(1)
287 .block_height(2)
288 .iterations(1)
289 .Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
290}
291
292TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
293 TransposeMicrokernelTester()
294 .input_stride(2)
295 .output_stride(2)
296 .block_width(2)
297 .block_height(2)
298 .iterations(1)
299 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
300}
301
302TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
303 for(size_t i = 1; i <= 4; ++i){
304 for(size_t j = 1; j <= 4; ++j){
305 TransposeMicrokernelTester()
306 .input_stride(j)
307 .output_stride(i)
308 .block_width(j)
309 .block_height(i)
310 .iterations(1)
311 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
312 }
313 }
314}
315
316TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
317 TransposeMicrokernelTester()
318 .input_stride(4)
319 .output_stride(2)
320 .block_width(4)
321 .block_height(2)
322 .iterations(1)
323 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
324}
325
326TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
327 for(size_t i = 3; i < 4; ++i){
328 TransposeMicrokernelTester()
329 .input_stride(i)
330 .output_stride(2)
331 .block_width(i)
332 .block_height(2)
333 .iterations(1)
334 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
335 }
336}
337
338TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
339 for(size_t i = 3; i < 4; ++i){
340 TransposeMicrokernelTester()
341 .input_stride(i)
342 .output_stride(4)
343 .block_width(i)
344 .block_height(4)
345 .iterations(1)
346 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
347 }
348}
349
350TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
351 TransposeMicrokernelTester()
352 .input_stride(2)
353 .output_stride(4)
354 .block_width(2)
355 .block_height(4)
356 .iterations(1)
357 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
358}
359
360TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2){
361 for(size_t i = 3; i < 4; ++i){
362 TransposeMicrokernelTester()
363 .input_stride(2)
364 .output_stride(i)
365 .block_width(2)
366 .block_height(i)
367 .iterations(1)
368 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
369 }
370}
371
372TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4){
373 for(size_t i = 3; i < 4; ++i){
374 TransposeMicrokernelTester()
375 .input_stride(4)
376 .output_stride(i)
377 .block_width(4)
378 .block_height(i)
379 .iterations(1)
380 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
381 }
382}
383
384TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
385 for(size_t i = 3; i < 4; ++i){
386 for(size_t j = 3; j < 4; ++j){
387 TransposeMicrokernelTester()
388 .input_stride(j)
389 .output_stride(i)
390 .block_width(j)
391 .block_height(i)
392 .iterations(1)
393 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
394 }
395 }
396}
397
398TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
399 TransposeMicrokernelTester()
400 .input_stride(4)
401 .output_stride(2)
402 .block_width(2)
403 .block_height(2)
404 .iterations(1)
405 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
406}
407
408TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
409 TransposeMicrokernelTester()
410 .input_stride(2)
411 .output_stride(4)
412 .block_width(2)
413 .block_height(2)
414 .iterations(1)
415 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
416}
417
418TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
419 TransposeMicrokernelTester()
420 .input_stride(4)
421 .output_stride(4)
422 .block_width(2)
423 .block_height(2)
424 .iterations(1)
425 .Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
426}
427
428TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
429 TransposeMicrokernelTester()
430 .input_stride(1)
431 .output_stride(4)
432 .block_width(1)
433 .block_height(4)
434 .iterations(1)
435 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
436}
437
438TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
439 for(size_t i = 1; i <= 8; ++i){
440 for(size_t j = 1; j <= 2; ++j){
441 TransposeMicrokernelTester()
442 .input_stride(j)
443 .output_stride(i)
444 .block_width(j)
445 .block_height(i)
446 .iterations(1)
447 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
448 }
449 }
450}
451
452TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
453 TransposeMicrokernelTester()
454 .input_stride(2)
455 .output_stride(4)
456 .block_width(2)
457 .block_height(4)
458 .iterations(1)
459 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
460}
461
462TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
463 for(size_t i = 2; i < 2; ++i){
464 TransposeMicrokernelTester()
465 .input_stride(i)
466 .output_stride(4)
467 .block_width(i)
468 .block_height(4)
469 .iterations(1)
470 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
471 }
472}
473
474TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
475 for(size_t i = 2; i < 2; ++i){
476 TransposeMicrokernelTester()
477 .input_stride(i)
478 .output_stride(8)
479 .block_width(i)
480 .block_height(8)
481 .iterations(1)
482 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
483 }
484}
485
486TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
487 TransposeMicrokernelTester()
488 .input_stride(1)
489 .output_stride(8)
490 .block_width(1)
491 .block_height(8)
492 .iterations(1)
493 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
494}
495
496TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1){
497 for(size_t i = 5; i < 8; ++i){
498 TransposeMicrokernelTester()
499 .input_stride(1)
500 .output_stride(i)
501 .block_width(1)
502 .block_height(i)
503 .iterations(1)
504 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
505 }
506}
507
508TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2){
509 for(size_t i = 5; i < 8; ++i){
510 TransposeMicrokernelTester()
511 .input_stride(2)
512 .output_stride(i)
513 .block_width(2)
514 .block_height(i)
515 .iterations(1)
516 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
517 }
518}
519
520TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
521 for(size_t i = 5; i < 8; ++i){
522 for(size_t j = 2; j < 2; ++j){
523 TransposeMicrokernelTester()
524 .input_stride(j)
525 .output_stride(i)
526 .block_width(j)
527 .block_height(i)
528 .iterations(1)
529 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
530 }
531 }
532}
533
534TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
535 TransposeMicrokernelTester()
536 .input_stride(2)
537 .output_stride(4)
538 .block_width(1)
539 .block_height(4)
540 .iterations(1)
541 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
542}
543
544TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
545 TransposeMicrokernelTester()
546 .input_stride(1)
547 .output_stride(8)
548 .block_width(1)
549 .block_height(4)
550 .iterations(1)
551 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
552}
553
554TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
555 TransposeMicrokernelTester()
556 .input_stride(2)
557 .output_stride(8)
558 .block_width(1)
559 .block_height(4)
560 .iterations(1)
561 .Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
562}
563
564TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
565 TransposeMicrokernelTester()
566 .input_stride(2)
567 .output_stride(4)
568 .block_width(2)
569 .block_height(4)
570 .iterations(1)
571 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
572}
573
574TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
575 for(size_t i = 1; i <= 8; ++i){
576 for(size_t j = 1; j <= 4; ++j){
577 TransposeMicrokernelTester()
578 .input_stride(j)
579 .output_stride(i)
580 .block_width(j)
581 .block_height(i)
582 .iterations(1)
583 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
584 }
585 }
586}
587
588TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
589 TransposeMicrokernelTester()
590 .input_stride(4)
591 .output_stride(4)
592 .block_width(4)
593 .block_height(4)
594 .iterations(1)
595 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
596}
597
598TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
599 for(size_t i = 3; i < 4; ++i){
600 TransposeMicrokernelTester()
601 .input_stride(i)
602 .output_stride(4)
603 .block_width(i)
604 .block_height(4)
605 .iterations(1)
606 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
607 }
608}
609
610TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
611 for(size_t i = 3; i < 4; ++i){
612 TransposeMicrokernelTester()
613 .input_stride(i)
614 .output_stride(8)
615 .block_width(i)
616 .block_height(8)
617 .iterations(1)
618 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
619 }
620}
621
622TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
623 TransposeMicrokernelTester()
624 .input_stride(2)
625 .output_stride(8)
626 .block_width(2)
627 .block_height(8)
628 .iterations(1)
629 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
630}
631
632TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2){
633 for(size_t i = 5; i < 8; ++i){
634 TransposeMicrokernelTester()
635 .input_stride(2)
636 .output_stride(i)
637 .block_width(2)
638 .block_height(i)
639 .iterations(1)
640 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
641 }
642}
643
644TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4){
645 for(size_t i = 5; i < 8; ++i){
646 TransposeMicrokernelTester()
647 .input_stride(4)
648 .output_stride(i)
649 .block_width(4)
650 .block_height(i)
651 .iterations(1)
652 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
653 }
654}
655
656TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
657 for(size_t i = 5; i < 8; ++i){
658 for(size_t j = 3; j < 4; ++j){
659 TransposeMicrokernelTester()
660 .input_stride(j)
661 .output_stride(i)
662 .block_width(j)
663 .block_height(i)
664 .iterations(1)
665 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
666 }
667 }
668}
669
670TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
671 TransposeMicrokernelTester()
672 .input_stride(4)
673 .output_stride(4)
674 .block_width(2)
675 .block_height(4)
676 .iterations(1)
677 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
678}
679
680TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
681 TransposeMicrokernelTester()
682 .input_stride(2)
683 .output_stride(8)
684 .block_width(2)
685 .block_height(4)
686 .iterations(1)
687 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
688}
689
690TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
691 TransposeMicrokernelTester()
692 .input_stride(4)
693 .output_stride(8)
694 .block_width(2)
695 .block_height(4)
696 .iterations(1)
697 .Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
698}
699
700TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2) {
701 TransposeMicrokernelTester()
702 .input_stride(2)
703 .output_stride(1)
704 .block_width(2)
705 .block_height(1)
706 .iterations(1)
707 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
708}
709
710TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_2_bw_1_4) {
711 for(size_t i = 1; i <= 2; ++i){
712 for(size_t j = 1; j <= 4; ++j){
713 TransposeMicrokernelTester()
714 .input_stride(j)
715 .output_stride(i)
716 .block_width(j)
717 .block_height(i)
718 .iterations(1)
719 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
720 }
721 }
722}
723
724TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_4) {
725 TransposeMicrokernelTester()
726 .input_stride(4)
727 .output_stride(1)
728 .block_width(4)
729 .block_height(1)
730 .iterations(1)
731 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
732}
733
734TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_3_4) {
735 for(size_t i = 3; i < 4; ++i){
736 TransposeMicrokernelTester()
737 .input_stride(i)
738 .output_stride(1)
739 .block_width(i)
740 .block_height(1)
741 .iterations(1)
742 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
743 }
744}
745
746TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_3_4) {
747 for(size_t i = 3; i < 4; ++i){
748 TransposeMicrokernelTester()
749 .input_stride(i)
750 .output_stride(2)
751 .block_width(i)
752 .block_height(2)
753 .iterations(1)
754 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
755 }
756}
757
758TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_2) {
759 TransposeMicrokernelTester()
760 .input_stride(2)
761 .output_stride(2)
762 .block_width(2)
763 .block_height(2)
764 .iterations(1)
765 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
766}
767
768TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_2){
769 for(size_t i = 2; i < 2; ++i){
770 TransposeMicrokernelTester()
771 .input_stride(2)
772 .output_stride(i)
773 .block_width(2)
774 .block_height(i)
775 .iterations(1)
776 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
777 }
778}
779
780TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_4){
781 for(size_t i = 2; i < 2; ++i){
782 TransposeMicrokernelTester()
783 .input_stride(4)
784 .output_stride(i)
785 .block_width(4)
786 .block_height(i)
787 .iterations(1)
788 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
789 }
790}
791
792TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_3_4) {
793 for(size_t i = 2; i < 2; ++i){
794 for(size_t j = 3; j < 4; ++j){
795 TransposeMicrokernelTester()
796 .input_stride(j)
797 .output_stride(i)
798 .block_width(j)
799 .block_height(i)
800 .iterations(1)
801 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
802 }
803 }
804}
805
806TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4) {
807 TransposeMicrokernelTester()
808 .input_stride(4)
809 .output_stride(1)
810 .block_width(2)
811 .block_height(1)
812 .iterations(1)
813 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
814}
815
816TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_os_2) {
817 TransposeMicrokernelTester()
818 .input_stride(2)
819 .output_stride(2)
820 .block_width(2)
821 .block_height(1)
822 .iterations(1)
823 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
824}
825
826TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4_os_2) {
827 TransposeMicrokernelTester()
828 .input_stride(4)
829 .output_stride(2)
830 .block_width(2)
831 .block_height(1)
832 .iterations(1)
833 .Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
834}
835
836TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1) {
837 TransposeMicrokernelTester()
838 .input_stride(1)
839 .output_stride(2)
840 .block_width(1)
841 .block_height(2)
842 .iterations(1)
843 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
844}
845
846TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_1_4_bw_1_2) {
847 for(size_t i = 1; i <= 4; ++i){
848 for(size_t j = 1; j <= 2; ++j){
849 TransposeMicrokernelTester()
850 .input_stride(j)
851 .output_stride(i)
852 .block_width(j)
853 .block_height(i)
854 .iterations(1)
855 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
856 }
857 }
858}
859
860TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2) {
861 TransposeMicrokernelTester()
862 .input_stride(2)
863 .output_stride(2)
864 .block_width(2)
865 .block_height(2)
866 .iterations(1)
867 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
868}
869
870TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2_2) {
871 for(size_t i = 2; i < 2; ++i){
872 TransposeMicrokernelTester()
873 .input_stride(i)
874 .output_stride(2)
875 .block_width(i)
876 .block_height(2)
877 .iterations(1)
878 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
879 }
880}
881
882TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_2_2) {
883 for(size_t i = 2; i < 2; ++i){
884 TransposeMicrokernelTester()
885 .input_stride(i)
886 .output_stride(4)
887 .block_width(i)
888 .block_height(4)
889 .iterations(1)
890 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
891 }
892}
893
894TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_1) {
895 TransposeMicrokernelTester()
896 .input_stride(1)
897 .output_stride(4)
898 .block_width(1)
899 .block_height(4)
900 .iterations(1)
901 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
902}
903
904TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_1){
905 for(size_t i = 3; i < 4; ++i){
906 TransposeMicrokernelTester()
907 .input_stride(1)
908 .output_stride(i)
909 .block_width(1)
910 .block_height(i)
911 .iterations(1)
912 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
913 }
914}
915
916TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2){
917 for(size_t i = 3; i < 4; ++i){
918 TransposeMicrokernelTester()
919 .input_stride(2)
920 .output_stride(i)
921 .block_width(2)
922 .block_height(i)
923 .iterations(1)
924 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
925 }
926}
927
928TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2_2) {
929 for(size_t i = 3; i < 4; ++i){
930 for(size_t j = 2; j < 2; ++j){
931 TransposeMicrokernelTester()
932 .input_stride(j)
933 .output_stride(i)
934 .block_width(j)
935 .block_height(i)
936 .iterations(1)
937 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
938 }
939 }
940}
941
942TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2) {
943 TransposeMicrokernelTester()
944 .input_stride(2)
945 .output_stride(2)
946 .block_width(1)
947 .block_height(2)
948 .iterations(1)
949 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
950}
951
952TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_os_4) {
953 TransposeMicrokernelTester()
954 .input_stride(1)
955 .output_stride(4)
956 .block_width(1)
957 .block_height(2)
958 .iterations(1)
959 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
960}
961
962TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2_os_4) {
963 TransposeMicrokernelTester()
964 .input_stride(2)
965 .output_stride(4)
966 .block_width(1)
967 .block_height(2)
968 .iterations(1)
969 .Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
970}
971
972TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2) {
973 TransposeMicrokernelTester()
974 .input_stride(2)
975 .output_stride(2)
976 .block_width(2)
977 .block_height(2)
978 .iterations(1)
979 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
980}
981
982TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_1_4_bw_1_4) {
983 for(size_t i = 1; i <= 4; ++i){
984 for(size_t j = 1; j <= 4; ++j){
985 TransposeMicrokernelTester()
986 .input_stride(j)
987 .output_stride(i)
988 .block_width(j)
989 .block_height(i)
990 .iterations(1)
991 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
992 }
993 }
994}
995
996TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_4) {
997 TransposeMicrokernelTester()
998 .input_stride(4)
999 .output_stride(2)
1000 .block_width(4)
1001 .block_height(2)
1002 .iterations(1)
1003 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1004}
1005
1006TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_3_4) {
1007 for(size_t i = 3; i < 4; ++i){
1008 TransposeMicrokernelTester()
1009 .input_stride(i)
1010 .output_stride(2)
1011 .block_width(i)
1012 .block_height(2)
1013 .iterations(1)
1014 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1015 }
1016}
1017
1018TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_3_4) {
1019 for(size_t i = 3; i < 4; ++i){
1020 TransposeMicrokernelTester()
1021 .input_stride(i)
1022 .output_stride(4)
1023 .block_width(i)
1024 .block_height(4)
1025 .iterations(1)
1026 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1027 }
1028}
1029
1030TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_2) {
1031 TransposeMicrokernelTester()
1032 .input_stride(2)
1033 .output_stride(4)
1034 .block_width(2)
1035 .block_height(4)
1036 .iterations(1)
1037 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1038}
1039
1040TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_2){
1041 for(size_t i = 3; i < 4; ++i){
1042 TransposeMicrokernelTester()
1043 .input_stride(2)
1044 .output_stride(i)
1045 .block_width(2)
1046 .block_height(i)
1047 .iterations(1)
1048 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1049 }
1050}
1051
1052TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_4){
1053 for(size_t i = 3; i < 4; ++i){
1054 TransposeMicrokernelTester()
1055 .input_stride(4)
1056 .output_stride(i)
1057 .block_width(4)
1058 .block_height(i)
1059 .iterations(1)
1060 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1061 }
1062}
1063
1064TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_3_4) {
1065 for(size_t i = 3; i < 4; ++i){
1066 for(size_t j = 3; j < 4; ++j){
1067 TransposeMicrokernelTester()
1068 .input_stride(j)
1069 .output_stride(i)
1070 .block_width(j)
1071 .block_height(i)
1072 .iterations(1)
1073 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1074 }
1075 }
1076}
1077
1078TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4) {
1079 TransposeMicrokernelTester()
1080 .input_stride(4)
1081 .output_stride(2)
1082 .block_width(2)
1083 .block_height(2)
1084 .iterations(1)
1085 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1086}
1087
1088TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_os_4) {
1089 TransposeMicrokernelTester()
1090 .input_stride(2)
1091 .output_stride(4)
1092 .block_width(2)
1093 .block_height(2)
1094 .iterations(1)
1095 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1096}
1097
1098TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4_os_4) {
1099 TransposeMicrokernelTester()
1100 .input_stride(4)
1101 .output_stride(4)
1102 .block_width(2)
1103 .block_height(2)
1104 .iterations(1)
1105 .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
1106}
1107
1108TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1) {
1109 TransposeMicrokernelTester()
1110 .input_stride(1)
1111 .output_stride(4)
1112 .block_width(1)
1113 .block_height(4)
1114 .iterations(1)
1115 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1116}
1117
1118TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_1_8_bw_1_2) {
1119 for(size_t i = 1; i <= 8; ++i){
1120 for(size_t j = 1; j <= 2; ++j){
1121 TransposeMicrokernelTester()
1122 .input_stride(j)
1123 .output_stride(i)
1124 .block_width(j)
1125 .block_height(i)
1126 .iterations(1)
1127 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1128 }
1129 }
1130}
1131
1132TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2) {
1133 TransposeMicrokernelTester()
1134 .input_stride(2)
1135 .output_stride(4)
1136 .block_width(2)
1137 .block_height(4)
1138 .iterations(1)
1139 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1140}
1141
1142TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2_2) {
1143 for(size_t i = 2; i < 2; ++i){
1144 TransposeMicrokernelTester()
1145 .input_stride(i)
1146 .output_stride(4)
1147 .block_width(i)
1148 .block_height(4)
1149 .iterations(1)
1150 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1151 }
1152}
1153
1154TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_2_2) {
1155 for(size_t i = 2; i < 2; ++i){
1156 TransposeMicrokernelTester()
1157 .input_stride(i)
1158 .output_stride(8)
1159 .block_width(i)
1160 .block_height(8)
1161 .iterations(1)
1162 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1163 }
1164}
1165
1166TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_1) {
1167 TransposeMicrokernelTester()
1168 .input_stride(1)
1169 .output_stride(8)
1170 .block_width(1)
1171 .block_height(8)
1172 .iterations(1)
1173 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1174}
1175
1176TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_1){
1177 for(size_t i = 5; i < 8; ++i){
1178 TransposeMicrokernelTester()
1179 .input_stride(1)
1180 .output_stride(i)
1181 .block_width(1)
1182 .block_height(i)
1183 .iterations(1)
1184 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1185 }
1186}
1187
1188TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2){
1189 for(size_t i = 5; i < 8; ++i){
1190 TransposeMicrokernelTester()
1191 .input_stride(2)
1192 .output_stride(i)
1193 .block_width(2)
1194 .block_height(i)
1195 .iterations(1)
1196 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1197 }
1198}
1199
1200TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2_2) {
1201 for(size_t i = 5; i < 8; ++i){
1202 for(size_t j = 2; j < 2; ++j){
1203 TransposeMicrokernelTester()
1204 .input_stride(j)
1205 .output_stride(i)
1206 .block_width(j)
1207 .block_height(i)
1208 .iterations(1)
1209 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1210 }
1211 }
1212}
1213
1214TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2) {
1215 TransposeMicrokernelTester()
1216 .input_stride(2)
1217 .output_stride(4)
1218 .block_width(1)
1219 .block_height(4)
1220 .iterations(1)
1221 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1222}
1223
1224TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_os_8) {
1225 TransposeMicrokernelTester()
1226 .input_stride(1)
1227 .output_stride(8)
1228 .block_width(1)
1229 .block_height(4)
1230 .iterations(1)
1231 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1232}
1233
1234TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2_os_8) {
1235 TransposeMicrokernelTester()
1236 .input_stride(2)
1237 .output_stride(8)
1238 .block_width(1)
1239 .block_height(4)
1240 .iterations(1)
1241 .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
1242}
1243
1244TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2) {
1245 TransposeMicrokernelTester()
1246 .input_stride(2)
1247 .output_stride(4)
1248 .block_width(2)
1249 .block_height(4)
1250 .iterations(1)
1251 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1252}
1253
1254TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_1_8_bw_1_4) {
1255 for(size_t i = 1; i <= 8; ++i){
1256 for(size_t j = 1; j <= 4; ++j){
1257 TransposeMicrokernelTester()
1258 .input_stride(j)
1259 .output_stride(i)
1260 .block_width(j)
1261 .block_height(i)
1262 .iterations(1)
1263 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1264 }
1265 }
1266}
1267
1268TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_4) {
1269 TransposeMicrokernelTester()
1270 .input_stride(4)
1271 .output_stride(4)
1272 .block_width(4)
1273 .block_height(4)
1274 .iterations(1)
1275 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1276}
1277
1278TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_3_4) {
1279 for(size_t i = 3; i < 4; ++i){
1280 TransposeMicrokernelTester()
1281 .input_stride(i)
1282 .output_stride(4)
1283 .block_width(i)
1284 .block_height(4)
1285 .iterations(1)
1286 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1287 }
1288}
1289
1290TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_3_4) {
1291 for(size_t i = 3; i < 4; ++i){
1292 TransposeMicrokernelTester()
1293 .input_stride(i)
1294 .output_stride(8)
1295 .block_width(i)
1296 .block_height(8)
1297 .iterations(1)
1298 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1299 }
1300}
1301
1302TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_2) {
1303 TransposeMicrokernelTester()
1304 .input_stride(2)
1305 .output_stride(8)
1306 .block_width(2)
1307 .block_height(8)
1308 .iterations(1)
1309 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1310}
1311
1312TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_2){
1313 for(size_t i = 5; i < 8; ++i){
1314 TransposeMicrokernelTester()
1315 .input_stride(2)
1316 .output_stride(i)
1317 .block_width(2)
1318 .block_height(i)
1319 .iterations(1)
1320 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1321 }
1322}
1323
1324TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_4){
1325 for(size_t i = 5; i < 8; ++i){
1326 TransposeMicrokernelTester()
1327 .input_stride(4)
1328 .output_stride(i)
1329 .block_width(4)
1330 .block_height(i)
1331 .iterations(1)
1332 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1333 }
1334}
1335
1336TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_3_4) {
1337 for(size_t i = 5; i < 8; ++i){
1338 for(size_t j = 3; j < 4; ++j){
1339 TransposeMicrokernelTester()
1340 .input_stride(j)
1341 .output_stride(i)
1342 .block_width(j)
1343 .block_height(i)
1344 .iterations(1)
1345 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1346 }
1347 }
1348}
1349
1350TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4) {
1351 TransposeMicrokernelTester()
1352 .input_stride(4)
1353 .output_stride(4)
1354 .block_width(2)
1355 .block_height(4)
1356 .iterations(1)
1357 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1358}
1359
1360TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_os_8) {
1361 TransposeMicrokernelTester()
1362 .input_stride(2)
1363 .output_stride(8)
1364 .block_width(2)
1365 .block_height(4)
1366 .iterations(1)
1367 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1368}
1369
1370TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4_os_8) {
1371 TransposeMicrokernelTester()
1372 .input_stride(4)
1373 .output_stride(8)
1374 .block_width(2)
1375 .block_height(4)
1376 .iterations(1)
1377 .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
1378}
1379
Alan Kelly5da6d382022-01-14 03:19:43 -08001380#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyf2b233b2022-01-31 02:53:57 -08001381 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001382 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001383 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001384 .input_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001385 .output_stride(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001386 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001387 .block_height(2)
1388 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001389 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001390 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001391
Alan Kellyf2b233b2022-01-31 02:53:57 -08001392 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_1_4_bw_1_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001393 TEST_REQUIRES_X86_SSE2;
1394 for(size_t i = 1; i <= 4; ++i){
1395 for(size_t j = 1; j <= 4; ++j){
1396 TransposeMicrokernelTester()
1397 .input_stride(j)
1398 .output_stride(i)
1399 .block_width(j)
1400 .block_height(i)
1401 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001402 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001403 }
1404 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001405 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001406
Alan Kellyf2b233b2022-01-31 02:53:57 -08001407 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001408 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001409 TransposeMicrokernelTester()
1410 .input_stride(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001411 .output_stride(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001412 .block_width(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001413 .block_height(2)
1414 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001415 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001416 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001417
Alan Kellyf2b233b2022-01-31 02:53:57 -08001418 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001419 TEST_REQUIRES_X86_SSE2;
1420 for(size_t i = 3; i < 4; ++i){
1421 TransposeMicrokernelTester()
1422 .input_stride(i)
1423 .output_stride(2)
1424 .block_width(i)
1425 .block_height(2)
1426 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001427 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001428 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001429 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001430
Alan Kellyf2b233b2022-01-31 02:53:57 -08001431 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001432 TEST_REQUIRES_X86_SSE2;
1433 for(size_t i = 3; i < 4; ++i){
1434 TransposeMicrokernelTester()
1435 .input_stride(i)
1436 .output_stride(4)
1437 .block_width(i)
1438 .block_height(4)
1439 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001440 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001441 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001442 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001443
Alan Kellyf2b233b2022-01-31 02:53:57 -08001444 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_2) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001445 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001446 TransposeMicrokernelTester()
1447 .input_stride(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001448 .output_stride(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001449 .block_width(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001450 .block_height(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001451 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001452 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001453 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001454
Alan Kellyf2b233b2022-01-31 02:53:57 -08001455 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_2){
Alan Kelly5da6d382022-01-14 03:19:43 -08001456 TEST_REQUIRES_X86_SSE2;
1457 for(size_t i = 3; i < 4; ++i){
Alan Kellyd19bde92022-01-14 02:30:28 -08001458 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001459 .input_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001460 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001461 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001462 .block_height(i)
1463 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001464 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001465 }
1466 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001467
Alan Kellyf2b233b2022-01-31 02:53:57 -08001468 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_4){
Alan Kelly5da6d382022-01-14 03:19:43 -08001469 TEST_REQUIRES_X86_SSE2;
1470 for(size_t i = 3; i < 4; ++i){
Alan Kellyd19bde92022-01-14 02:30:28 -08001471 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001472 .input_stride(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001473 .output_stride(i)
Alan Kelly5da6d382022-01-14 03:19:43 -08001474 .block_width(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001475 .block_height(i)
1476 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001477 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001478 }
1479 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001480
Alan Kellyf2b233b2022-01-31 02:53:57 -08001481 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001482 TEST_REQUIRES_X86_SSE2;
1483 for(size_t i = 3; i < 4; ++i){
1484 for(size_t j = 3; j < 4; ++j){
1485 TransposeMicrokernelTester()
1486 .input_stride(j)
1487 .output_stride(i)
1488 .block_width(j)
1489 .block_height(i)
1490 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001491 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001492 }
1493 }
1494 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001495
Alan Kellyf2b233b2022-01-31 02:53:57 -08001496 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001497 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001498 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001499 .input_stride(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001500 .output_stride(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001501 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001502 .block_height(2)
1503 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001504 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001505 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001506
Alan Kellyf2b233b2022-01-31 02:53:57 -08001507 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_os_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001508 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001509 TransposeMicrokernelTester()
1510 .input_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001511 .output_stride(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001512 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001513 .block_height(2)
1514 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001515 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001516 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001517
Alan Kellyf2b233b2022-01-31 02:53:57 -08001518 TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001519 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001520 TransposeMicrokernelTester()
1521 .input_stride(4)
Alan Kelly5da6d382022-01-14 03:19:43 -08001522 .output_stride(4)
1523 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001524 .block_height(2)
1525 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001526 .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001527 }
Alan Kelly5da6d382022-01-14 03:19:43 -08001528#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyd19bde92022-01-14 02:30:28 -08001529
Alan Kellyd19bde92022-01-14 02:30:28 -08001530
Alan Kelly5da6d382022-01-14 03:19:43 -08001531#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1532 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2) {
1533 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001534 TransposeMicrokernelTester()
1535 .input_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001536 .output_stride(2)
Alan Kelly5da6d382022-01-14 03:19:43 -08001537 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001538 .block_height(2)
1539 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001540 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001541 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001542
Alan Kelly5da6d382022-01-14 03:19:43 -08001543 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_1_4_bw_1_4) {
1544 TEST_REQUIRES_X86_SSE2;
1545 for(size_t i = 1; i <= 4; ++i){
1546 for(size_t j = 1; j <= 4; ++j){
1547 TransposeMicrokernelTester()
1548 .input_stride(j)
1549 .output_stride(i)
1550 .block_width(j)
1551 .block_height(i)
1552 .iterations(1)
1553 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1554 }
1555 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001556 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001557
Alan Kelly5da6d382022-01-14 03:19:43 -08001558 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_4) {
1559 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001560 TransposeMicrokernelTester()
1561 .input_stride(4)
Alan Kelly5da6d382022-01-14 03:19:43 -08001562 .output_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001563 .block_width(4)
Alan Kelly5da6d382022-01-14 03:19:43 -08001564 .block_height(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001565 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001566 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001567 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001568
Alan Kelly5da6d382022-01-14 03:19:43 -08001569 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_3_4) {
1570 TEST_REQUIRES_X86_SSE2;
1571 for(size_t i = 3; i < 4; ++i){
Alan Kellyd19bde92022-01-14 02:30:28 -08001572 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001573 .input_stride(i)
1574 .output_stride(2)
1575 .block_width(i)
1576 .block_height(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001577 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001578 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001579 }
1580 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001581
Alan Kelly5da6d382022-01-14 03:19:43 -08001582 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_4_bw_3_4) {
1583 TEST_REQUIRES_X86_SSE2;
1584 for(size_t i = 3; i < 4; ++i){
Alan Kellyd19bde92022-01-14 02:30:28 -08001585 TransposeMicrokernelTester()
Alan Kelly5da6d382022-01-14 03:19:43 -08001586 .input_stride(i)
1587 .output_stride(4)
1588 .block_width(i)
1589 .block_height(4)
Alan Kellyd19bde92022-01-14 02:30:28 -08001590 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001591 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001592 }
1593 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001594
Alan Kelly5da6d382022-01-14 03:19:43 -08001595 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_4_bw_2) {
1596 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001597 TransposeMicrokernelTester()
1598 .input_stride(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001599 .output_stride(4)
Alan Kelly5da6d382022-01-14 03:19:43 -08001600 .block_width(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001601 .block_height(4)
1602 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001603 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001604 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001605
Alan Kelly5da6d382022-01-14 03:19:43 -08001606 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_2){
1607 TEST_REQUIRES_X86_SSE2;
1608 for(size_t i = 3; i < 4; ++i){
1609 TransposeMicrokernelTester()
1610 .input_stride(2)
1611 .output_stride(i)
1612 .block_width(2)
1613 .block_height(i)
1614 .iterations(1)
1615 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1616 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001617 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001618
Alan Kelly5da6d382022-01-14 03:19:43 -08001619 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_4){
1620 TEST_REQUIRES_X86_SSE2;
1621 for(size_t i = 3; i < 4; ++i){
1622 TransposeMicrokernelTester()
1623 .input_stride(4)
1624 .output_stride(i)
1625 .block_width(4)
1626 .block_height(i)
1627 .iterations(1)
1628 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1629 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001630 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001631
Alan Kelly5da6d382022-01-14 03:19:43 -08001632 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_3_4) {
1633 TEST_REQUIRES_X86_SSE2;
1634 for(size_t i = 3; i < 4; ++i){
1635 for(size_t j = 3; j < 4; ++j){
1636 TransposeMicrokernelTester()
1637 .input_stride(j)
1638 .output_stride(i)
1639 .block_width(j)
1640 .block_height(i)
1641 .iterations(1)
1642 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1643 }
1644 }
1645 }
1646
1647 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_is_4) {
1648 TEST_REQUIRES_X86_SSE2;
Alan Kellyd19bde92022-01-14 02:30:28 -08001649 TransposeMicrokernelTester()
1650 .input_stride(4)
Alan Kelly5da6d382022-01-14 03:19:43 -08001651 .output_stride(2)
1652 .block_width(2)
1653 .block_height(2)
Alan Kellyd19bde92022-01-14 02:30:28 -08001654 .iterations(1)
Alan Kelly5da6d382022-01-14 03:19:43 -08001655 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
Alan Kellyd19bde92022-01-14 02:30:28 -08001656 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001657
Alan Kelly5da6d382022-01-14 03:19:43 -08001658 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_os_4) {
1659 TEST_REQUIRES_X86_SSE2;
1660 TransposeMicrokernelTester()
1661 .input_stride(2)
1662 .output_stride(4)
1663 .block_width(2)
1664 .block_height(2)
1665 .iterations(1)
1666 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1667 }
1668
1669 TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_is_4_os_4) {
1670 TEST_REQUIRES_X86_SSE2;
1671 TransposeMicrokernelTester()
1672 .input_stride(4)
1673 .output_stride(4)
1674 .block_width(2)
1675 .block_height(2)
1676 .iterations(1)
1677 .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
1678 }
1679#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1680
1681
1682#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1683 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2) {
1684 TEST_REQUIRES_X86_SSE2;
1685 TransposeMicrokernelTester()
1686 .input_stride(2)
1687 .output_stride(2)
1688 .block_width(2)
1689 .block_height(2)
1690 .iterations(1)
1691 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1692 }
1693
1694 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_1_4_bw_1_4) {
1695 TEST_REQUIRES_X86_SSE2;
1696 for(size_t i = 1; i <= 4; ++i){
1697 for(size_t j = 1; j <= 4; ++j){
1698 TransposeMicrokernelTester()
1699 .input_stride(j)
1700 .output_stride(i)
1701 .block_width(j)
1702 .block_height(i)
1703 .iterations(1)
1704 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1705 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001706 }
1707 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001708
Alan Kelly5da6d382022-01-14 03:19:43 -08001709 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_4) {
1710 TEST_REQUIRES_X86_SSE2;
1711 TransposeMicrokernelTester()
1712 .input_stride(4)
1713 .output_stride(2)
1714 .block_width(4)
1715 .block_height(2)
1716 .iterations(1)
1717 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1718 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001719
Alan Kelly5da6d382022-01-14 03:19:43 -08001720 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_3_4) {
1721 TEST_REQUIRES_X86_SSE2;
1722 for(size_t i = 3; i < 4; ++i){
1723 TransposeMicrokernelTester()
1724 .input_stride(i)
1725 .output_stride(2)
1726 .block_width(i)
1727 .block_height(2)
1728 .iterations(1)
1729 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1730 }
1731 }
Alan Kellyd19bde92022-01-14 02:30:28 -08001732
Alan Kelly5da6d382022-01-14 03:19:43 -08001733 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_4_bw_3_4) {
1734 TEST_REQUIRES_X86_SSE2;
1735 for(size_t i = 3; i < 4; ++i){
1736 TransposeMicrokernelTester()
1737 .input_stride(i)
1738 .output_stride(4)
1739 .block_width(i)
1740 .block_height(4)
1741 .iterations(1)
1742 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1743 }
1744 }
1745
1746 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_4_bw_2) {
1747 TEST_REQUIRES_X86_SSE2;
1748 TransposeMicrokernelTester()
1749 .input_stride(2)
1750 .output_stride(4)
1751 .block_width(2)
1752 .block_height(4)
1753 .iterations(1)
1754 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1755 }
1756
1757 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_2){
1758 TEST_REQUIRES_X86_SSE2;
1759 for(size_t i = 3; i < 4; ++i){
1760 TransposeMicrokernelTester()
1761 .input_stride(2)
1762 .output_stride(i)
1763 .block_width(2)
1764 .block_height(i)
1765 .iterations(1)
1766 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1767 }
1768 }
1769
1770 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_4){
1771 TEST_REQUIRES_X86_SSE2;
1772 for(size_t i = 3; i < 4; ++i){
1773 TransposeMicrokernelTester()
1774 .input_stride(4)
1775 .output_stride(i)
1776 .block_width(4)
1777 .block_height(i)
1778 .iterations(1)
1779 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1780 }
1781 }
1782
1783 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_3_4) {
1784 TEST_REQUIRES_X86_SSE2;
1785 for(size_t i = 3; i < 4; ++i){
1786 for(size_t j = 3; j < 4; ++j){
1787 TransposeMicrokernelTester()
1788 .input_stride(j)
1789 .output_stride(i)
1790 .block_width(j)
1791 .block_height(i)
1792 .iterations(1)
1793 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1794 }
1795 }
1796 }
1797
1798 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_is_4) {
1799 TEST_REQUIRES_X86_SSE2;
1800 TransposeMicrokernelTester()
1801 .input_stride(4)
1802 .output_stride(2)
1803 .block_width(2)
1804 .block_height(2)
1805 .iterations(1)
1806 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1807 }
1808
1809 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_os_4) {
1810 TEST_REQUIRES_X86_SSE2;
1811 TransposeMicrokernelTester()
1812 .input_stride(2)
1813 .output_stride(4)
1814 .block_width(2)
1815 .block_height(2)
1816 .iterations(1)
1817 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1818 }
1819
1820 TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_is_4_os_4) {
1821 TEST_REQUIRES_X86_SSE2;
1822 TransposeMicrokernelTester()
1823 .input_stride(4)
1824 .output_stride(4)
1825 .block_width(2)
1826 .block_height(2)
1827 .iterations(1)
1828 .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
1829 }
1830#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1831
1832
1833#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Alan Kellyf2b233b2022-01-31 02:53:57 -08001834 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001835 TEST_REQUIRES_X86_SSE2;
1836 TransposeMicrokernelTester()
1837 .input_stride(2)
1838 .output_stride(2)
1839 .block_width(2)
1840 .block_height(2)
1841 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001842 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001843 }
1844
Alan Kellyf2b233b2022-01-31 02:53:57 -08001845 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_1_4_bw_1_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001846 TEST_REQUIRES_X86_SSE2;
1847 for(size_t i = 1; i <= 4; ++i){
1848 for(size_t j = 1; j <= 4; ++j){
1849 TransposeMicrokernelTester()
1850 .input_stride(j)
1851 .output_stride(i)
1852 .block_width(j)
1853 .block_height(i)
1854 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001855 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001856 }
1857 }
1858 }
1859
Alan Kellyf2b233b2022-01-31 02:53:57 -08001860 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001861 TEST_REQUIRES_X86_SSE2;
1862 TransposeMicrokernelTester()
1863 .input_stride(4)
1864 .output_stride(2)
1865 .block_width(4)
1866 .block_height(2)
1867 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001868 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001869 }
1870
Alan Kellyf2b233b2022-01-31 02:53:57 -08001871 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001872 TEST_REQUIRES_X86_SSE2;
1873 for(size_t i = 3; i < 4; ++i){
1874 TransposeMicrokernelTester()
1875 .input_stride(i)
1876 .output_stride(2)
1877 .block_width(i)
1878 .block_height(2)
1879 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001880 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001881 }
1882 }
1883
Alan Kellyf2b233b2022-01-31 02:53:57 -08001884 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001885 TEST_REQUIRES_X86_SSE2;
1886 for(size_t i = 3; i < 4; ++i){
1887 TransposeMicrokernelTester()
1888 .input_stride(i)
1889 .output_stride(4)
1890 .block_width(i)
1891 .block_height(4)
1892 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001893 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001894 }
1895 }
1896
Alan Kellyf2b233b2022-01-31 02:53:57 -08001897 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_2) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001898 TEST_REQUIRES_X86_SSE2;
1899 TransposeMicrokernelTester()
1900 .input_stride(2)
1901 .output_stride(4)
1902 .block_width(2)
1903 .block_height(4)
1904 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001905 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001906 }
1907
Alan Kellyf2b233b2022-01-31 02:53:57 -08001908 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_2){
Alan Kelly5da6d382022-01-14 03:19:43 -08001909 TEST_REQUIRES_X86_SSE2;
1910 for(size_t i = 3; i < 4; ++i){
1911 TransposeMicrokernelTester()
1912 .input_stride(2)
1913 .output_stride(i)
1914 .block_width(2)
1915 .block_height(i)
1916 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001917 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001918 }
1919 }
1920
Alan Kellyf2b233b2022-01-31 02:53:57 -08001921 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_4){
Alan Kelly5da6d382022-01-14 03:19:43 -08001922 TEST_REQUIRES_X86_SSE2;
1923 for(size_t i = 3; i < 4; ++i){
1924 TransposeMicrokernelTester()
1925 .input_stride(4)
1926 .output_stride(i)
1927 .block_width(4)
1928 .block_height(i)
1929 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001930 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001931 }
1932 }
1933
Alan Kellyf2b233b2022-01-31 02:53:57 -08001934 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_3_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001935 TEST_REQUIRES_X86_SSE2;
1936 for(size_t i = 3; i < 4; ++i){
1937 for(size_t j = 3; j < 4; ++j){
1938 TransposeMicrokernelTester()
1939 .input_stride(j)
1940 .output_stride(i)
1941 .block_width(j)
1942 .block_height(i)
1943 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001944 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001945 }
1946 }
1947 }
1948
Alan Kellyf2b233b2022-01-31 02:53:57 -08001949 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001950 TEST_REQUIRES_X86_SSE2;
1951 TransposeMicrokernelTester()
1952 .input_stride(4)
1953 .output_stride(2)
1954 .block_width(2)
1955 .block_height(2)
1956 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001957 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001958 }
1959
Alan Kellyf2b233b2022-01-31 02:53:57 -08001960 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_os_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001961 TEST_REQUIRES_X86_SSE2;
1962 TransposeMicrokernelTester()
1963 .input_stride(2)
1964 .output_stride(4)
1965 .block_width(2)
1966 .block_height(2)
1967 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001968 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001969 }
1970
Alan Kellyf2b233b2022-01-31 02:53:57 -08001971 TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
Alan Kelly5da6d382022-01-14 03:19:43 -08001972 TEST_REQUIRES_X86_SSE2;
1973 TransposeMicrokernelTester()
1974 .input_stride(4)
1975 .output_stride(4)
1976 .block_width(2)
1977 .block_height(2)
1978 .iterations(1)
Alan Kellyf2b233b2022-01-31 02:53:57 -08001979 .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
Alan Kelly5da6d382022-01-14 03:19:43 -08001980 }
1981#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1982
1983
1984#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1985 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2) {
1986 TEST_REQUIRES_X86_SSE2;
1987 TransposeMicrokernelTester()
1988 .input_stride(2)
1989 .output_stride(2)
1990 .block_width(2)
1991 .block_height(2)
1992 .iterations(1)
1993 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
1994 }
1995
1996 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_1_4_bw_1_4) {
1997 TEST_REQUIRES_X86_SSE2;
1998 for(size_t i = 1; i <= 4; ++i){
1999 for(size_t j = 1; j <= 4; ++j){
2000 TransposeMicrokernelTester()
2001 .input_stride(j)
2002 .output_stride(i)
2003 .block_width(j)
2004 .block_height(i)
2005 .iterations(1)
2006 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2007 }
2008 }
2009 }
2010
2011 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_4) {
2012 TEST_REQUIRES_X86_SSE2;
2013 TransposeMicrokernelTester()
2014 .input_stride(4)
2015 .output_stride(2)
2016 .block_width(4)
2017 .block_height(2)
2018 .iterations(1)
2019 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2020 }
2021
2022 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_3_4) {
2023 TEST_REQUIRES_X86_SSE2;
2024 for(size_t i = 3; i < 4; ++i){
2025 TransposeMicrokernelTester()
2026 .input_stride(i)
2027 .output_stride(2)
2028 .block_width(i)
2029 .block_height(2)
2030 .iterations(1)
2031 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2032 }
2033 }
2034
2035 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_4_bw_3_4) {
2036 TEST_REQUIRES_X86_SSE2;
2037 for(size_t i = 3; i < 4; ++i){
2038 TransposeMicrokernelTester()
2039 .input_stride(i)
2040 .output_stride(4)
2041 .block_width(i)
2042 .block_height(4)
2043 .iterations(1)
2044 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2045 }
2046 }
2047
2048 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_4_bw_2) {
2049 TEST_REQUIRES_X86_SSE2;
2050 TransposeMicrokernelTester()
2051 .input_stride(2)
2052 .output_stride(4)
2053 .block_width(2)
2054 .block_height(4)
2055 .iterations(1)
2056 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2057 }
2058
2059 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_2){
2060 TEST_REQUIRES_X86_SSE2;
2061 for(size_t i = 3; i < 4; ++i){
2062 TransposeMicrokernelTester()
2063 .input_stride(2)
2064 .output_stride(i)
2065 .block_width(2)
2066 .block_height(i)
2067 .iterations(1)
2068 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2069 }
2070 }
2071
2072 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_4){
2073 TEST_REQUIRES_X86_SSE2;
2074 for(size_t i = 3; i < 4; ++i){
2075 TransposeMicrokernelTester()
2076 .input_stride(4)
2077 .output_stride(i)
2078 .block_width(4)
2079 .block_height(i)
2080 .iterations(1)
2081 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2082 }
2083 }
2084
2085 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_3_4) {
2086 TEST_REQUIRES_X86_SSE2;
2087 for(size_t i = 3; i < 4; ++i){
2088 for(size_t j = 3; j < 4; ++j){
2089 TransposeMicrokernelTester()
2090 .input_stride(j)
2091 .output_stride(i)
2092 .block_width(j)
2093 .block_height(i)
2094 .iterations(1)
2095 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2096 }
2097 }
2098 }
2099
2100 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_is_4) {
2101 TEST_REQUIRES_X86_SSE2;
2102 TransposeMicrokernelTester()
2103 .input_stride(4)
2104 .output_stride(2)
2105 .block_width(2)
2106 .block_height(2)
2107 .iterations(1)
2108 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2109 }
2110
2111 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_os_4) {
2112 TEST_REQUIRES_X86_SSE2;
2113 TransposeMicrokernelTester()
2114 .input_stride(2)
2115 .output_stride(4)
2116 .block_width(2)
2117 .block_height(2)
2118 .iterations(1)
2119 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2120 }
2121
2122 TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_is_4_os_4) {
2123 TEST_REQUIRES_X86_SSE2;
2124 TransposeMicrokernelTester()
2125 .input_stride(4)
2126 .output_stride(4)
2127 .block_width(2)
2128 .block_height(2)
2129 .iterations(1)
2130 .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
2131 }
2132#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2133
2134
2135#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2136 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2) {
2137 TEST_REQUIRES_X86_SSE2;
2138 TransposeMicrokernelTester()
2139 .input_stride(2)
2140 .output_stride(2)
2141 .block_width(2)
2142 .block_height(2)
2143 .iterations(1)
2144 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2145 }
2146
2147 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_1_4_bw_1_4) {
2148 TEST_REQUIRES_X86_SSE2;
2149 for(size_t i = 1; i <= 4; ++i){
2150 for(size_t j = 1; j <= 4; ++j){
2151 TransposeMicrokernelTester()
2152 .input_stride(j)
2153 .output_stride(i)
2154 .block_width(j)
2155 .block_height(i)
2156 .iterations(1)
2157 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2158 }
2159 }
2160 }
2161
2162 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_4) {
2163 TEST_REQUIRES_X86_SSE2;
2164 TransposeMicrokernelTester()
2165 .input_stride(4)
2166 .output_stride(2)
2167 .block_width(4)
2168 .block_height(2)
2169 .iterations(1)
2170 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2171 }
2172
2173 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_3_4) {
2174 TEST_REQUIRES_X86_SSE2;
2175 for(size_t i = 3; i < 4; ++i){
2176 TransposeMicrokernelTester()
2177 .input_stride(i)
2178 .output_stride(2)
2179 .block_width(i)
2180 .block_height(2)
2181 .iterations(1)
2182 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2183 }
2184 }
2185
2186 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_4_bw_3_4) {
2187 TEST_REQUIRES_X86_SSE2;
2188 for(size_t i = 3; i < 4; ++i){
2189 TransposeMicrokernelTester()
2190 .input_stride(i)
2191 .output_stride(4)
2192 .block_width(i)
2193 .block_height(4)
2194 .iterations(1)
2195 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2196 }
2197 }
2198
2199 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_4_bw_2) {
2200 TEST_REQUIRES_X86_SSE2;
2201 TransposeMicrokernelTester()
2202 .input_stride(2)
2203 .output_stride(4)
2204 .block_width(2)
2205 .block_height(4)
2206 .iterations(1)
2207 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2208 }
2209
2210 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_2){
2211 TEST_REQUIRES_X86_SSE2;
2212 for(size_t i = 3; i < 4; ++i){
2213 TransposeMicrokernelTester()
2214 .input_stride(2)
2215 .output_stride(i)
2216 .block_width(2)
2217 .block_height(i)
2218 .iterations(1)
2219 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2220 }
2221 }
2222
2223 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_4){
2224 TEST_REQUIRES_X86_SSE2;
2225 for(size_t i = 3; i < 4; ++i){
2226 TransposeMicrokernelTester()
2227 .input_stride(4)
2228 .output_stride(i)
2229 .block_width(4)
2230 .block_height(i)
2231 .iterations(1)
2232 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2233 }
2234 }
2235
2236 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_3_4) {
2237 TEST_REQUIRES_X86_SSE2;
2238 for(size_t i = 3; i < 4; ++i){
2239 for(size_t j = 3; j < 4; ++j){
2240 TransposeMicrokernelTester()
2241 .input_stride(j)
2242 .output_stride(i)
2243 .block_width(j)
2244 .block_height(i)
2245 .iterations(1)
2246 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2247 }
2248 }
2249 }
2250
2251 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_is_4) {
2252 TEST_REQUIRES_X86_SSE2;
2253 TransposeMicrokernelTester()
2254 .input_stride(4)
2255 .output_stride(2)
2256 .block_width(2)
2257 .block_height(2)
2258 .iterations(1)
2259 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2260 }
2261
2262 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_os_4) {
2263 TEST_REQUIRES_X86_SSE2;
2264 TransposeMicrokernelTester()
2265 .input_stride(2)
2266 .output_stride(4)
2267 .block_width(2)
2268 .block_height(2)
2269 .iterations(1)
2270 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2271 }
2272
2273 TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_is_4_os_4) {
2274 TEST_REQUIRES_X86_SSE2;
2275 TransposeMicrokernelTester()
2276 .input_stride(4)
2277 .output_stride(4)
2278 .block_width(2)
2279 .block_height(2)
2280 .iterations(1)
2281 .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
2282 }
2283#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64