blob: 44927a7a20e45583812dd8055f4968b8bc5b5c2b [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/x32-packx.yaml
// Generator: tools/generate-pack-test.py
9
10
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/packx.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "pack-microkernel-tester.h"
18
19
Marat Dukhan1dadbf72019-10-01 10:46:20 -070020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070021 TEST(X32_PACKX_4X__NEON_ST4, k_eq_4) {
22 TEST_REQUIRES_ARM_NEON;
23 PackMicrokernelTester()
24 .mr(4)
25 .m(4)
26 .k(4)
27 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
28 }
29
30 TEST(X32_PACKX_4X__NEON_ST4, k_eq_4_subtile) {
31 TEST_REQUIRES_ARM_NEON;
32 for (size_t m = 1; m <= 4; m++) {
33 PackMicrokernelTester()
34 .mr(4)
35 .m(m)
36 .k(4)
37 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
38 }
39 }
40
41 TEST(X32_PACKX_4X__NEON_ST4, k_lt_4) {
42 TEST_REQUIRES_ARM_NEON;
43 for (size_t k = 1; k < 4; k++) {
44 PackMicrokernelTester()
45 .mr(4)
46 .m(4)
47 .k(k)
48 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
49 }
50 }
51
52 TEST(X32_PACKX_4X__NEON_ST4, k_lt_4_subtile) {
53 TEST_REQUIRES_ARM_NEON;
54 for (size_t k = 1; k < 4; k++) {
55 for (size_t m = 1; m <= 4; m++) {
56 PackMicrokernelTester()
57 .mr(4)
58 .m(m)
59 .k(k)
60 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
61 }
62 }
63 }
64
65 TEST(X32_PACKX_4X__NEON_ST4, k_gt_4) {
66 TEST_REQUIRES_ARM_NEON;
67 for (size_t k = 5; k < 8; k++) {
68 PackMicrokernelTester()
69 .mr(4)
70 .m(4)
71 .k(k)
72 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
73 }
74 }
75
76 TEST(X32_PACKX_4X__NEON_ST4, k_gt_4_subtile) {
77 TEST_REQUIRES_ARM_NEON;
78 for (size_t k = 5; k < 8; k++) {
79 for (size_t m = 1; m <= 4; m++) {
80 PackMicrokernelTester()
81 .mr(4)
82 .m(m)
83 .k(k)
84 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
85 }
86 }
87 }
88
89 TEST(X32_PACKX_4X__NEON_ST4, k_div_4) {
90 TEST_REQUIRES_ARM_NEON;
91 for (size_t k = 8; k < 40; k += 4) {
92 PackMicrokernelTester()
93 .mr(4)
94 .m(4)
95 .k(k)
96 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
97 }
98 }
99
100 TEST(X32_PACKX_4X__NEON_ST4, k_div_4_subtile) {
101 TEST_REQUIRES_ARM_NEON;
102 for (size_t k = 8; k < 40; k += 4) {
103 for (size_t m = 1; m <= 4; m++) {
104 PackMicrokernelTester()
105 .mr(4)
106 .m(m)
107 .k(k)
108 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
109 }
110 }
111 }
112
113 TEST(X32_PACKX_4X__NEON_ST4, strided_x) {
114 TEST_REQUIRES_ARM_NEON;
115 for (size_t k = 1; k <= 20; k += 5) {
116 PackMicrokernelTester()
117 .mr(4)
118 .m(4)
119 .k(k)
120 .x_stride(23)
121 .Test(xnn_x32_packx_ukernel_4x__neon_st4);
122 }
123 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700124#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125
126
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700127#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700128 TEST(X32_PACKX_4X__SSE, k_eq_4) {
129 TEST_REQUIRES_X86_SSE;
130 PackMicrokernelTester()
131 .mr(4)
132 .m(4)
133 .k(4)
134 .Test(xnn_x32_packx_ukernel_4x__sse);
135 }
136
137 TEST(X32_PACKX_4X__SSE, k_eq_4_subtile) {
138 TEST_REQUIRES_X86_SSE;
139 for (size_t m = 1; m <= 4; m++) {
140 PackMicrokernelTester()
141 .mr(4)
142 .m(m)
143 .k(4)
144 .Test(xnn_x32_packx_ukernel_4x__sse);
145 }
146 }
147
148 TEST(X32_PACKX_4X__SSE, k_lt_4) {
149 TEST_REQUIRES_X86_SSE;
150 for (size_t k = 1; k < 4; k++) {
151 PackMicrokernelTester()
152 .mr(4)
153 .m(4)
154 .k(k)
155 .Test(xnn_x32_packx_ukernel_4x__sse);
156 }
157 }
158
159 TEST(X32_PACKX_4X__SSE, k_lt_4_subtile) {
160 TEST_REQUIRES_X86_SSE;
161 for (size_t k = 1; k < 4; k++) {
162 for (size_t m = 1; m <= 4; m++) {
163 PackMicrokernelTester()
164 .mr(4)
165 .m(m)
166 .k(k)
167 .Test(xnn_x32_packx_ukernel_4x__sse);
168 }
169 }
170 }
171
172 TEST(X32_PACKX_4X__SSE, k_gt_4) {
173 TEST_REQUIRES_X86_SSE;
174 for (size_t k = 5; k < 8; k++) {
175 PackMicrokernelTester()
176 .mr(4)
177 .m(4)
178 .k(k)
179 .Test(xnn_x32_packx_ukernel_4x__sse);
180 }
181 }
182
183 TEST(X32_PACKX_4X__SSE, k_gt_4_subtile) {
184 TEST_REQUIRES_X86_SSE;
185 for (size_t k = 5; k < 8; k++) {
186 for (size_t m = 1; m <= 4; m++) {
187 PackMicrokernelTester()
188 .mr(4)
189 .m(m)
190 .k(k)
191 .Test(xnn_x32_packx_ukernel_4x__sse);
192 }
193 }
194 }
195
196 TEST(X32_PACKX_4X__SSE, k_div_4) {
197 TEST_REQUIRES_X86_SSE;
198 for (size_t k = 8; k < 40; k += 4) {
199 PackMicrokernelTester()
200 .mr(4)
201 .m(4)
202 .k(k)
203 .Test(xnn_x32_packx_ukernel_4x__sse);
204 }
205 }
206
207 TEST(X32_PACKX_4X__SSE, k_div_4_subtile) {
208 TEST_REQUIRES_X86_SSE;
209 for (size_t k = 8; k < 40; k += 4) {
210 for (size_t m = 1; m <= 4; m++) {
211 PackMicrokernelTester()
212 .mr(4)
213 .m(m)
214 .k(k)
215 .Test(xnn_x32_packx_ukernel_4x__sse);
216 }
217 }
218 }
219
220 TEST(X32_PACKX_4X__SSE, strided_x) {
221 TEST_REQUIRES_X86_SSE;
222 for (size_t k = 1; k <= 20; k += 5) {
223 PackMicrokernelTester()
224 .mr(4)
225 .m(4)
226 .k(k)
227 .x_stride(23)
228 .Test(xnn_x32_packx_ukernel_4x__sse);
229 }
230 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700231#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700232
233
Marat Dukhan66d99e92020-07-16 12:56:21 -0700234#if XNN_ARCH_WASMSIMD
235 TEST(X32_PACKX_4X__WASMSIMD, k_eq_4) {
236 PackMicrokernelTester()
237 .mr(4)
238 .m(4)
239 .k(4)
240 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
241 }
242
243 TEST(X32_PACKX_4X__WASMSIMD, k_eq_4_subtile) {
244 for (size_t m = 1; m <= 4; m++) {
245 PackMicrokernelTester()
246 .mr(4)
247 .m(m)
248 .k(4)
249 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
250 }
251 }
252
253 TEST(X32_PACKX_4X__WASMSIMD, k_lt_4) {
254 for (size_t k = 1; k < 4; k++) {
255 PackMicrokernelTester()
256 .mr(4)
257 .m(4)
258 .k(k)
259 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
260 }
261 }
262
263 TEST(X32_PACKX_4X__WASMSIMD, k_lt_4_subtile) {
264 for (size_t k = 1; k < 4; k++) {
265 for (size_t m = 1; m <= 4; m++) {
266 PackMicrokernelTester()
267 .mr(4)
268 .m(m)
269 .k(k)
270 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
271 }
272 }
273 }
274
275 TEST(X32_PACKX_4X__WASMSIMD, k_gt_4) {
276 for (size_t k = 5; k < 8; k++) {
277 PackMicrokernelTester()
278 .mr(4)
279 .m(4)
280 .k(k)
281 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
282 }
283 }
284
285 TEST(X32_PACKX_4X__WASMSIMD, k_gt_4_subtile) {
286 for (size_t k = 5; k < 8; k++) {
287 for (size_t m = 1; m <= 4; m++) {
288 PackMicrokernelTester()
289 .mr(4)
290 .m(m)
291 .k(k)
292 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
293 }
294 }
295 }
296
297 TEST(X32_PACKX_4X__WASMSIMD, k_div_4) {
298 for (size_t k = 8; k < 40; k += 4) {
299 PackMicrokernelTester()
300 .mr(4)
301 .m(4)
302 .k(k)
303 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
304 }
305 }
306
307 TEST(X32_PACKX_4X__WASMSIMD, k_div_4_subtile) {
308 for (size_t k = 8; k < 40; k += 4) {
309 for (size_t m = 1; m <= 4; m++) {
310 PackMicrokernelTester()
311 .mr(4)
312 .m(m)
313 .k(k)
314 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
315 }
316 }
317 }
318
319 TEST(X32_PACKX_4X__WASMSIMD, strided_x) {
320 for (size_t k = 1; k <= 20; k += 5) {
321 PackMicrokernelTester()
322 .mr(4)
323 .m(4)
324 .k(k)
325 .x_stride(23)
326 .Test(xnn_x32_packx_ukernel_4x__wasmsimd);
327 }
328 }
329#endif // XNN_ARCH_WASMSIMD
330
331
XNNPACK Teamb455b122019-09-27 18:10:33 -0700332TEST(X32_PACKX_2X__SCALAR, k_eq_1) {
333 PackMicrokernelTester()
334 .mr(2)
335 .m(2)
336 .k(1)
337 .Test(xnn_x32_packx_ukernel_2x__scalar);
338}
339
340TEST(X32_PACKX_2X__SCALAR, k_eq_1_subtile) {
341 for (size_t m = 1; m <= 2; m++) {
342 PackMicrokernelTester()
343 .mr(2)
344 .m(m)
345 .k(1)
346 .Test(xnn_x32_packx_ukernel_2x__scalar);
347 }
348}
349
350TEST(X32_PACKX_2X__SCALAR, k_gt_1) {
351 for (size_t k = 2; k < 10; k++) {
352 PackMicrokernelTester()
353 .mr(2)
354 .m(2)
355 .k(k)
356 .Test(xnn_x32_packx_ukernel_2x__scalar);
357 }
358}
359
360TEST(X32_PACKX_2X__SCALAR, k_gt_1_subtile) {
361 for (size_t k = 2; k < 10; k++) {
362 for (size_t m = 1; m <= 2; m++) {
363 PackMicrokernelTester()
364 .mr(2)
365 .m(m)
366 .k(k)
367 .Test(xnn_x32_packx_ukernel_2x__scalar);
368 }
369 }
370}
371
372TEST(X32_PACKX_2X__SCALAR, strided_x) {
373 for (size_t k = 1; k <= 5; k += 2) {
374 PackMicrokernelTester()
375 .mr(2)
376 .m(2)
377 .k(k)
378 .x_stride(7)
379 .Test(xnn_x32_packx_ukernel_2x__scalar);
380 }
381}
382
383TEST(X32_PACKX_3X__SCALAR, k_eq_1) {
384 PackMicrokernelTester()
385 .mr(3)
386 .m(3)
387 .k(1)
388 .Test(xnn_x32_packx_ukernel_3x__scalar);
389}
390
391TEST(X32_PACKX_3X__SCALAR, k_eq_1_subtile) {
392 for (size_t m = 1; m <= 3; m++) {
393 PackMicrokernelTester()
394 .mr(3)
395 .m(m)
396 .k(1)
397 .Test(xnn_x32_packx_ukernel_3x__scalar);
398 }
399}
400
401TEST(X32_PACKX_3X__SCALAR, k_gt_1) {
402 for (size_t k = 2; k < 10; k++) {
403 PackMicrokernelTester()
404 .mr(3)
405 .m(3)
406 .k(k)
407 .Test(xnn_x32_packx_ukernel_3x__scalar);
408 }
409}
410
411TEST(X32_PACKX_3X__SCALAR, k_gt_1_subtile) {
412 for (size_t k = 2; k < 10; k++) {
413 for (size_t m = 1; m <= 3; m++) {
414 PackMicrokernelTester()
415 .mr(3)
416 .m(m)
417 .k(k)
418 .Test(xnn_x32_packx_ukernel_3x__scalar);
419 }
420 }
421}
422
423TEST(X32_PACKX_3X__SCALAR, strided_x) {
424 for (size_t k = 1; k <= 5; k += 2) {
425 PackMicrokernelTester()
426 .mr(3)
427 .m(3)
428 .k(k)
429 .x_stride(7)
430 .Test(xnn_x32_packx_ukernel_3x__scalar);
431 }
432}
433
434TEST(X32_PACKX_4X__SCALAR, k_eq_1) {
435 PackMicrokernelTester()
436 .mr(4)
437 .m(4)
438 .k(1)
439 .Test(xnn_x32_packx_ukernel_4x__scalar);
440}
441
442TEST(X32_PACKX_4X__SCALAR, k_eq_1_subtile) {
443 for (size_t m = 1; m <= 4; m++) {
444 PackMicrokernelTester()
445 .mr(4)
446 .m(m)
447 .k(1)
448 .Test(xnn_x32_packx_ukernel_4x__scalar);
449 }
450}
451
452TEST(X32_PACKX_4X__SCALAR, k_gt_1) {
453 for (size_t k = 2; k < 10; k++) {
454 PackMicrokernelTester()
455 .mr(4)
456 .m(4)
457 .k(k)
458 .Test(xnn_x32_packx_ukernel_4x__scalar);
459 }
460}
461
462TEST(X32_PACKX_4X__SCALAR, k_gt_1_subtile) {
463 for (size_t k = 2; k < 10; k++) {
464 for (size_t m = 1; m <= 4; m++) {
465 PackMicrokernelTester()
466 .mr(4)
467 .m(m)
468 .k(k)
469 .Test(xnn_x32_packx_ukernel_4x__scalar);
470 }
471 }
472}
473
474TEST(X32_PACKX_4X__SCALAR, strided_x) {
475 for (size_t k = 1; k <= 5; k += 2) {
476 PackMicrokernelTester()
477 .mr(4)
478 .m(4)
479 .k(k)
480 .x_stride(7)
481 .Test(xnn_x32_packx_ukernel_4x__scalar);
482 }
483}