blob: c92ee350ec359fc48726f3f69303765d266ee309 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <gtest/gtest.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
Marat Dukhan1dadbf72019-10-01 10:46:20 -070014#include <xnnpack/zip.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015#include "zip-microkernel-tester.h"
16
17
Marat Dukhan1dadbf72019-10-01 10:46:20 -070018#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070019 TEST(X8_ZIP_X2__NEON, n_eq_8) {
20 TEST_REQUIRES_ARM_NEON;
21 ZipMicrokernelTester()
22 .n(8)
23 .g(2)
24 .Test(xnn_x8_zip_x2_ukernel__neon);
25 }
26
27 TEST(X8_ZIP_X2__NEON, n_div_16) {
28 TEST_REQUIRES_ARM_NEON;
29 for (size_t n = 8; n < 128; n += 8) {
30 ZipMicrokernelTester()
31 .n(n)
32 .g(2)
33 .Test(xnn_x8_zip_x2_ukernel__neon);
34 }
35 }
36
37 TEST(X8_ZIP_X2__NEON, n_gt_8) {
38 TEST_REQUIRES_ARM_NEON;
39 for (size_t n = 9; n < 16; n++) {
40 ZipMicrokernelTester()
41 .n(n)
42 .g(2)
43 .Test(xnn_x8_zip_x2_ukernel__neon);
44 }
45 }
46
47 TEST(X8_ZIP_X2__NEON, n_lt_8) {
48 TEST_REQUIRES_ARM_NEON;
49 for (size_t n = 1; n < 8; n++) {
50 ZipMicrokernelTester()
51 .n(n)
52 .g(2)
53 .Test(xnn_x8_zip_x2_ukernel__neon);
54 }
55 }
56
57 TEST(X8_ZIP_X3__NEON, n_eq_8) {
58 TEST_REQUIRES_ARM_NEON;
59 ZipMicrokernelTester()
60 .n(9)
61 .g(3)
62 .Test(xnn_x8_zip_x3_ukernel__neon);
63 }
64
65 TEST(X8_ZIP_X3__NEON, n_div_8) {
66 TEST_REQUIRES_ARM_NEON;
67 for (size_t n = 8; n < 128; n += 8) {
68 ZipMicrokernelTester()
69 .n(n)
70 .g(3)
71 .Test(xnn_x8_zip_x3_ukernel__neon);
72 }
73 }
74
75 TEST(X8_ZIP_X3__NEON, n_gt_8) {
76 TEST_REQUIRES_ARM_NEON;
77 for (size_t n = 9; n < 16; n++) {
78 ZipMicrokernelTester()
79 .n(n)
80 .g(3)
81 .Test(xnn_x8_zip_x3_ukernel__neon);
82 }
83 }
84
85 TEST(X8_ZIP_X3__NEON, n_lt_8) {
86 TEST_REQUIRES_ARM_NEON;
87 for (size_t n = 1; n < 8; n++) {
88 ZipMicrokernelTester()
89 .n(n)
90 .g(3)
91 .Test(xnn_x8_zip_x3_ukernel__neon);
92 }
93 }
94
95 TEST(X8_ZIP_X4__NEON, n_eq_8) {
96 TEST_REQUIRES_ARM_NEON;
97 ZipMicrokernelTester()
98 .n(8)
99 .g(4)
100 .Test(xnn_x8_zip_x4_ukernel__neon);
101 }
102
103 TEST(X8_ZIP_X4__NEON, n_div_8) {
104 TEST_REQUIRES_ARM_NEON;
105 for (size_t n = 8; n < 128; n += 8) {
106 ZipMicrokernelTester()
107 .n(n)
108 .g(4)
109 .Test(xnn_x8_zip_x4_ukernel__neon);
110 }
111 }
112
113 TEST(X8_ZIP_X4__NEON, n_gt_8) {
114 TEST_REQUIRES_ARM_NEON;
115 for (size_t n = 9; n < 16; n++) {
116 ZipMicrokernelTester()
117 .n(n)
118 .g(4)
119 .Test(xnn_x8_zip_x4_ukernel__neon);
120 }
121 }
122
123 TEST(X8_ZIP_X4__NEON, n_lt_16) {
124 TEST_REQUIRES_ARM_NEON;
125 for (size_t n = 1; n < 16; n++) {
126 ZipMicrokernelTester()
127 .n(n)
128 .g(4)
129 .Test(xnn_x8_zip_x4_ukernel__neon);
130 }
131 }
132
133 TEST(X8_ZIP_XM__NEON, n_eq_8_m_eq_4) {
134 TEST_REQUIRES_ARM_NEON;
135 ZipMicrokernelTester()
136 .n(8)
137 .g(4)
138 .Test(xnn_x8_zip_xm_ukernel__neon);
139 }
140
141 TEST(X8_ZIP_XM__NEON, n_eq_8_m_div_4) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t g = 4; g < 32; g += 4) {
144 ZipMicrokernelTester()
145 .n(8)
146 .g(g)
147 .Test(xnn_x8_zip_xm_ukernel__neon);
148 }
149 }
150
151 TEST(X8_ZIP_XM__NEON, n_eq_8_m_gt_4) {
152 TEST_REQUIRES_ARM_NEON;
153 for (size_t g = 5; g < 8; g++) {
154 ZipMicrokernelTester()
155 .n(8)
156 .g(g)
157 .Test(xnn_x8_zip_xm_ukernel__neon);
158 }
159 }
160
161 TEST(X8_ZIP_XM__NEON, n_div_8_m_eq_4) {
162 TEST_REQUIRES_ARM_NEON;
163 for (size_t n = 8; n < 128; n += 8) {
164 ZipMicrokernelTester()
165 .n(n)
166 .g(4)
167 .Test(xnn_x8_zip_xm_ukernel__neon);
168 }
169 }
170
171 TEST(X8_ZIP_XM__NEON, n_div_8_m_div_4) {
172 TEST_REQUIRES_ARM_NEON;
173 for (size_t n = 8; n < 128; n += 8) {
174 for (size_t g = 4; g < 32; g += 4) {
175 ZipMicrokernelTester()
176 .n(n)
177 .g(g)
178 .Test(xnn_x8_zip_xm_ukernel__neon);
179 }
180 }
181 }
182
183 TEST(X8_ZIP_XM__NEON, n_div_8_m_gt_4) {
184 TEST_REQUIRES_ARM_NEON;
185 for (size_t n = 8; n < 128; n += 8) {
186 for (size_t g = 5; g < 8; g++) {
187 ZipMicrokernelTester()
188 .n(n)
189 .g(g)
190 .Test(xnn_x8_zip_xm_ukernel__neon);
191 }
192 }
193 }
194
195 TEST(X8_ZIP_XM__NEON, n_gt_8_m_eq_4) {
196 TEST_REQUIRES_ARM_NEON;
197 for (size_t n = 9; n < 16; n++) {
198 ZipMicrokernelTester()
199 .n(n)
200 .g(4)
201 .Test(xnn_x8_zip_xm_ukernel__neon);
202 }
203 }
204
205 TEST(X8_ZIP_XM__NEON, n_gt_8_m_div_4) {
206 TEST_REQUIRES_ARM_NEON;
207 for (size_t n = 9; n < 16; n++) {
208 for (size_t g = 4; g < 32; g += 4) {
209 ZipMicrokernelTester()
210 .n(n)
211 .g(g)
212 .Test(xnn_x8_zip_xm_ukernel__neon);
213 }
214 }
215 }
216
217 TEST(X8_ZIP_XM__NEON, n_gt_8_m_gt_4) {
218 TEST_REQUIRES_ARM_NEON;
219 for (size_t n = 9; n < 16; n++) {
220 for (size_t g = 5; g < 8; g++) {
221 ZipMicrokernelTester()
222 .n(n)
223 .g(g)
224 .Test(xnn_x8_zip_xm_ukernel__neon);
225 }
226 }
227 }
228
229 TEST(X8_ZIP_XM__NEON, n_lt_8) {
230 TEST_REQUIRES_ARM_NEON;
231 for (size_t n = 1; n < 8; n++) {
232 for (size_t g = 4; g < 12; g++) {
233 ZipMicrokernelTester()
234 .n(n)
235 .g(g)
236 .Test(xnn_x8_zip_xm_ukernel__neon);
237 }
238 }
239 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700240#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700241
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700242#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700243 TEST(X8_ZIP_X2__SSE2, n_eq_16) {
244 TEST_REQUIRES_X86_SSE2;
245 ZipMicrokernelTester()
246 .n(16)
247 .g(2)
248 .Test(xnn_x8_zip_x2_ukernel__sse2);
249 }
250
251 TEST(X8_ZIP_X2__SSE2, n_div_16) {
252 TEST_REQUIRES_X86_SSE2;
253 for (size_t n = 16; n < 256; n += 16) {
254 ZipMicrokernelTester()
255 .n(n)
256 .g(2)
257 .Test(xnn_x8_zip_x2_ukernel__sse2);
258 }
259 }
260
261 TEST(X8_ZIP_X2__SSE2, n_gt_16) {
262 TEST_REQUIRES_X86_SSE2;
263 for (size_t n = 17; n < 32; n++) {
264 ZipMicrokernelTester()
265 .n(n)
266 .g(2)
267 .Test(xnn_x8_zip_x2_ukernel__sse2);
268 }
269 }
270
271 TEST(X8_ZIP_X2__SSE2, n_lt_16) {
272 TEST_REQUIRES_X86_SSE2;
273 for (size_t n = 1; n < 16; n++) {
274 ZipMicrokernelTester()
275 .n(n)
276 .g(2)
277 .Test(xnn_x8_zip_x2_ukernel__sse2);
278 }
279 }
280
281 TEST(X8_ZIP_X3__SSE2, n_eq_16) {
282 TEST_REQUIRES_X86_SSE2;
283 ZipMicrokernelTester()
284 .n(16)
285 .g(3)
286 .Test(xnn_x8_zip_x3_ukernel__sse2);
287 }
288
289 TEST(X8_ZIP_X3__SSE2, n_div_16) {
290 TEST_REQUIRES_X86_SSE2;
291 for (size_t n = 16; n < 256; n += 16) {
292 ZipMicrokernelTester()
293 .n(n)
294 .g(3)
295 .Test(xnn_x8_zip_x3_ukernel__sse2);
296 }
297 }
298
299 TEST(X8_ZIP_X3__SSE2, n_gt_16) {
300 TEST_REQUIRES_X86_SSE2;
301 for (size_t n = 17; n < 32; n++) {
302 ZipMicrokernelTester()
303 .n(n)
304 .g(3)
305 .Test(xnn_x8_zip_x3_ukernel__sse2);
306 }
307 }
308
309 TEST(X8_ZIP_X3__SSE2, n_lt_16) {
310 TEST_REQUIRES_X86_SSE2;
311 for (size_t n = 1; n < 16; n++) {
312 ZipMicrokernelTester()
313 .n(n)
314 .g(3)
315 .Test(xnn_x8_zip_x3_ukernel__sse2);
316 }
317 }
318
319 TEST(X8_ZIP_X4__SSE2, n_eq_16) {
320 TEST_REQUIRES_X86_SSE2;
321 ZipMicrokernelTester()
322 .n(16)
323 .g(4)
324 .Test(xnn_x8_zip_x4_ukernel__sse2);
325 }
326
327 TEST(X8_ZIP_X4__SSE2, n_div_16) {
328 TEST_REQUIRES_X86_SSE2;
329 for (size_t n = 16; n < 256; n += 16) {
330 ZipMicrokernelTester()
331 .n(n)
332 .g(4)
333 .Test(xnn_x8_zip_x4_ukernel__sse2);
334 }
335 }
336
337 TEST(X8_ZIP_X4__SSE2, n_gt_16) {
338 TEST_REQUIRES_X86_SSE2;
339 for (size_t n = 17; n < 32; n++) {
340 ZipMicrokernelTester()
341 .n(n)
342 .g(4)
343 .Test(xnn_x8_zip_x4_ukernel__sse2);
344 }
345 }
346
347 TEST(X8_ZIP_X4__SSE2, n_lt_16) {
348 TEST_REQUIRES_X86_SSE2;
349 for (size_t n = 1; n < 16; n++) {
350 ZipMicrokernelTester()
351 .n(n)
352 .g(4)
353 .Test(xnn_x8_zip_x4_ukernel__sse2);
354 }
355 }
356
357 TEST(X8_ZIP_XM__SSE2, n_eq_8_m_eq_4) {
358 TEST_REQUIRES_X86_SSE2;
359 ZipMicrokernelTester()
360 .n(8)
361 .g(4)
362 .Test(xnn_x8_zip_xm_ukernel__sse2);
363 }
364
365 TEST(X8_ZIP_XM__SSE2, n_eq_8_m_div_4) {
366 TEST_REQUIRES_X86_SSE2;
367 for (size_t g = 4; g < 32; g += 4) {
368 ZipMicrokernelTester()
369 .n(8)
370 .g(g)
371 .Test(xnn_x8_zip_xm_ukernel__sse2);
372 }
373 }
374
375 TEST(X8_ZIP_XM__SSE2, n_eq_8_m_gt_4) {
376 TEST_REQUIRES_X86_SSE2;
377 for (size_t g = 5; g < 8; g++) {
378 ZipMicrokernelTester()
379 .n(8)
380 .g(g)
381 .Test(xnn_x8_zip_xm_ukernel__sse2);
382 }
383 }
384
385 TEST(X8_ZIP_XM__SSE2, n_eq_16_m_eq_4) {
386 TEST_REQUIRES_X86_SSE2;
387 ZipMicrokernelTester()
388 .n(16)
389 .g(4)
390 .Test(xnn_x8_zip_xm_ukernel__sse2);
391 }
392
393 TEST(X8_ZIP_XM__SSE2, n_eq_16_m_div_4) {
394 TEST_REQUIRES_X86_SSE2;
395 for (size_t g = 4; g < 32; g += 4) {
396 ZipMicrokernelTester()
397 .n(16)
398 .g(g)
399 .Test(xnn_x8_zip_xm_ukernel__sse2);
400 }
401 }
402
403 TEST(X8_ZIP_XM__SSE2, n_eq_16_m_gt_4) {
404 TEST_REQUIRES_X86_SSE2;
405 for (size_t g = 5; g < 8; g++) {
406 ZipMicrokernelTester()
407 .n(16)
408 .g(g)
409 .Test(xnn_x8_zip_xm_ukernel__sse2);
410 }
411 }
412
413 TEST(X8_ZIP_XM__SSE2, n_div_16_m_eq_4) {
414 TEST_REQUIRES_X86_SSE2;
415 for (size_t n = 16; n < 256; n += 16) {
416 ZipMicrokernelTester()
417 .n(n)
418 .g(4)
419 .Test(xnn_x8_zip_xm_ukernel__sse2);
420 }
421 }
422
423 TEST(X8_ZIP_XM__SSE2, n_div_16_m_div_4) {
424 TEST_REQUIRES_X86_SSE2;
425 for (size_t n = 16; n < 256; n += 16) {
426 for (size_t g = 4; g < 32; g += 4) {
427 ZipMicrokernelTester()
428 .n(n)
429 .g(g)
430 .Test(xnn_x8_zip_xm_ukernel__sse2);
431 }
432 }
433 }
434
435 TEST(X8_ZIP_XM__SSE2, n_div_16_m_gt_4) {
436 TEST_REQUIRES_X86_SSE2;
437 for (size_t n = 16; n < 256; n += 16) {
438 for (size_t g = 5; g < 8; g++) {
439 ZipMicrokernelTester()
440 .n(n)
441 .g(g)
442 .Test(xnn_x8_zip_xm_ukernel__sse2);
443 }
444 }
445 }
446
447 TEST(X8_ZIP_XM__SSE2, n_gt_16_m_eq_4) {
448 TEST_REQUIRES_X86_SSE2;
449 for (size_t n = 17; n < 32; n++) {
450 ZipMicrokernelTester()
451 .n(n)
452 .g(4)
453 .Test(xnn_x8_zip_xm_ukernel__sse2);
454 }
455 }
456
457 TEST(X8_ZIP_XM__SSE2, n_gt_16_m_div_4) {
458 TEST_REQUIRES_X86_SSE2;
459 for (size_t n = 17; n < 32; n++) {
460 for (size_t g = 4; g < 32; g += 4) {
461 ZipMicrokernelTester()
462 .n(n)
463 .g(g)
464 .Test(xnn_x8_zip_xm_ukernel__sse2);
465 }
466 }
467 }
468
469 TEST(X8_ZIP_XM__SSE2, n_gt_16_m_gt_4) {
470 TEST_REQUIRES_X86_SSE2;
471 for (size_t n = 17; n < 32; n++) {
472 for (size_t g = 5; g < 8; g++) {
473 ZipMicrokernelTester()
474 .n(n)
475 .g(g)
476 .Test(xnn_x8_zip_xm_ukernel__sse2);
477 }
478 }
479 }
480
481 TEST(X8_ZIP_XM__SSE2, n_lt_16) {
482 TEST_REQUIRES_X86_SSE2;
483 for (size_t n = 1; n < 16; n++) {
484 for (size_t g = 4; g < 12; g++) {
485 ZipMicrokernelTester()
486 .n(n)
487 .g(g)
488 .Test(xnn_x8_zip_xm_ukernel__sse2);
489 }
490 }
491 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700492#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700493
494TEST(X8_ZIP_X2__SCALAR, n_eq_1) {
495 ZipMicrokernelTester()
496 .n(1)
497 .g(2)
498 .Test(xnn_x8_zip_x2_ukernel__scalar);
499}
500
501TEST(X8_ZIP_X2__SCALAR, n_gt_1) {
502 for (size_t n = 2; n < 8; n++) {
503 ZipMicrokernelTester()
504 .n(n)
505 .g(2)
506 .Test(xnn_x8_zip_x2_ukernel__scalar);
507 }
508}
509
510TEST(X8_ZIP_X3__SCALAR, n_eq_1) {
511 ZipMicrokernelTester()
512 .n(9)
513 .g(3)
514 .Test(xnn_x8_zip_x3_ukernel__scalar);
515}
516
517TEST(X8_ZIP_X3__SCALAR, n_gt_1) {
518 for (size_t n = 2; n < 8; n++) {
519 ZipMicrokernelTester()
520 .n(n)
521 .g(3)
522 .Test(xnn_x8_zip_x3_ukernel__scalar);
523 }
524}
525
526TEST(X8_ZIP_X4__SCALAR, n_eq_1) {
527 ZipMicrokernelTester()
528 .n(1)
529 .g(4)
530 .Test(xnn_x8_zip_x4_ukernel__scalar);
531}
532
533TEST(X8_ZIP_X4__SCALAR, n_gt_1) {
534 for (size_t n = 2; n < 8; n++) {
535 ZipMicrokernelTester()
536 .n(n)
537 .g(4)
538 .Test(xnn_x8_zip_x4_ukernel__scalar);
539 }
540}
541
542TEST(X8_ZIP_XM__SCALAR, n_eq_1_m_eq_4) {
543 ZipMicrokernelTester()
544 .n(1)
545 .g(4)
546 .Test(xnn_x8_zip_xm_ukernel__scalar);
547}
548
549TEST(X8_ZIP_XM__SCALAR, n_eq_1_m_div_4) {
550 for (size_t g = 4; g < 32; g += 4) {
551 ZipMicrokernelTester()
552 .n(1)
553 .g(g)
554 .Test(xnn_x8_zip_xm_ukernel__scalar);
555 }
556}
557
558TEST(X8_ZIP_XM__SCALAR, n_eq_1_m_gt_4) {
559 for (size_t g = 5; g < 8; g++) {
560 ZipMicrokernelTester()
561 .n(1)
562 .g(g)
563 .Test(xnn_x8_zip_xm_ukernel__scalar);
564 }
565}
566
567TEST(X8_ZIP_XM__SCALAR, n_gt_1_m_eq_4) {
568 for (size_t n = 2; n < 8; n++) {
569 ZipMicrokernelTester()
570 .n(n)
571 .g(4)
572 .Test(xnn_x8_zip_xm_ukernel__scalar);
573 }
574}
575
576TEST(X8_ZIP_XM__SCALAR, n_gt_1_m_div_4) {
577 for (size_t n = 2; n < 8; n++) {
578 for (size_t g = 4; g < 32; g += 4) {
579 ZipMicrokernelTester()
580 .n(n)
581 .g(g)
582 .Test(xnn_x8_zip_xm_ukernel__scalar);
583 }
584 }
585}
586
587TEST(X8_ZIP_XM__SCALAR, n_gt_1_m_gt_4) {
588 for (size_t n = 2; n < 8; n++) {
589 for (size_t g = 5; g < 8; g++) {
590 ZipMicrokernelTester()
591 .n(n)
592 .g(g)
593 .Test(xnn_x8_zip_xm_ukernel__scalar);
594 }
595 }
596}