blob: b5832dced9d95888f11ba5a564a7f011b0034d2a [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <gtest/gtest.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
Marat Dukhan1dadbf72019-10-01 10:46:20 -070014#include <xnnpack/maxpool.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015#include "maxpool-microkernel-tester.h"
16
17
Marat Dukhan1dadbf72019-10-01 10:46:20 -070018#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070019 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_unipass_fulltile) {
20 TEST_REQUIRES_ARM_NEON;
21 auto tester = MaxPoolMicrokernelTester()
22 .mr(9)
23 .qr(8)
24 .kc(16);
25 for (size_t kh = 1; kh <= tester.mr(); kh++) {
26 for (size_t kw = 1; kw <= tester.mr(); kw++) {
27 if (kh * kw == tester.mr()) {
28 tester
29 .kh(kh)
30 .kw(kw)
31 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
32 }
33 }
34 }
35 }
36
37 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_unipass_fulltile_with_qmin) {
38 TEST_REQUIRES_ARM_NEON;
39 auto tester = MaxPoolMicrokernelTester()
40 .mr(9)
41 .qr(8)
42 .kc(16);
43 for (size_t kh = 1; kh <= tester.mr(); kh++) {
44 for (size_t kw = 1; kw <= tester.mr(); kw++) {
45 if (kh * kw == tester.mr()) {
46 tester
47 .kh(kh)
48 .kw(kw)
49 .qmin(192)
50 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
51 }
52 }
53 }
54 }
55
56 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_unipass_fulltile_with_qmax) {
57 TEST_REQUIRES_ARM_NEON;
58 auto tester = MaxPoolMicrokernelTester()
59 .mr(9)
60 .qr(8)
61 .kc(16);
62 for (size_t kh = 1; kh <= tester.mr(); kh++) {
63 for (size_t kw = 1; kw <= tester.mr(); kw++) {
64 if (kh * kw == tester.mr()) {
65 tester
66 .kh(kh)
67 .kw(kw)
68 .qmax(192)
69 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
70 }
71 }
72 }
73 }
74
75 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_unipass_subtile) {
76 TEST_REQUIRES_ARM_NEON;
77 auto tester = MaxPoolMicrokernelTester()
78 .mr(9)
79 .qr(8)
80 .kc(16);
81 for (size_t ks = 2; ks < tester.mr(); ks++) {
82 tester
83 .kh(ks)
84 .kw(1)
85 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
86 tester
87 .kh(1)
88 .kw(ks)
89 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
90 }
91 }
92
93 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_unipass_fulltile) {
94 TEST_REQUIRES_ARM_NEON;
95 auto tester = MaxPoolMicrokernelTester()
96 .mr(9)
97 .qr(8);
98 for (size_t kh = 1; kh <= tester.mr(); kh++) {
99 for (size_t kw = 1; kw <= tester.mr(); kw++) {
100 if (kh * kw == tester.mr()) {
101 for (size_t kc = 16; kc < 256; kc += 48) {
102 tester
103 .kh(kh)
104 .kw(kw)
105 .kc(kc)
106 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
107 }
108 }
109 }
110 }
111 }
112
113 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_unipass_fulltile_with_qmin) {
114 TEST_REQUIRES_ARM_NEON;
115 auto tester = MaxPoolMicrokernelTester()
116 .mr(9)
117 .qr(8);
118 for (size_t kh = 1; kh <= tester.mr(); kh++) {
119 for (size_t kw = 1; kw <= tester.mr(); kw++) {
120 if (kh * kw == tester.mr()) {
121 for (size_t kc = 16; kc < 256; kc += 48) {
122 tester
123 .kh(kh)
124 .kw(kw)
125 .kc(kc)
126 .qmin(192)
127 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
128 }
129 }
130 }
131 }
132 }
133
134 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_unipass_fulltile_with_qmax) {
135 TEST_REQUIRES_ARM_NEON;
136 auto tester = MaxPoolMicrokernelTester()
137 .mr(9)
138 .qr(8);
139 for (size_t kh = 1; kh <= tester.mr(); kh++) {
140 for (size_t kw = 1; kw <= tester.mr(); kw++) {
141 if (kh * kw == tester.mr()) {
142 for (size_t kc = 16; kc < 256; kc += 48) {
143 tester
144 .kh(kh)
145 .kw(kw)
146 .kc(kc)
147 .qmax(192)
148 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
149 }
150 }
151 }
152 }
153 }
154
155 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_unipass_fulltile_with_x_stride) {
156 TEST_REQUIRES_ARM_NEON;
157 auto tester = MaxPoolMicrokernelTester()
158 .mr(9)
159 .qr(8)
160 .iterations(3);
161 for (size_t kh = 1; kh <= tester.mr(); kh++) {
162 for (size_t kw = 1; kw <= tester.mr(); kw++) {
163 if (kh * kw == tester.mr()) {
164 for (size_t kc = 16; kc < 256; kc += 48) {
165 tester
166 .kh(kh)
167 .kw(kw)
168 .kc(kc)
169 .x_stride(257)
170 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
171 }
172 }
173 }
174 }
175 }
176
177 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_unipass_subtile) {
178 TEST_REQUIRES_ARM_NEON;
179 auto tester = MaxPoolMicrokernelTester()
180 .mr(9)
181 .qr(8)
182 .iterations(3);
183 for (size_t ks = 2; ks < tester.mr(); ks++) {
184 for (size_t kc = 16; kc < 256; kc += 48) {
185 tester
186 .kh(ks)
187 .kw(1)
188 .kc(kc)
189 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
190 tester
191 .kh(1)
192 .kw(ks)
193 .kc(kc)
194 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
195 }
196 }
197 }
198
199 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_unipass_fulltile) {
200 TEST_REQUIRES_ARM_NEON;
201 auto tester = MaxPoolMicrokernelTester()
202 .mr(9)
203 .qr(8);
204 for (size_t kh = 1; kh <= tester.mr(); kh++) {
205 for (size_t kw = 1; kw <= tester.mr(); kw++) {
206 if (kh * kw == tester.mr()) {
207 for (size_t kc = 1; kc < 16; kc++) {
208 tester
209 .kh(kh)
210 .kw(kw)
211 .kc(kc)
212 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
213 }
214 }
215 }
216 }
217 }
218
219 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_unipass_fulltile_with_qmin) {
220 TEST_REQUIRES_ARM_NEON;
221 auto tester = MaxPoolMicrokernelTester()
222 .mr(9)
223 .qr(8);
224 for (size_t kh = 1; kh <= tester.mr(); kh++) {
225 for (size_t kw = 1; kw <= tester.mr(); kw++) {
226 if (kh * kw == tester.mr()) {
227 for (size_t kc = 1; kc < 16; kc++) {
228 tester
229 .kh(kh)
230 .kw(kw)
231 .kc(kc)
232 .qmin(192)
233 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
234 }
235 }
236 }
237 }
238 }
239
240 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_unipass_fulltile_with_qmax) {
241 TEST_REQUIRES_ARM_NEON;
242 auto tester = MaxPoolMicrokernelTester()
243 .mr(9)
244 .qr(8);
245 for (size_t kh = 1; kh <= tester.mr(); kh++) {
246 for (size_t kw = 1; kw <= tester.mr(); kw++) {
247 if (kh * kw == tester.mr()) {
248 for (size_t kc = 1; kc < 16; kc++) {
249 tester
250 .kh(kh)
251 .kw(kw)
252 .kc(kc)
253 .qmax(192)
254 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
255 }
256 }
257 }
258 }
259 }
260
261 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_unipass_fulltile_with_x_stride) {
262 TEST_REQUIRES_ARM_NEON;
263 auto tester = MaxPoolMicrokernelTester()
264 .mr(9)
265 .qr(8)
266 .iterations(3);
267 for (size_t kh = 1; kh <= tester.mr(); kh++) {
268 for (size_t kw = 1; kw <= tester.mr(); kw++) {
269 if (kh * kw == tester.mr()) {
270 for (size_t kc = 1; kc < 16; kc++) {
271 tester
272 .kh(kh)
273 .kw(kw)
274 .kc(kc)
275 .x_stride(257)
276 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
277 }
278 }
279 }
280 }
281 }
282
283 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_unipass_subtile) {
284 TEST_REQUIRES_ARM_NEON;
285 auto tester = MaxPoolMicrokernelTester()
286 .mr(9)
287 .qr(8)
288 .iterations(3);
289 for (size_t ks = 2; ks < tester.mr(); ks++) {
290 for (size_t kc = 1; kc < 16; kc++) {
291 tester
292 .kh(ks)
293 .kw(1)
294 .kc(kc)
295 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
296 tester
297 .kh(1)
298 .kw(ks)
299 .kc(kc)
300 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
301 }
302 }
303 }
304
305 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_unipass_fulltile) {
306 TEST_REQUIRES_ARM_NEON;
307 auto tester = MaxPoolMicrokernelTester()
308 .mr(9)
309 .qr(8);
310 for (size_t kh = 1; kh <= tester.mr(); kh++) {
311 for (size_t kw = 1; kw <= tester.mr(); kw++) {
312 if (kh * kw == tester.mr()) {
313 for (size_t kc = 17; kc < 32; kc++) {
314 tester
315 .kh(kh)
316 .kw(kw)
317 .kc(kc)
318 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
319 }
320 }
321 }
322 }
323 }
324
325 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_unipass_fulltile_with_qmin) {
326 TEST_REQUIRES_ARM_NEON;
327 auto tester = MaxPoolMicrokernelTester()
328 .mr(9)
329 .qr(8);
330 for (size_t kh = 1; kh <= tester.mr(); kh++) {
331 for (size_t kw = 1; kw <= tester.mr(); kw++) {
332 if (kh * kw == tester.mr()) {
333 for (size_t kc = 17; kc < 32; kc++) {
334 tester
335 .kh(kh)
336 .kw(kw)
337 .kc(kc)
338 .qmin(192)
339 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
340 }
341 }
342 }
343 }
344 }
345
346 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_unipass_fulltile_with_qmax) {
347 TEST_REQUIRES_ARM_NEON;
348 auto tester = MaxPoolMicrokernelTester()
349 .mr(9)
350 .qr(8);
351 for (size_t kh = 1; kh <= tester.mr(); kh++) {
352 for (size_t kw = 1; kw <= tester.mr(); kw++) {
353 if (kh * kw == tester.mr()) {
354 for (size_t kc = 17; kc < 32; kc++) {
355 tester
356 .kh(kh)
357 .kw(kw)
358 .kc(kc)
359 .qmax(192)
360 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
361 }
362 }
363 }
364 }
365 }
366
367 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_unipass_fulltile_with_x_stride) {
368 TEST_REQUIRES_ARM_NEON;
369 auto tester = MaxPoolMicrokernelTester()
370 .mr(9)
371 .qr(8)
372 .iterations(3);
373 for (size_t kh = 1; kh <= tester.mr(); kh++) {
374 for (size_t kw = 1; kw <= tester.mr(); kw++) {
375 if (kh * kw == tester.mr()) {
376 for (size_t kc = 17; kc < 32; kc++) {
377 tester
378 .kh(kh)
379 .kw(kw)
380 .kc(kc)
381 .x_stride(257)
382 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
383 }
384 }
385 }
386 }
387 }
388
389 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_unipass_subtile) {
390 TEST_REQUIRES_ARM_NEON;
391 auto tester = MaxPoolMicrokernelTester()
392 .mr(9)
393 .qr(8)
394 .iterations(3);
395 for (size_t ks = 2; ks < tester.mr(); ks++) {
396 for (size_t kc = 17; kc < 32; kc++) {
397 tester
398 .kh(ks)
399 .kw(1)
400 .kc(kc)
401 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
402 tester
403 .kh(1)
404 .kw(ks)
405 .kc(kc)
406 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
407 }
408 }
409 }
410
411 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_twopass_fulltile) {
412 TEST_REQUIRES_ARM_NEON;
413 auto tester = MaxPoolMicrokernelTester()
414 .mr(9)
415 .qr(8)
416 .kc(16);
417 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
418 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
419 if (kh * kw == tester.mr() + tester.qr()) {
420 tester
421 .kh(kh)
422 .kw(kw)
423 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
424 }
425 }
426 }
427 }
428
429 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_twopass_fulltile_with_qmin) {
430 TEST_REQUIRES_ARM_NEON;
431 auto tester = MaxPoolMicrokernelTester()
432 .mr(9)
433 .qr(8)
434 .kc(16);
435 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
436 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
437 if (kh * kw == tester.mr() + tester.qr()) {
438 tester
439 .kh(kh)
440 .kw(kw)
441 .qmin(192)
442 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
443 }
444 }
445 }
446 }
447
448 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_twopass_fulltile_with_qmax) {
449 TEST_REQUIRES_ARM_NEON;
450 auto tester = MaxPoolMicrokernelTester()
451 .mr(9)
452 .qr(8)
453 .kc(16);
454 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
455 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
456 if (kh * kw == tester.mr() + tester.qr()) {
457 tester
458 .kh(kh)
459 .kw(kw)
460 .qmax(192)
461 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
462 }
463 }
464 }
465 }
466
467 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_twopass_subtile) {
468 TEST_REQUIRES_ARM_NEON;
469 auto tester = MaxPoolMicrokernelTester()
470 .mr(9)
471 .qr(8)
472 .kc(16);
473 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
474 tester
475 .kh(ks)
476 .kw(1)
477 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
478 tester
479 .kh(1)
480 .kw(ks)
481 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
482 }
483 }
484
485 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_twopass_fulltile) {
486 TEST_REQUIRES_ARM_NEON;
487 auto tester = MaxPoolMicrokernelTester()
488 .mr(9)
489 .qr(8);
490 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
491 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
492 if (kh * kw == tester.mr() + tester.qr()) {
493 for (size_t kc = 16; kc < 256; kc += 48) {
494 tester
495 .kh(kh)
496 .kw(kw)
497 .kc(kc)
498 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
499 }
500 }
501 }
502 }
503 }
504
505 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_twopass_fulltile_with_qmin) {
506 TEST_REQUIRES_ARM_NEON;
507 auto tester = MaxPoolMicrokernelTester()
508 .mr(9)
509 .qr(8);
510 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
511 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
512 if (kh * kw == tester.mr() + tester.qr()) {
513 for (size_t kc = 16; kc < 256; kc += 48) {
514 tester
515 .kh(kh)
516 .kw(kw)
517 .kc(kc)
518 .qmin(192)
519 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
520 }
521 }
522 }
523 }
524 }
525
526 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_twopass_fulltile_with_qmax) {
527 TEST_REQUIRES_ARM_NEON;
528 auto tester = MaxPoolMicrokernelTester()
529 .mr(9)
530 .qr(8);
531 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
532 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
533 if (kh * kw == tester.mr() + tester.qr()) {
534 for (size_t kc = 16; kc < 256; kc += 48) {
535 tester
536 .kh(kh)
537 .kw(kw)
538 .kc(kc)
539 .qmax(192)
540 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
541 }
542 }
543 }
544 }
545 }
546
547 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_twopass_fulltile_with_x_stride) {
548 TEST_REQUIRES_ARM_NEON;
549 auto tester = MaxPoolMicrokernelTester()
550 .mr(9)
551 .qr(8)
552 .iterations(3);
553 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
554 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
555 if (kh * kw == tester.mr() + tester.qr()) {
556 for (size_t kc = 16; kc < 256; kc += 48) {
557 tester
558 .kh(kh)
559 .kw(kw)
560 .kc(kc)
561 .x_stride(257)
562 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
563 }
564 }
565 }
566 }
567 }
568
569 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_twopass_subtile) {
570 TEST_REQUIRES_ARM_NEON;
571 auto tester = MaxPoolMicrokernelTester()
572 .mr(9)
573 .qr(8)
574 .iterations(3);
575 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
576 for (size_t kc = 16; kc < 256; kc += 48) {
577 tester
578 .kh(ks)
579 .kw(1)
580 .kc(kc)
581 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
582 tester
583 .kh(1)
584 .kw(ks)
585 .kc(kc)
586 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
587 }
588 }
589 }
590
591 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_twopass_fulltile) {
592 TEST_REQUIRES_ARM_NEON;
593 auto tester = MaxPoolMicrokernelTester()
594 .mr(9)
595 .qr(8);
596 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
597 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
598 if (kh * kw == tester.mr() + tester.qr()) {
599 for (size_t kc = 1; kc < 16; kc++) {
600 tester
601 .kh(kh)
602 .kw(kw)
603 .kc(kc)
604 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
605 }
606 }
607 }
608 }
609 }
610
611 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_twopass_fulltile_with_qmin) {
612 TEST_REQUIRES_ARM_NEON;
613 auto tester = MaxPoolMicrokernelTester()
614 .mr(9)
615 .qr(8);
616 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
617 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
618 if (kh * kw == tester.mr() + tester.qr()) {
619 for (size_t kc = 1; kc < 16; kc++) {
620 tester
621 .kh(kh)
622 .kw(kw)
623 .kc(kc)
624 .qmin(192)
625 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
626 }
627 }
628 }
629 }
630 }
631
632 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_twopass_fulltile_with_qmax) {
633 TEST_REQUIRES_ARM_NEON;
634 auto tester = MaxPoolMicrokernelTester()
635 .mr(9)
636 .qr(8);
637 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
638 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
639 if (kh * kw == tester.mr() + tester.qr()) {
640 for (size_t kc = 1; kc < 16; kc++) {
641 tester
642 .kh(kh)
643 .kw(kw)
644 .kc(kc)
645 .qmax(192)
646 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
647 }
648 }
649 }
650 }
651 }
652
653 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_twopass_fulltile_with_x_stride) {
654 TEST_REQUIRES_ARM_NEON;
655 auto tester = MaxPoolMicrokernelTester()
656 .mr(9)
657 .qr(8)
658 .iterations(3);
659 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
660 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
661 if (kh * kw == tester.mr() + tester.qr()) {
662 for (size_t kc = 1; kc < 16; kc++) {
663 tester
664 .kh(kh)
665 .kw(kw)
666 .kc(kc)
667 .x_stride(257)
668 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
669 }
670 }
671 }
672 }
673 }
674
675 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_twopass_subtile) {
676 TEST_REQUIRES_ARM_NEON;
677 auto tester = MaxPoolMicrokernelTester()
678 .mr(9)
679 .qr(8)
680 .iterations(3);
681 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
682 for (size_t kc = 1; kc < 16; kc++) {
683 tester
684 .kh(ks)
685 .kw(1)
686 .kc(kc)
687 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
688 tester
689 .kh(1)
690 .kw(ks)
691 .kc(kc)
692 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
693 }
694 }
695 }
696
697 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_twopass_fulltile) {
698 TEST_REQUIRES_ARM_NEON;
699 auto tester = MaxPoolMicrokernelTester()
700 .mr(9)
701 .qr(8);
702 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
703 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
704 if (kh * kw == tester.mr() + tester.qr()) {
705 for (size_t kc = 17; kc < 32; kc++) {
706 tester
707 .kh(kh)
708 .kw(kw)
709 .kc(kc)
710 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
711 }
712 }
713 }
714 }
715 }
716
717 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_twopass_fulltile_with_qmin) {
718 TEST_REQUIRES_ARM_NEON;
719 auto tester = MaxPoolMicrokernelTester()
720 .mr(9)
721 .qr(8);
722 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
723 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
724 if (kh * kw == tester.mr() + tester.qr()) {
725 for (size_t kc = 17; kc < 32; kc++) {
726 tester
727 .kh(kh)
728 .kw(kw)
729 .kc(kc)
730 .qmin(192)
731 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
732 }
733 }
734 }
735 }
736 }
737
738 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_twopass_fulltile_with_qmax) {
739 TEST_REQUIRES_ARM_NEON;
740 auto tester = MaxPoolMicrokernelTester()
741 .mr(9)
742 .qr(8);
743 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
744 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
745 if (kh * kw == tester.mr() + tester.qr()) {
746 for (size_t kc = 17; kc < 32; kc++) {
747 tester
748 .kh(kh)
749 .kw(kw)
750 .kc(kc)
751 .qmax(192)
752 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
753 }
754 }
755 }
756 }
757 }
758
759 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_twopass_fulltile_with_x_stride) {
760 TEST_REQUIRES_ARM_NEON;
761 auto tester = MaxPoolMicrokernelTester()
762 .mr(9)
763 .qr(8)
764 .iterations(3);
765 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
766 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
767 if (kh * kw == tester.mr() + tester.qr()) {
768 for (size_t kc = 17; kc < 32; kc++) {
769 tester
770 .kh(kh)
771 .kw(kw)
772 .kc(kc)
773 .x_stride(257)
774 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
775 }
776 }
777 }
778 }
779 }
780
781 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_twopass_subtile) {
782 TEST_REQUIRES_ARM_NEON;
783 auto tester = MaxPoolMicrokernelTester()
784 .mr(9)
785 .qr(8)
786 .iterations(3);
787 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
788 for (size_t kc = 17; kc < 32; kc++) {
789 tester
790 .kh(ks)
791 .kw(1)
792 .kc(kc)
793 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
794 tester
795 .kh(1)
796 .kw(ks)
797 .kc(kc)
798 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
799 }
800 }
801 }
802
803 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_multipass) {
804 TEST_REQUIRES_ARM_NEON;
805 auto tester = MaxPoolMicrokernelTester()
806 .mr(9)
807 .qr(8)
808 .kc(16);
809 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
810 tester
811 .kh(ks)
812 .kw(1)
813 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
814 tester
815 .kh(1)
816 .kw(ks)
817 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
818 }
819 }
820
821 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_multipass_with_qmin) {
822 TEST_REQUIRES_ARM_NEON;
823 auto tester = MaxPoolMicrokernelTester()
824 .mr(9)
825 .qr(8)
826 .kc(16);
827 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
828 tester
829 .kh(ks)
830 .kw(1)
831 .qmin(192)
832 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
833 tester
834 .kh(1)
835 .kw(ks)
836 .qmin(192)
837 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
838 }
839 }
840
841 TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_multipass_with_qmax) {
842 TEST_REQUIRES_ARM_NEON;
843 auto tester = MaxPoolMicrokernelTester()
844 .mr(9)
845 .qr(8)
846 .kc(16);
847 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
848 tester
849 .kh(ks)
850 .kw(1)
851 .qmax(192)
852 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
853 tester
854 .kh(1)
855 .kw(ks)
856 .qmax(192)
857 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
858 }
859 }
860
861 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_multipass) {
862 TEST_REQUIRES_ARM_NEON;
863 auto tester = MaxPoolMicrokernelTester()
864 .mr(9)
865 .qr(8);
866 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
867 for (size_t kc = 16; kc < 256; kc += 48) {
868 tester
869 .kh(ks)
870 .kw(1)
871 .kc(kc)
872 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
873 tester
874 .kh(1)
875 .kw(ks)
876 .kc(kc)
877 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
878 }
879 }
880 }
881
882 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_multipass_with_qmin) {
883 TEST_REQUIRES_ARM_NEON;
884 auto tester = MaxPoolMicrokernelTester()
885 .mr(9)
886 .qr(8);
887 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
888 for (size_t kc = 16; kc < 256; kc += 48) {
889 tester
890 .kh(ks)
891 .kw(1)
892 .kc(kc)
893 .qmin(192)
894 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
895 tester
896 .kh(1)
897 .kw(ks)
898 .kc(kc)
899 .qmin(192)
900 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
901 }
902 }
903 }
904
905 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_multipass_with_qmax) {
906 TEST_REQUIRES_ARM_NEON;
907 auto tester = MaxPoolMicrokernelTester()
908 .mr(9)
909 .qr(8);
910 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
911 for (size_t kc = 16; kc < 256; kc += 48) {
912 tester
913 .kh(ks)
914 .kw(1)
915 .kc(kc)
916 .qmax(192)
917 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
918 tester
919 .kh(1)
920 .kw(ks)
921 .kc(kc)
922 .qmax(192)
923 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
924 }
925 }
926 }
927
928 TEST(U8_MAXPOOL_9P8Q__NEON, kc_div_16_multipass_with_x_stride) {
929 TEST_REQUIRES_ARM_NEON;
930 auto tester = MaxPoolMicrokernelTester()
931 .mr(9)
932 .qr(8)
933 .iterations(3);
934 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
935 for (size_t kc = 16; kc < 256; kc += 48) {
936 tester
937 .kh(ks)
938 .kw(1)
939 .kc(kc)
940 .x_stride(257)
941 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
942 tester
943 .kh(1)
944 .kw(ks)
945 .kc(kc)
946 .x_stride(257)
947 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
948 }
949 }
950 }
951
952 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_multipass) {
953 TEST_REQUIRES_ARM_NEON;
954 auto tester = MaxPoolMicrokernelTester()
955 .mr(9)
956 .qr(8)
957 .iterations(3);
958 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
959 for (size_t kc = 1; kc < 16; kc++) {
960 tester
961 .kh(ks)
962 .kw(1)
963 .kc(kc)
964 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
965 tester
966 .kh(1)
967 .kw(ks)
968 .kc(kc)
969 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
970 }
971 }
972 }
973
974 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_multipass_with_qmin) {
975 TEST_REQUIRES_ARM_NEON;
976 auto tester = MaxPoolMicrokernelTester()
977 .mr(9)
978 .qr(8)
979 .iterations(3);
980 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
981 for (size_t kc = 1; kc < 16; kc++) {
982 tester
983 .kh(ks)
984 .kw(1)
985 .kc(kc)
986 .qmin(192)
987 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
988 tester
989 .kh(1)
990 .kw(ks)
991 .kc(kc)
992 .qmin(192)
993 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
994 }
995 }
996 }
997
998 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_multipass_with_qmax) {
999 TEST_REQUIRES_ARM_NEON;
1000 auto tester = MaxPoolMicrokernelTester()
1001 .mr(9)
1002 .qr(8)
1003 .iterations(3);
1004 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1005 for (size_t kc = 1; kc < 16; kc++) {
1006 tester
1007 .kh(ks)
1008 .kw(1)
1009 .kc(kc)
1010 .qmax(192)
1011 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1012 tester
1013 .kh(1)
1014 .kw(ks)
1015 .kc(kc)
1016 .qmax(192)
1017 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1018 }
1019 }
1020 }
1021
1022 TEST(U8_MAXPOOL_9P8Q__NEON, kc_lt_16_multipass_with_x_stride) {
1023 TEST_REQUIRES_ARM_NEON;
1024 auto tester = MaxPoolMicrokernelTester()
1025 .mr(9)
1026 .qr(8)
1027 .iterations(3);
1028 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1029 for (size_t kc = 1; kc < 16; kc++) {
1030 tester
1031 .kh(ks)
1032 .kw(1)
1033 .kc(kc)
1034 .x_stride(257)
1035 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1036 tester
1037 .kh(1)
1038 .kw(ks)
1039 .kc(kc)
1040 .x_stride(257)
1041 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1042 }
1043 }
1044 }
1045
1046 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_multipass) {
1047 TEST_REQUIRES_ARM_NEON;
1048 auto tester = MaxPoolMicrokernelTester()
1049 .mr(9)
1050 .qr(8)
1051 .iterations(3);
1052 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1053 for (size_t kc = 17; kc < 32; kc++) {
1054 tester
1055 .kh(ks)
1056 .kw(1)
1057 .kc(kc)
1058 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1059 tester
1060 .kh(1)
1061 .kw(ks)
1062 .kc(kc)
1063 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1064 }
1065 }
1066 }
1067
1068 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_multipass_with_qmin) {
1069 TEST_REQUIRES_ARM_NEON;
1070 auto tester = MaxPoolMicrokernelTester()
1071 .mr(9)
1072 .qr(8)
1073 .iterations(3);
1074 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1075 for (size_t kc = 17; kc < 32; kc++) {
1076 tester
1077 .kh(ks)
1078 .kw(1)
1079 .kc(kc)
1080 .qmin(192)
1081 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1082 tester
1083 .kh(1)
1084 .kw(ks)
1085 .kc(kc)
1086 .qmin(192)
1087 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1088 }
1089 }
1090 }
1091
1092 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_multipass_with_qmax) {
1093 TEST_REQUIRES_ARM_NEON;
1094 auto tester = MaxPoolMicrokernelTester()
1095 .mr(9)
1096 .qr(8)
1097 .iterations(3);
1098 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1099 for (size_t kc = 17; kc < 32; kc++) {
1100 tester
1101 .kh(ks)
1102 .kw(1)
1103 .kc(kc)
1104 .qmax(192)
1105 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1106 tester
1107 .kh(1)
1108 .kw(ks)
1109 .kc(kc)
1110 .qmax(192)
1111 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1112 }
1113 }
1114 }
1115
1116 TEST(U8_MAXPOOL_9P8Q__NEON, kc_gt_16_multipass_with_x_stride) {
1117 TEST_REQUIRES_ARM_NEON;
1118 auto tester = MaxPoolMicrokernelTester()
1119 .mr(9)
1120 .qr(8)
1121 .iterations(3);
1122 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
1123 for (size_t kc = 17; kc < 32; kc++) {
1124 tester
1125 .kh(ks)
1126 .kw(1)
1127 .kc(kc)
1128 .x_stride(257)
1129 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1130 tester
1131 .kh(1)
1132 .kw(ks)
1133 .kc(kc)
1134 .x_stride(257)
1135 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1136 }
1137 }
1138 }
1139
1140 TEST(U8_MAXPOOL_9P8Q__NEON, small_n) {
1141 TEST_REQUIRES_ARM_NEON;
1142 for (size_t n = 2; n < 5; n++) {
1143 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
1144 for (size_t kc = 1; kc < 51; kc += 5) {
1145 MaxPoolMicrokernelTester()
1146 .mr(9)
1147 .qr(8)
1148 .n(n)
1149 .kh(ks)
1150 .kw(ks)
1151 .kc(kc)
1152 .iterations(3)
1153 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1154 }
1155 }
1156 }
1157 }
1158
1159 TEST(U8_MAXPOOL_9P8Q__NEON, small_n_with_x_stride) {
1160 TEST_REQUIRES_ARM_NEON;
1161 for (size_t n = 2; n < 5; n++) {
1162 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
1163 for (size_t kc = 1; kc < 51; kc += 5) {
1164 MaxPoolMicrokernelTester()
1165 .mr(9)
1166 .qr(8)
1167 .n(n)
1168 .kh(ks)
1169 .kw(ks)
1170 .kc(kc)
1171 .x_stride(101)
1172 .iterations(1)
1173 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1174 }
1175 }
1176 }
1177 }
1178
1179 TEST(U8_MAXPOOL_9P8Q__NEON, small_n_with_y_stride) {
1180 TEST_REQUIRES_ARM_NEON;
1181 for (size_t n = 2; n < 5; n++) {
1182 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
1183 for (size_t kc = 1; kc < 51; kc += 5) {
1184 MaxPoolMicrokernelTester()
1185 .mr(9)
1186 .qr(8)
1187 .n(n)
1188 .kh(ks)
1189 .kw(ks)
1190 .kc(kc)
1191 .y_stride(103)
1192 .iterations(1)
1193 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1194 }
1195 }
1196 }
1197 }
1198
1199 TEST(U8_MAXPOOL_9P8Q__NEON, small_n_with_s) {
1200 TEST_REQUIRES_ARM_NEON;
1201 for (size_t n = 2; n < 5; n++) {
1202 for (size_t ks : std::vector<size_t>{{2, 3, 5}}) {
1203 for (size_t kc = 1; kc < 51; kc += 5) {
1204 for (size_t s = 2; s <= ks; s++) {
1205 MaxPoolMicrokernelTester()
1206 .mr(9)
1207 .qr(8)
1208 .n(n)
1209 .kh(ks)
1210 .kw(ks)
1211 .kc(kc)
1212 .s(s)
1213 .iterations(1)
1214 .Test(xnn_u8_maxpool_ukernel_9p8q__neon);
1215 }
1216 }
1217 }
1218 }
1219 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001220#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001221
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001222#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001223 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_unipass_fulltile) {
1224 TEST_REQUIRES_X86_SSE2;
1225 auto tester = MaxPoolMicrokernelTester()
1226 .mr(9)
1227 .qr(8)
1228 .kc(16);
1229 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1230 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1231 if (kh * kw == tester.mr()) {
1232 tester
1233 .kh(kh)
1234 .kw(kw)
1235 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1236 }
1237 }
1238 }
1239 }
1240
1241 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_unipass_fulltile_with_qmin) {
1242 TEST_REQUIRES_X86_SSE2;
1243 auto tester = MaxPoolMicrokernelTester()
1244 .mr(9)
1245 .qr(8)
1246 .kc(16);
1247 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1248 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1249 if (kh * kw == tester.mr()) {
1250 tester
1251 .kh(kh)
1252 .kw(kw)
1253 .qmin(192)
1254 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1255 }
1256 }
1257 }
1258 }
1259
1260 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_unipass_fulltile_with_qmax) {
1261 TEST_REQUIRES_X86_SSE2;
1262 auto tester = MaxPoolMicrokernelTester()
1263 .mr(9)
1264 .qr(8)
1265 .kc(16);
1266 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1267 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1268 if (kh * kw == tester.mr()) {
1269 tester
1270 .kh(kh)
1271 .kw(kw)
1272 .qmax(192)
1273 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1274 }
1275 }
1276 }
1277 }
1278
1279 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_unipass_subtile) {
1280 TEST_REQUIRES_X86_SSE2;
1281 auto tester = MaxPoolMicrokernelTester()
1282 .mr(9)
1283 .qr(8)
1284 .kc(16);
1285 for (size_t ks = 2; ks < tester.mr(); ks++) {
1286 tester
1287 .kh(ks)
1288 .kw(1)
1289 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1290 tester
1291 .kh(1)
1292 .kw(ks)
1293 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1294 }
1295 }
1296
1297 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_unipass_fulltile) {
1298 TEST_REQUIRES_X86_SSE2;
1299 auto tester = MaxPoolMicrokernelTester()
1300 .mr(9)
1301 .qr(8);
1302 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1303 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1304 if (kh * kw == tester.mr()) {
1305 for (size_t kc = 16; kc < 256; kc += 48) {
1306 tester
1307 .kh(kh)
1308 .kw(kw)
1309 .kc(kc)
1310 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1311 }
1312 }
1313 }
1314 }
1315 }
1316
1317 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_unipass_fulltile_with_qmin) {
1318 TEST_REQUIRES_X86_SSE2;
1319 auto tester = MaxPoolMicrokernelTester()
1320 .mr(9)
1321 .qr(8);
1322 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1323 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1324 if (kh * kw == tester.mr()) {
1325 for (size_t kc = 16; kc < 256; kc += 48) {
1326 tester
1327 .kh(kh)
1328 .kw(kw)
1329 .kc(kc)
1330 .qmin(192)
1331 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1332 }
1333 }
1334 }
1335 }
1336 }
1337
1338 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_unipass_fulltile_with_qmax) {
1339 TEST_REQUIRES_X86_SSE2;
1340 auto tester = MaxPoolMicrokernelTester()
1341 .mr(9)
1342 .qr(8);
1343 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1344 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1345 if (kh * kw == tester.mr()) {
1346 for (size_t kc = 16; kc < 256; kc += 48) {
1347 tester
1348 .kh(kh)
1349 .kw(kw)
1350 .kc(kc)
1351 .qmax(192)
1352 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1353 }
1354 }
1355 }
1356 }
1357 }
1358
1359 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_unipass_fulltile_with_x_stride) {
1360 TEST_REQUIRES_X86_SSE2;
1361 auto tester = MaxPoolMicrokernelTester()
1362 .mr(9)
1363 .qr(8)
1364 .iterations(3);
1365 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1366 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1367 if (kh * kw == tester.mr()) {
1368 for (size_t kc = 16; kc < 256; kc += 48) {
1369 tester
1370 .kh(kh)
1371 .kw(kw)
1372 .kc(kc)
1373 .x_stride(257)
1374 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1375 }
1376 }
1377 }
1378 }
1379 }
1380
1381 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_unipass_subtile) {
1382 TEST_REQUIRES_X86_SSE2;
1383 auto tester = MaxPoolMicrokernelTester()
1384 .mr(9)
1385 .qr(8)
1386 .iterations(3);
1387 for (size_t ks = 2; ks < tester.mr(); ks++) {
1388 for (size_t kc = 16; kc < 256; kc += 48) {
1389 tester
1390 .kh(ks)
1391 .kw(1)
1392 .kc(kc)
1393 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1394 tester
1395 .kh(1)
1396 .kw(ks)
1397 .kc(kc)
1398 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1399 }
1400 }
1401 }
1402
1403 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_unipass_fulltile) {
1404 TEST_REQUIRES_X86_SSE2;
1405 auto tester = MaxPoolMicrokernelTester()
1406 .mr(9)
1407 .qr(8);
1408 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1409 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1410 if (kh * kw == tester.mr()) {
1411 for (size_t kc = 1; kc < 16; kc++) {
1412 tester
1413 .kh(kh)
1414 .kw(kw)
1415 .kc(kc)
1416 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1417 }
1418 }
1419 }
1420 }
1421 }
1422
1423 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_unipass_fulltile_with_qmin) {
1424 TEST_REQUIRES_X86_SSE2;
1425 auto tester = MaxPoolMicrokernelTester()
1426 .mr(9)
1427 .qr(8);
1428 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1429 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1430 if (kh * kw == tester.mr()) {
1431 for (size_t kc = 1; kc < 16; kc++) {
1432 tester
1433 .kh(kh)
1434 .kw(kw)
1435 .kc(kc)
1436 .qmin(192)
1437 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1438 }
1439 }
1440 }
1441 }
1442 }
1443
1444 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_unipass_fulltile_with_qmax) {
1445 TEST_REQUIRES_X86_SSE2;
1446 auto tester = MaxPoolMicrokernelTester()
1447 .mr(9)
1448 .qr(8);
1449 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1450 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1451 if (kh * kw == tester.mr()) {
1452 for (size_t kc = 1; kc < 16; kc++) {
1453 tester
1454 .kh(kh)
1455 .kw(kw)
1456 .kc(kc)
1457 .qmax(192)
1458 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1459 }
1460 }
1461 }
1462 }
1463 }
1464
1465 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_unipass_fulltile_with_x_stride) {
1466 TEST_REQUIRES_X86_SSE2;
1467 auto tester = MaxPoolMicrokernelTester()
1468 .mr(9)
1469 .qr(8)
1470 .iterations(3);
1471 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1472 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1473 if (kh * kw == tester.mr()) {
1474 for (size_t kc = 1; kc < 16; kc++) {
1475 tester
1476 .kh(kh)
1477 .kw(kw)
1478 .kc(kc)
1479 .x_stride(257)
1480 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1481 }
1482 }
1483 }
1484 }
1485 }
1486
1487 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_unipass_subtile) {
1488 TEST_REQUIRES_X86_SSE2;
1489 auto tester = MaxPoolMicrokernelTester()
1490 .mr(9)
1491 .qr(8)
1492 .iterations(3);
1493 for (size_t ks = 2; ks < tester.mr(); ks++) {
1494 for (size_t kc = 1; kc < 16; kc++) {
1495 tester
1496 .kh(ks)
1497 .kw(1)
1498 .kc(kc)
1499 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1500 tester
1501 .kh(1)
1502 .kw(ks)
1503 .kc(kc)
1504 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1505 }
1506 }
1507 }
1508
1509 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_unipass_fulltile) {
1510 TEST_REQUIRES_X86_SSE2;
1511 auto tester = MaxPoolMicrokernelTester()
1512 .mr(9)
1513 .qr(8);
1514 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1515 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1516 if (kh * kw == tester.mr()) {
1517 for (size_t kc = 17; kc < 32; kc++) {
1518 tester
1519 .kh(kh)
1520 .kw(kw)
1521 .kc(kc)
1522 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1523 }
1524 }
1525 }
1526 }
1527 }
1528
1529 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_unipass_fulltile_with_qmin) {
1530 TEST_REQUIRES_X86_SSE2;
1531 auto tester = MaxPoolMicrokernelTester()
1532 .mr(9)
1533 .qr(8);
1534 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1535 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1536 if (kh * kw == tester.mr()) {
1537 for (size_t kc = 17; kc < 32; kc++) {
1538 tester
1539 .kh(kh)
1540 .kw(kw)
1541 .kc(kc)
1542 .qmin(192)
1543 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1544 }
1545 }
1546 }
1547 }
1548 }
1549
1550 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_unipass_fulltile_with_qmax) {
1551 TEST_REQUIRES_X86_SSE2;
1552 auto tester = MaxPoolMicrokernelTester()
1553 .mr(9)
1554 .qr(8);
1555 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1556 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1557 if (kh * kw == tester.mr()) {
1558 for (size_t kc = 17; kc < 32; kc++) {
1559 tester
1560 .kh(kh)
1561 .kw(kw)
1562 .kc(kc)
1563 .qmax(192)
1564 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1565 }
1566 }
1567 }
1568 }
1569 }
1570
1571 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_unipass_fulltile_with_x_stride) {
1572 TEST_REQUIRES_X86_SSE2;
1573 auto tester = MaxPoolMicrokernelTester()
1574 .mr(9)
1575 .qr(8)
1576 .iterations(3);
1577 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1578 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1579 if (kh * kw == tester.mr()) {
1580 for (size_t kc = 17; kc < 32; kc++) {
1581 tester
1582 .kh(kh)
1583 .kw(kw)
1584 .kc(kc)
1585 .x_stride(257)
1586 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1587 }
1588 }
1589 }
1590 }
1591 }
1592
1593 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_unipass_subtile) {
1594 TEST_REQUIRES_X86_SSE2;
1595 auto tester = MaxPoolMicrokernelTester()
1596 .mr(9)
1597 .qr(8)
1598 .iterations(3);
1599 for (size_t ks = 2; ks < tester.mr(); ks++) {
1600 for (size_t kc = 17; kc < 32; kc++) {
1601 tester
1602 .kh(ks)
1603 .kw(1)
1604 .kc(kc)
1605 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1606 tester
1607 .kh(1)
1608 .kw(ks)
1609 .kc(kc)
1610 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1611 }
1612 }
1613 }
1614
1615 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_twopass_fulltile) {
1616 TEST_REQUIRES_X86_SSE2;
1617 auto tester = MaxPoolMicrokernelTester()
1618 .mr(9)
1619 .qr(8)
1620 .kc(16);
1621 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1622 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1623 if (kh * kw == tester.mr() + tester.qr()) {
1624 tester
1625 .kh(kh)
1626 .kw(kw)
1627 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1628 }
1629 }
1630 }
1631 }
1632
1633 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_twopass_fulltile_with_qmin) {
1634 TEST_REQUIRES_X86_SSE2;
1635 auto tester = MaxPoolMicrokernelTester()
1636 .mr(9)
1637 .qr(8)
1638 .kc(16);
1639 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1640 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1641 if (kh * kw == tester.mr() + tester.qr()) {
1642 tester
1643 .kh(kh)
1644 .kw(kw)
1645 .qmin(192)
1646 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1647 }
1648 }
1649 }
1650 }
1651
1652 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_twopass_fulltile_with_qmax) {
1653 TEST_REQUIRES_X86_SSE2;
1654 auto tester = MaxPoolMicrokernelTester()
1655 .mr(9)
1656 .qr(8)
1657 .kc(16);
1658 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1659 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1660 if (kh * kw == tester.mr() + tester.qr()) {
1661 tester
1662 .kh(kh)
1663 .kw(kw)
1664 .qmax(192)
1665 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1666 }
1667 }
1668 }
1669 }
1670
1671 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_twopass_subtile) {
1672 TEST_REQUIRES_X86_SSE2;
1673 auto tester = MaxPoolMicrokernelTester()
1674 .mr(9)
1675 .qr(8)
1676 .kc(16);
1677 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
1678 tester
1679 .kh(ks)
1680 .kw(1)
1681 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1682 tester
1683 .kh(1)
1684 .kw(ks)
1685 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1686 }
1687 }
1688
1689 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_twopass_fulltile) {
1690 TEST_REQUIRES_X86_SSE2;
1691 auto tester = MaxPoolMicrokernelTester()
1692 .mr(9)
1693 .qr(8);
1694 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1695 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1696 if (kh * kw == tester.mr() + tester.qr()) {
1697 for (size_t kc = 16; kc < 256; kc += 48) {
1698 tester
1699 .kh(kh)
1700 .kw(kw)
1701 .kc(kc)
1702 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_twopass_fulltile_with_qmin) {
1710 TEST_REQUIRES_X86_SSE2;
1711 auto tester = MaxPoolMicrokernelTester()
1712 .mr(9)
1713 .qr(8);
1714 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1715 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1716 if (kh * kw == tester.mr() + tester.qr()) {
1717 for (size_t kc = 16; kc < 256; kc += 48) {
1718 tester
1719 .kh(kh)
1720 .kw(kw)
1721 .kc(kc)
1722 .qmin(192)
1723 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1724 }
1725 }
1726 }
1727 }
1728 }
1729
1730 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_twopass_fulltile_with_qmax) {
1731 TEST_REQUIRES_X86_SSE2;
1732 auto tester = MaxPoolMicrokernelTester()
1733 .mr(9)
1734 .qr(8);
1735 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1736 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1737 if (kh * kw == tester.mr() + tester.qr()) {
1738 for (size_t kc = 16; kc < 256; kc += 48) {
1739 tester
1740 .kh(kh)
1741 .kw(kw)
1742 .kc(kc)
1743 .qmax(192)
1744 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1745 }
1746 }
1747 }
1748 }
1749 }
1750
1751 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_twopass_fulltile_with_x_stride) {
1752 TEST_REQUIRES_X86_SSE2;
1753 auto tester = MaxPoolMicrokernelTester()
1754 .mr(9)
1755 .qr(8)
1756 .iterations(3);
1757 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1758 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1759 if (kh * kw == tester.mr() + tester.qr()) {
1760 for (size_t kc = 16; kc < 256; kc += 48) {
1761 tester
1762 .kh(kh)
1763 .kw(kw)
1764 .kc(kc)
1765 .x_stride(257)
1766 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1767 }
1768 }
1769 }
1770 }
1771 }
1772
1773 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_twopass_subtile) {
1774 TEST_REQUIRES_X86_SSE2;
1775 auto tester = MaxPoolMicrokernelTester()
1776 .mr(9)
1777 .qr(8)
1778 .iterations(3);
1779 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
1780 for (size_t kc = 16; kc < 256; kc += 48) {
1781 tester
1782 .kh(ks)
1783 .kw(1)
1784 .kc(kc)
1785 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1786 tester
1787 .kh(1)
1788 .kw(ks)
1789 .kc(kc)
1790 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1791 }
1792 }
1793 }
1794
1795 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_twopass_fulltile) {
1796 TEST_REQUIRES_X86_SSE2;
1797 auto tester = MaxPoolMicrokernelTester()
1798 .mr(9)
1799 .qr(8);
1800 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1801 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1802 if (kh * kw == tester.mr() + tester.qr()) {
1803 for (size_t kc = 1; kc < 16; kc++) {
1804 tester
1805 .kh(kh)
1806 .kw(kw)
1807 .kc(kc)
1808 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1809 }
1810 }
1811 }
1812 }
1813 }
1814
1815 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_twopass_fulltile_with_qmin) {
1816 TEST_REQUIRES_X86_SSE2;
1817 auto tester = MaxPoolMicrokernelTester()
1818 .mr(9)
1819 .qr(8);
1820 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1821 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1822 if (kh * kw == tester.mr() + tester.qr()) {
1823 for (size_t kc = 1; kc < 16; kc++) {
1824 tester
1825 .kh(kh)
1826 .kw(kw)
1827 .kc(kc)
1828 .qmin(192)
1829 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1830 }
1831 }
1832 }
1833 }
1834 }
1835
1836 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_twopass_fulltile_with_qmax) {
1837 TEST_REQUIRES_X86_SSE2;
1838 auto tester = MaxPoolMicrokernelTester()
1839 .mr(9)
1840 .qr(8);
1841 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1842 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1843 if (kh * kw == tester.mr() + tester.qr()) {
1844 for (size_t kc = 1; kc < 16; kc++) {
1845 tester
1846 .kh(kh)
1847 .kw(kw)
1848 .kc(kc)
1849 .qmax(192)
1850 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1851 }
1852 }
1853 }
1854 }
1855 }
1856
1857 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_twopass_fulltile_with_x_stride) {
1858 TEST_REQUIRES_X86_SSE2;
1859 auto tester = MaxPoolMicrokernelTester()
1860 .mr(9)
1861 .qr(8)
1862 .iterations(3);
1863 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1864 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1865 if (kh * kw == tester.mr() + tester.qr()) {
1866 for (size_t kc = 1; kc < 16; kc++) {
1867 tester
1868 .kh(kh)
1869 .kw(kw)
1870 .kc(kc)
1871 .x_stride(257)
1872 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1873 }
1874 }
1875 }
1876 }
1877 }
1878
1879 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_twopass_subtile) {
1880 TEST_REQUIRES_X86_SSE2;
1881 auto tester = MaxPoolMicrokernelTester()
1882 .mr(9)
1883 .qr(8)
1884 .iterations(3);
1885 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
1886 for (size_t kc = 1; kc < 16; kc++) {
1887 tester
1888 .kh(ks)
1889 .kw(1)
1890 .kc(kc)
1891 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1892 tester
1893 .kh(1)
1894 .kw(ks)
1895 .kc(kc)
1896 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1897 }
1898 }
1899 }
1900
1901 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_twopass_fulltile) {
1902 TEST_REQUIRES_X86_SSE2;
1903 auto tester = MaxPoolMicrokernelTester()
1904 .mr(9)
1905 .qr(8);
1906 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1907 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1908 if (kh * kw == tester.mr() + tester.qr()) {
1909 for (size_t kc = 17; kc < 32; kc++) {
1910 tester
1911 .kh(kh)
1912 .kw(kw)
1913 .kc(kc)
1914 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1915 }
1916 }
1917 }
1918 }
1919 }
1920
1921 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_twopass_fulltile_with_qmin) {
1922 TEST_REQUIRES_X86_SSE2;
1923 auto tester = MaxPoolMicrokernelTester()
1924 .mr(9)
1925 .qr(8);
1926 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1927 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1928 if (kh * kw == tester.mr() + tester.qr()) {
1929 for (size_t kc = 17; kc < 32; kc++) {
1930 tester
1931 .kh(kh)
1932 .kw(kw)
1933 .kc(kc)
1934 .qmin(192)
1935 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1936 }
1937 }
1938 }
1939 }
1940 }
1941
1942 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_twopass_fulltile_with_qmax) {
1943 TEST_REQUIRES_X86_SSE2;
1944 auto tester = MaxPoolMicrokernelTester()
1945 .mr(9)
1946 .qr(8);
1947 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1948 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1949 if (kh * kw == tester.mr() + tester.qr()) {
1950 for (size_t kc = 17; kc < 32; kc++) {
1951 tester
1952 .kh(kh)
1953 .kw(kw)
1954 .kc(kc)
1955 .qmax(192)
1956 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1957 }
1958 }
1959 }
1960 }
1961 }
1962
1963 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_twopass_fulltile_with_x_stride) {
1964 TEST_REQUIRES_X86_SSE2;
1965 auto tester = MaxPoolMicrokernelTester()
1966 .mr(9)
1967 .qr(8)
1968 .iterations(3);
1969 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
1970 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
1971 if (kh * kw == tester.mr() + tester.qr()) {
1972 for (size_t kc = 17; kc < 32; kc++) {
1973 tester
1974 .kh(kh)
1975 .kw(kw)
1976 .kc(kc)
1977 .x_stride(257)
1978 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1979 }
1980 }
1981 }
1982 }
1983 }
1984
1985 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_twopass_subtile) {
1986 TEST_REQUIRES_X86_SSE2;
1987 auto tester = MaxPoolMicrokernelTester()
1988 .mr(9)
1989 .qr(8)
1990 .iterations(3);
1991 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
1992 for (size_t kc = 17; kc < 32; kc++) {
1993 tester
1994 .kh(ks)
1995 .kw(1)
1996 .kc(kc)
1997 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
1998 tester
1999 .kh(1)
2000 .kw(ks)
2001 .kc(kc)
2002 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2003 }
2004 }
2005 }
2006
2007 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_multipass) {
2008 TEST_REQUIRES_X86_SSE2;
2009 auto tester = MaxPoolMicrokernelTester()
2010 .mr(9)
2011 .qr(8)
2012 .kc(16);
2013 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2014 tester
2015 .kh(ks)
2016 .kw(1)
2017 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2018 tester
2019 .kh(1)
2020 .kw(ks)
2021 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2022 }
2023 }
2024
2025 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_multipass_with_qmin) {
2026 TEST_REQUIRES_X86_SSE2;
2027 auto tester = MaxPoolMicrokernelTester()
2028 .mr(9)
2029 .qr(8)
2030 .kc(16);
2031 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2032 tester
2033 .kh(ks)
2034 .kw(1)
2035 .qmin(192)
2036 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2037 tester
2038 .kh(1)
2039 .kw(ks)
2040 .qmin(192)
2041 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2042 }
2043 }
2044
2045 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_multipass_with_qmax) {
2046 TEST_REQUIRES_X86_SSE2;
2047 auto tester = MaxPoolMicrokernelTester()
2048 .mr(9)
2049 .qr(8)
2050 .kc(16);
2051 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2052 tester
2053 .kh(ks)
2054 .kw(1)
2055 .qmax(192)
2056 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2057 tester
2058 .kh(1)
2059 .kw(ks)
2060 .qmax(192)
2061 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2062 }
2063 }
2064
2065 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_multipass) {
2066 TEST_REQUIRES_X86_SSE2;
2067 auto tester = MaxPoolMicrokernelTester()
2068 .mr(9)
2069 .qr(8);
2070 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2071 for (size_t kc = 16; kc < 256; kc += 48) {
2072 tester
2073 .kh(ks)
2074 .kw(1)
2075 .kc(kc)
2076 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2077 tester
2078 .kh(1)
2079 .kw(ks)
2080 .kc(kc)
2081 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2082 }
2083 }
2084 }
2085
2086 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_multipass_with_qmin) {
2087 TEST_REQUIRES_X86_SSE2;
2088 auto tester = MaxPoolMicrokernelTester()
2089 .mr(9)
2090 .qr(8);
2091 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2092 for (size_t kc = 16; kc < 256; kc += 48) {
2093 tester
2094 .kh(ks)
2095 .kw(1)
2096 .kc(kc)
2097 .qmin(192)
2098 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2099 tester
2100 .kh(1)
2101 .kw(ks)
2102 .kc(kc)
2103 .qmin(192)
2104 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2105 }
2106 }
2107 }
2108
2109 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_multipass_with_qmax) {
2110 TEST_REQUIRES_X86_SSE2;
2111 auto tester = MaxPoolMicrokernelTester()
2112 .mr(9)
2113 .qr(8);
2114 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2115 for (size_t kc = 16; kc < 256; kc += 48) {
2116 tester
2117 .kh(ks)
2118 .kw(1)
2119 .kc(kc)
2120 .qmax(192)
2121 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2122 tester
2123 .kh(1)
2124 .kw(ks)
2125 .kc(kc)
2126 .qmax(192)
2127 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2128 }
2129 }
2130 }
2131
2132 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_div_16_multipass_with_x_stride) {
2133 TEST_REQUIRES_X86_SSE2;
2134 auto tester = MaxPoolMicrokernelTester()
2135 .mr(9)
2136 .qr(8)
2137 .iterations(3);
2138 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2139 for (size_t kc = 16; kc < 256; kc += 48) {
2140 tester
2141 .kh(ks)
2142 .kw(1)
2143 .kc(kc)
2144 .x_stride(257)
2145 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2146 tester
2147 .kh(1)
2148 .kw(ks)
2149 .kc(kc)
2150 .x_stride(257)
2151 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2152 }
2153 }
2154 }
2155
2156 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_multipass) {
2157 TEST_REQUIRES_X86_SSE2;
2158 auto tester = MaxPoolMicrokernelTester()
2159 .mr(9)
2160 .qr(8)
2161 .iterations(3);
2162 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2163 for (size_t kc = 1; kc < 16; kc++) {
2164 tester
2165 .kh(ks)
2166 .kw(1)
2167 .kc(kc)
2168 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2169 tester
2170 .kh(1)
2171 .kw(ks)
2172 .kc(kc)
2173 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2174 }
2175 }
2176 }
2177
2178 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_multipass_with_qmin) {
2179 TEST_REQUIRES_X86_SSE2;
2180 auto tester = MaxPoolMicrokernelTester()
2181 .mr(9)
2182 .qr(8)
2183 .iterations(3);
2184 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2185 for (size_t kc = 1; kc < 16; kc++) {
2186 tester
2187 .kh(ks)
2188 .kw(1)
2189 .kc(kc)
2190 .qmin(192)
2191 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2192 tester
2193 .kh(1)
2194 .kw(ks)
2195 .kc(kc)
2196 .qmin(192)
2197 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2198 }
2199 }
2200 }
2201
2202 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_multipass_with_qmax) {
2203 TEST_REQUIRES_X86_SSE2;
2204 auto tester = MaxPoolMicrokernelTester()
2205 .mr(9)
2206 .qr(8)
2207 .iterations(3);
2208 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2209 for (size_t kc = 1; kc < 16; kc++) {
2210 tester
2211 .kh(ks)
2212 .kw(1)
2213 .kc(kc)
2214 .qmax(192)
2215 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2216 tester
2217 .kh(1)
2218 .kw(ks)
2219 .kc(kc)
2220 .qmax(192)
2221 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2222 }
2223 }
2224 }
2225
2226 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_lt_16_multipass_with_x_stride) {
2227 TEST_REQUIRES_X86_SSE2;
2228 auto tester = MaxPoolMicrokernelTester()
2229 .mr(9)
2230 .qr(8)
2231 .iterations(3);
2232 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2233 for (size_t kc = 1; kc < 16; kc++) {
2234 tester
2235 .kh(ks)
2236 .kw(1)
2237 .kc(kc)
2238 .x_stride(257)
2239 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2240 tester
2241 .kh(1)
2242 .kw(ks)
2243 .kc(kc)
2244 .x_stride(257)
2245 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2246 }
2247 }
2248 }
2249
2250 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_multipass) {
2251 TEST_REQUIRES_X86_SSE2;
2252 auto tester = MaxPoolMicrokernelTester()
2253 .mr(9)
2254 .qr(8)
2255 .iterations(3);
2256 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2257 for (size_t kc = 17; kc < 32; kc++) {
2258 tester
2259 .kh(ks)
2260 .kw(1)
2261 .kc(kc)
2262 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2263 tester
2264 .kh(1)
2265 .kw(ks)
2266 .kc(kc)
2267 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2268 }
2269 }
2270 }
2271
2272 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_multipass_with_qmin) {
2273 TEST_REQUIRES_X86_SSE2;
2274 auto tester = MaxPoolMicrokernelTester()
2275 .mr(9)
2276 .qr(8)
2277 .iterations(3);
2278 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2279 for (size_t kc = 17; kc < 32; kc++) {
2280 tester
2281 .kh(ks)
2282 .kw(1)
2283 .kc(kc)
2284 .qmin(192)
2285 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2286 tester
2287 .kh(1)
2288 .kw(ks)
2289 .kc(kc)
2290 .qmin(192)
2291 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2292 }
2293 }
2294 }
2295
2296 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_multipass_with_qmax) {
2297 TEST_REQUIRES_X86_SSE2;
2298 auto tester = MaxPoolMicrokernelTester()
2299 .mr(9)
2300 .qr(8)
2301 .iterations(3);
2302 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2303 for (size_t kc = 17; kc < 32; kc++) {
2304 tester
2305 .kh(ks)
2306 .kw(1)
2307 .kc(kc)
2308 .qmax(192)
2309 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2310 tester
2311 .kh(1)
2312 .kw(ks)
2313 .kc(kc)
2314 .qmax(192)
2315 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2316 }
2317 }
2318 }
2319
2320 TEST(U8_MAXPOOL_9P8Q__SSE2, kc_gt_16_multipass_with_x_stride) {
2321 TEST_REQUIRES_X86_SSE2;
2322 auto tester = MaxPoolMicrokernelTester()
2323 .mr(9)
2324 .qr(8)
2325 .iterations(3);
2326 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2327 for (size_t kc = 17; kc < 32; kc++) {
2328 tester
2329 .kh(ks)
2330 .kw(1)
2331 .kc(kc)
2332 .x_stride(257)
2333 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2334 tester
2335 .kh(1)
2336 .kw(ks)
2337 .kc(kc)
2338 .x_stride(257)
2339 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2340 }
2341 }
2342 }
2343
2344 TEST(U8_MAXPOOL_9P8Q__SSE2, small_n) {
2345 TEST_REQUIRES_X86_SSE2;
2346 for (size_t n = 2; n < 5; n++) {
2347 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2348 for (size_t kc = 1; kc < 51; kc += 5) {
2349 MaxPoolMicrokernelTester()
2350 .mr(9)
2351 .qr(8)
2352 .n(n)
2353 .kh(ks)
2354 .kw(ks)
2355 .kc(kc)
2356 .iterations(3)
2357 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2358 }
2359 }
2360 }
2361 }
2362
2363 TEST(U8_MAXPOOL_9P8Q__SSE2, small_n_with_x_stride) {
2364 TEST_REQUIRES_X86_SSE2;
2365 for (size_t n = 2; n < 5; n++) {
2366 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2367 for (size_t kc = 1; kc < 51; kc += 5) {
2368 MaxPoolMicrokernelTester()
2369 .mr(9)
2370 .qr(8)
2371 .n(n)
2372 .kh(ks)
2373 .kw(ks)
2374 .kc(kc)
2375 .x_stride(101)
2376 .iterations(1)
2377 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2378 }
2379 }
2380 }
2381 }
2382
2383 TEST(U8_MAXPOOL_9P8Q__SSE2, small_n_with_y_stride) {
2384 TEST_REQUIRES_X86_SSE2;
2385 for (size_t n = 2; n < 5; n++) {
2386 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2387 for (size_t kc = 1; kc < 51; kc += 5) {
2388 MaxPoolMicrokernelTester()
2389 .mr(9)
2390 .qr(8)
2391 .n(n)
2392 .kh(ks)
2393 .kw(ks)
2394 .kc(kc)
2395 .y_stride(103)
2396 .iterations(1)
2397 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2398 }
2399 }
2400 }
2401 }
2402
2403 TEST(U8_MAXPOOL_9P8Q__SSE2, small_n_with_s) {
2404 TEST_REQUIRES_X86_SSE2;
2405 for (size_t n = 2; n < 5; n++) {
2406 for (size_t ks : std::vector<size_t>{{2, 3, 5}}) {
2407 for (size_t kc = 1; kc < 51; kc += 5) {
2408 for (size_t s = 2; s <= ks; s++) {
2409 MaxPoolMicrokernelTester()
2410 .mr(9)
2411 .qr(8)
2412 .n(n)
2413 .kh(ks)
2414 .kw(ks)
2415 .kc(kc)
2416 .s(s)
2417 .iterations(1)
2418 .Test(xnn_u8_maxpool_ukernel_9p8q__sse2);
2419 }
2420 }
2421 }
2422 }
2423 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002424#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002425
2426TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_fulltile) {
2427 auto tester = MaxPoolMicrokernelTester()
2428 .mr(9)
2429 .qr(8)
2430 .kc(1);
2431 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2432 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2433 if (kh * kw == tester.mr()) {
2434 tester
2435 .kh(kh)
2436 .kw(kw)
2437 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2438 }
2439 }
2440 }
2441}
2442
2443TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_fulltile_with_qmin) {
2444 auto tester = MaxPoolMicrokernelTester()
2445 .mr(9)
2446 .qr(8)
2447 .kc(1);
2448 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2449 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2450 if (kh * kw == tester.mr()) {
2451 tester
2452 .kh(kh)
2453 .kw(kw)
2454 .qmin(192)
2455 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2456 }
2457 }
2458 }
2459}
2460
2461TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_fulltile_with_qmax) {
2462 auto tester = MaxPoolMicrokernelTester()
2463 .mr(9)
2464 .qr(8)
2465 .kc(1);
2466 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2467 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2468 if (kh * kw == tester.mr()) {
2469 tester
2470 .kh(kh)
2471 .kw(kw)
2472 .qmax(192)
2473 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2474 }
2475 }
2476 }
2477}
2478
2479TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_subtile) {
2480 auto tester = MaxPoolMicrokernelTester()
2481 .mr(9)
2482 .qr(8)
2483 .kc(1);
2484 for (size_t ks = 2; ks < tester.mr(); ks++) {
2485 tester
2486 .kh(ks)
2487 .kw(1)
2488 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2489 tester
2490 .kh(1)
2491 .kw(ks)
2492 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2493 }
2494}
2495
2496TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_unipass_fulltile) {
2497 auto tester = MaxPoolMicrokernelTester()
2498 .mr(9)
2499 .qr(8);
2500 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2501 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2502 if (kh * kw == tester.mr()) {
2503 for (size_t kc = 2; kc < 8; kc++) {
2504 tester
2505 .kh(kh)
2506 .kw(kw)
2507 .kc(kc)
2508 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2509 }
2510 }
2511 }
2512 }
2513}
2514
2515TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_unipass_fulltile_with_qmin) {
2516 auto tester = MaxPoolMicrokernelTester()
2517 .mr(9)
2518 .qr(8);
2519 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2520 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2521 if (kh * kw == tester.mr()) {
2522 for (size_t kc = 2; kc < 8; kc++) {
2523 tester
2524 .kh(kh)
2525 .kw(kw)
2526 .kc(kc)
2527 .qmin(192)
2528 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2529 }
2530 }
2531 }
2532 }
2533}
2534
2535TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_unipass_fulltile_with_qmax) {
2536 auto tester = MaxPoolMicrokernelTester()
2537 .mr(9)
2538 .qr(8);
2539 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2540 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2541 if (kh * kw == tester.mr()) {
2542 for (size_t kc = 2; kc < 8; kc++) {
2543 tester
2544 .kh(kh)
2545 .kw(kw)
2546 .kc(kc)
2547 .qmax(192)
2548 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2549 }
2550 }
2551 }
2552 }
2553}
2554
2555TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_unipass_fulltile_with_x_stride) {
2556 auto tester = MaxPoolMicrokernelTester()
2557 .mr(9)
2558 .qr(8)
2559 .iterations(3);
2560 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2561 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2562 if (kh * kw == tester.mr()) {
2563 for (size_t kc = 2; kc < 8; kc++) {
2564 tester
2565 .kh(kh)
2566 .kw(kw)
2567 .kc(kc)
2568 .x_stride(257)
2569 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2570 }
2571 }
2572 }
2573 }
2574}
2575
2576TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_unipass_subtile) {
2577 auto tester = MaxPoolMicrokernelTester()
2578 .mr(9)
2579 .qr(8)
2580 .iterations(3);
2581 for (size_t ks = 2; ks < tester.mr(); ks++) {
2582 for (size_t kc = 2; kc < 8; kc++) {
2583 tester
2584 .kh(ks)
2585 .kw(1)
2586 .kc(kc)
2587 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2588 tester
2589 .kh(1)
2590 .kw(ks)
2591 .kc(kc)
2592 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2593 }
2594 }
2595}
2596
2597TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_twopass_fulltile) {
2598 auto tester = MaxPoolMicrokernelTester()
2599 .mr(9)
2600 .qr(8)
2601 .kc(1);
2602 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2603 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2604 if (kh * kw == tester.mr() + tester.qr()) {
2605 tester
2606 .kh(kh)
2607 .kw(kw)
2608 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2609 }
2610 }
2611 }
2612}
2613
2614TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_twopass_fulltile_with_qmin) {
2615 auto tester = MaxPoolMicrokernelTester()
2616 .mr(9)
2617 .qr(8)
2618 .kc(1);
2619 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2620 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2621 if (kh * kw == tester.mr() + tester.qr()) {
2622 tester
2623 .kh(kh)
2624 .kw(kw)
2625 .qmin(192)
2626 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2627 }
2628 }
2629 }
2630}
2631
2632TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_twopass_fulltile_with_qmax) {
2633 auto tester = MaxPoolMicrokernelTester()
2634 .mr(9)
2635 .qr(8)
2636 .kc(1);
2637 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2638 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2639 if (kh * kw == tester.mr() + tester.qr()) {
2640 tester
2641 .kh(kh)
2642 .kw(kw)
2643 .qmax(192)
2644 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2645 }
2646 }
2647 }
2648}
2649
2650TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_twopass_subtile) {
2651 auto tester = MaxPoolMicrokernelTester()
2652 .mr(9)
2653 .qr(8)
2654 .kc(1);
2655 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
2656 tester
2657 .kh(ks)
2658 .kw(1)
2659 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2660 tester
2661 .kh(1)
2662 .kw(ks)
2663 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2664 }
2665}
2666
2667TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_twopass_fulltile) {
2668 auto tester = MaxPoolMicrokernelTester()
2669 .mr(9)
2670 .qr(8);
2671 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2672 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2673 if (kh * kw == tester.mr() + tester.qr()) {
2674 for (size_t kc = 2; kc < 8; kc++) {
2675 tester
2676 .kh(kh)
2677 .kw(kw)
2678 .kc(kc)
2679 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2680 }
2681 }
2682 }
2683 }
2684}
2685
2686TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_qmin) {
2687 auto tester = MaxPoolMicrokernelTester()
2688 .mr(9)
2689 .qr(8);
2690 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2691 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2692 if (kh * kw == tester.mr() + tester.qr()) {
2693 for (size_t kc = 2; kc < 8; kc++) {
2694 tester
2695 .kh(kh)
2696 .kw(kw)
2697 .kc(kc)
2698 .qmin(192)
2699 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2700 }
2701 }
2702 }
2703 }
2704}
2705
2706TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_qmax) {
2707 auto tester = MaxPoolMicrokernelTester()
2708 .mr(9)
2709 .qr(8);
2710 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2711 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2712 if (kh * kw == tester.mr() + tester.qr()) {
2713 for (size_t kc = 2; kc < 8; kc++) {
2714 tester
2715 .kh(kh)
2716 .kw(kw)
2717 .kc(kc)
2718 .qmax(192)
2719 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2720 }
2721 }
2722 }
2723 }
2724}
2725
2726TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_x_stride) {
2727 auto tester = MaxPoolMicrokernelTester()
2728 .mr(9)
2729 .qr(8)
2730 .iterations(3);
2731 for (size_t kh = 1; kh <= tester.mr() + tester.qr(); kh++) {
2732 for (size_t kw = 1; kw <= tester.mr() + tester.qr(); kw++) {
2733 if (kh * kw == tester.mr() + tester.qr()) {
2734 for (size_t kc = 2; kc < 8; kc++) {
2735 tester
2736 .kh(kh)
2737 .kw(kw)
2738 .kc(kc)
2739 .x_stride(257)
2740 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2741 }
2742 }
2743 }
2744 }
2745}
2746
2747TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_twopass_subtile) {
2748 auto tester = MaxPoolMicrokernelTester()
2749 .mr(9)
2750 .qr(8)
2751 .iterations(3);
2752 for (size_t ks = tester.mr() + 1; ks < tester.mr() + tester.qr(); ks++) {
2753 for (size_t kc = 2; kc < 8; kc++) {
2754 tester
2755 .kh(ks)
2756 .kw(1)
2757 .kc(kc)
2758 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2759 tester
2760 .kh(1)
2761 .kw(ks)
2762 .kc(kc)
2763 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2764 }
2765 }
2766}
2767
2768TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_multipass) {
2769 auto tester = MaxPoolMicrokernelTester()
2770 .mr(9)
2771 .qr(8)
2772 .kc(1);
2773 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2774 tester
2775 .kh(ks)
2776 .kw(1)
2777 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2778 tester
2779 .kh(1)
2780 .kw(ks)
2781 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2782 }
2783}
2784
2785TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_multipass_with_qmin) {
2786 auto tester = MaxPoolMicrokernelTester()
2787 .mr(9)
2788 .qr(8)
2789 .kc(1);
2790 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2791 tester
2792 .kh(ks)
2793 .kw(1)
2794 .qmin(192)
2795 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2796 tester
2797 .kh(1)
2798 .kw(ks)
2799 .qmin(192)
2800 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2801 }
2802}
2803
2804TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_multipass_with_qmax) {
2805 auto tester = MaxPoolMicrokernelTester()
2806 .mr(9)
2807 .qr(8)
2808 .kc(1);
2809 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2810 tester
2811 .kh(ks)
2812 .kw(1)
2813 .qmax(192)
2814 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2815 tester
2816 .kh(1)
2817 .kw(ks)
2818 .qmax(192)
2819 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2820 }
2821}
2822
2823TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_multipass) {
2824 auto tester = MaxPoolMicrokernelTester()
2825 .mr(9)
2826 .qr(8)
2827 .iterations(3);
2828 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2829 for (size_t kc = 2; kc < 8; kc++) {
2830 tester
2831 .kh(ks)
2832 .kw(1)
2833 .kc(kc)
2834 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2835 tester
2836 .kh(1)
2837 .kw(ks)
2838 .kc(kc)
2839 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2840 }
2841 }
2842}
2843
2844TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_multipass_with_qmin) {
2845 auto tester = MaxPoolMicrokernelTester()
2846 .mr(9)
2847 .qr(8)
2848 .iterations(3);
2849 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2850 for (size_t kc = 2; kc < 8; kc++) {
2851 tester
2852 .kh(ks)
2853 .kw(1)
2854 .kc(kc)
2855 .qmin(192)
2856 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2857 tester
2858 .kh(1)
2859 .kw(ks)
2860 .kc(kc)
2861 .qmin(192)
2862 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2863 }
2864 }
2865}
2866
2867TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_multipass_with_qmax) {
2868 auto tester = MaxPoolMicrokernelTester()
2869 .mr(9)
2870 .qr(8)
2871 .iterations(3);
2872 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2873 for (size_t kc = 2; kc < 8; kc++) {
2874 tester
2875 .kh(ks)
2876 .kw(1)
2877 .kc(kc)
2878 .qmax(192)
2879 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2880 tester
2881 .kh(1)
2882 .kw(ks)
2883 .kc(kc)
2884 .qmax(192)
2885 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2886 }
2887 }
2888}
2889
2890TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_gt_1_multipass_with_x_stride) {
2891 auto tester = MaxPoolMicrokernelTester()
2892 .mr(9)
2893 .qr(8)
2894 .iterations(3);
2895 for (size_t ks = tester.mr() + tester.qr() + 1; ks < tester.mr() + 3 * tester.qr(); ks += 3) {
2896 for (size_t kc = 2; kc < 8; kc++) {
2897 tester
2898 .kh(ks)
2899 .kw(1)
2900 .kc(kc)
2901 .x_stride(257)
2902 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2903 tester
2904 .kh(1)
2905 .kw(ks)
2906 .kc(kc)
2907 .x_stride(257)
2908 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2909 }
2910 }
2911}
2912
2913TEST(U8_MAXPOOL_9P8Q__SCALAR, small_n) {
2914 for (size_t n = 2; n < 5; n++) {
2915 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2916 for (size_t kc = 1; kc < 16; kc += 5) {
2917 MaxPoolMicrokernelTester()
2918 .mr(9)
2919 .qr(8)
2920 .n(n)
2921 .kh(ks)
2922 .kw(ks)
2923 .kc(kc)
2924 .iterations(3)
2925 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2926 }
2927 }
2928 }
2929}
2930
2931TEST(U8_MAXPOOL_9P8Q__SCALAR, small_n_with_x_stride) {
2932 for (size_t n = 2; n < 5; n++) {
2933 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2934 for (size_t kc = 1; kc < 16; kc += 5) {
2935 MaxPoolMicrokernelTester()
2936 .mr(9)
2937 .qr(8)
2938 .n(n)
2939 .kh(ks)
2940 .kw(ks)
2941 .kc(kc)
2942 .x_stride(101)
2943 .iterations(1)
2944 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2945 }
2946 }
2947 }
2948}
2949
2950TEST(U8_MAXPOOL_9P8Q__SCALAR, small_n_with_y_stride) {
2951 for (size_t n = 2; n < 5; n++) {
2952 for (size_t ks : std::vector<size_t>{{2, 3, 5, 10}}) {
2953 for (size_t kc = 1; kc < 16; kc += 5) {
2954 MaxPoolMicrokernelTester()
2955 .mr(9)
2956 .qr(8)
2957 .n(n)
2958 .kh(ks)
2959 .kw(ks)
2960 .kc(kc)
2961 .y_stride(103)
2962 .iterations(1)
2963 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2964 }
2965 }
2966 }
2967}
2968
2969TEST(U8_MAXPOOL_9P8Q__SCALAR, small_n_with_s) {
2970 for (size_t n = 2; n < 5; n++) {
2971 for (size_t ks : std::vector<size_t>{{2, 3, 5}}) {
2972 for (size_t kc = 1; kc < 16; kc += 5) {
2973 for (size_t s = 2; s <= ks; s++) {
2974 MaxPoolMicrokernelTester()
2975 .mr(9)
2976 .qr(8)
2977 .n(n)
2978 .kh(ks)
2979 .kw(ks)
2980 .kc(kc)
2981 .s(s)
2982 .iterations(1)
2983 .Test(xnn_u8_maxpool_ukernel_9p8q__scalar, MaxPoolMicrokernelTester::Variant::Scalar);
2984 }
2985 }
2986 }
2987 }
2988}