blob: c0cfaea9182303e4fef78eeacbe1aedb66baddce [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/pavgpool.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include "avgpool-microkernel-tester.h"
13
14
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070016 TEST(F32_PAVGPOOL_UP9__NEON, kc_eq_4_fulltile) {
17 TEST_REQUIRES_ARM_NEON;
18 auto tester = AvgPoolMicrokernelTester()
19 .mr(9)
20 .kc(4);
21 for (size_t kh = 1; kh <= tester.mr(); kh++) {
22 for (size_t kw = 1; kw <= tester.mr(); kw++) {
23 if (kh * kw == tester.mr()) {
24 tester
25 .kh(kh)
26 .kw(kw)
27 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
28 }
29 }
30 }
31 }
32
33 TEST(F32_PAVGPOOL_UP9__NEON, kc_eq_4_subtile) {
34 TEST_REQUIRES_ARM_NEON;
35 auto tester = AvgPoolMicrokernelTester()
36 .mr(9)
37 .kc(4);
38 for (size_t ks = 2; ks < tester.mr(); ks++) {
39 for (size_t kh = 1; kh <= ks; kh++) {
40 for (size_t kw = 1; kw <= ks; kw++) {
41 if (kh * kw == ks) {
42 tester
43 .kh(kh)
44 .kw(kw)
45 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
46 }
47 }
48 }
49 }
50 }
51
52 TEST(F32_PAVGPOOL_UP9__NEON, kc_div_4_fulltile) {
53 TEST_REQUIRES_ARM_NEON;
54 auto tester = AvgPoolMicrokernelTester()
55 .mr(9);
56 for (size_t kh = 1; kh <= tester.mr(); kh++) {
57 for (size_t kw = 1; kw <= tester.mr(); kw++) {
58 if (kh * kw == tester.mr()) {
59 for (size_t kc = 4; kc < 64; kc += 12) {
60 tester
61 .kh(kh)
62 .kw(kw)
63 .kc(kc)
64 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
65 }
66 }
67 }
68 }
69 }
70
71 TEST(F32_PAVGPOOL_UP9__NEON, kc_div_4_subtile) {
72 TEST_REQUIRES_ARM_NEON;
73 auto tester = AvgPoolMicrokernelTester()
74 .mr(9)
75 .iterations(3);
76 for (size_t ks = 2; ks < tester.mr(); ks++) {
77 for (size_t kh = 1; kh <= ks; kh++) {
78 for (size_t kw = 1; kw <= ks; kw++) {
79 if (kh * kw == ks) {
80 for (size_t kc = 4; kc < 64; kc += 12) {
81 tester
82 .kh(kh)
83 .kw(kw)
84 .kc(kc)
85 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
86 }
87 }
88 }
89 }
90 }
91 }
92
93 TEST(F32_PAVGPOOL_UP9__NEON, kc_div_4_fulltile_with_x_stride) {
94 TEST_REQUIRES_ARM_NEON;
95 auto tester = AvgPoolMicrokernelTester()
96 .mr(9)
97 .iterations(3);
98 for (size_t kh = 1; kh <= tester.mr(); kh++) {
99 for (size_t kw = 1; kw <= tester.mr(); kw++) {
100 if (kh * kw == tester.mr()) {
101 for (size_t kc = 4; kc < 64; kc += 12) {
102 tester
103 .kh(kh)
104 .kw(kw)
105 .kc(kc)
106 .x_stride(131)
107 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
108 }
109 }
110 }
111 }
112 }
113
114 TEST(F32_PAVGPOOL_UP9__NEON, kc_lt_4_fulltile) {
115 TEST_REQUIRES_ARM_NEON;
116 auto tester = AvgPoolMicrokernelTester()
117 .mr(9);
118 for (size_t kh = 1; kh <= tester.mr(); kh++) {
119 for (size_t kw = 1; kw <= tester.mr(); kw++) {
120 if (kh * kw == tester.mr()) {
121 for (size_t kc = 1; kc < 4; kc++) {
122 tester
123 .kh(kh)
124 .kw(kw)
125 .kc(kc)
126 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
127 }
128 }
129 }
130 }
131 }
132
133 TEST(F32_PAVGPOOL_UP9__NEON, kc_lt_4_subtile) {
134 TEST_REQUIRES_ARM_NEON;
135 auto tester = AvgPoolMicrokernelTester()
136 .mr(9)
137 .iterations(3);
138 for (size_t ks = 2; ks < tester.mr(); ks++) {
139 for (size_t kh = 1; kh <= ks; kh++) {
140 for (size_t kw = 1; kw <= ks; kw++) {
141 if (kh * kw == ks) {
142 for (size_t kc = 1; kc < 4; kc++) {
143 tester
144 .kh(kh)
145 .kw(kw)
146 .kc(kc)
147 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
148 }
149 }
150 }
151 }
152 }
153 }
154
155 TEST(F32_PAVGPOOL_UP9__NEON, kc_lt_4_fulltile_with_x_stride) {
156 TEST_REQUIRES_ARM_NEON;
157 auto tester = AvgPoolMicrokernelTester()
158 .mr(9)
159 .iterations(3);
160 for (size_t kh = 1; kh <= tester.mr(); kh++) {
161 for (size_t kw = 1; kw <= tester.mr(); kw++) {
162 if (kh * kw == tester.mr()) {
163 for (size_t kc = 1; kc < 4; kc++) {
164 tester
165 .kh(kh)
166 .kw(kw)
167 .kc(kc)
168 .x_stride(23)
169 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
170 }
171 }
172 }
173 }
174 }
175
176 TEST(F32_PAVGPOOL_UP9__NEON, kc_gt_4_fulltile) {
177 TEST_REQUIRES_ARM_NEON;
178 auto tester = AvgPoolMicrokernelTester()
179 .mr(9);
180 for (size_t kh = 1; kh <= tester.mr(); kh++) {
181 for (size_t kw = 1; kw <= tester.mr(); kw++) {
182 if (kh * kw == tester.mr()) {
183 for (size_t kc = 5; kc < 8; kc++) {
184 tester
185 .kh(kh)
186 .kw(kw)
187 .kc(kc)
188 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
189 }
190 }
191 }
192 }
193 }
194
195 TEST(F32_PAVGPOOL_UP9__NEON, kc_gt_4_subtile) {
196 TEST_REQUIRES_ARM_NEON;
197 auto tester = AvgPoolMicrokernelTester()
198 .mr(9)
199 .iterations(3);
200 for (size_t ks = 2; ks < tester.mr(); ks++) {
201 for (size_t kh = 1; kh <= ks; kh++) {
202 for (size_t kw = 1; kw <= ks; kw++) {
203 if (kh * kw == ks) {
204 for (size_t kc = 5; kc < 8; kc++) {
205 tester
206 .kh(kh)
207 .kw(kw)
208 .kc(kc)
209 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
210 }
211 }
212 }
213 }
214 }
215 }
216
217 TEST(F32_PAVGPOOL_UP9__NEON, kc_gt_4_fulltile_with_x_stride) {
218 TEST_REQUIRES_ARM_NEON;
219 auto tester = AvgPoolMicrokernelTester()
220 .mr(9)
221 .iterations(3);
222 for (size_t kh = 1; kh <= tester.mr(); kh++) {
223 for (size_t kw = 1; kw <= tester.mr(); kw++) {
224 if (kh * kw == tester.mr()) {
225 for (size_t kc = 5; kc < 8; kc++) {
226 tester
227 .kh(kh)
228 .kw(kw)
229 .kc(kc)
230 .x_stride(23)
231 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
232 }
233 }
234 }
235 }
236 }
237
238 TEST(F32_PAVGPOOL_UP9__NEON, kc_div_4_with_qmax) {
239 TEST_REQUIRES_ARM_NEON;
240 for (size_t n = 1; n <= 5; n += 2) {
241 for (size_t kc = 4; kc < 64; kc += 12) {
242 AvgPoolMicrokernelTester()
243 .mr(9)
244 .n(n)
245 .kh(3)
246 .kw(3)
247 .kc(kc)
248 .qmax(128)
249 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
250 }
251 }
252 }
253
254 TEST(F32_PAVGPOOL_UP9__NEON, kc_div_4_with_qmin) {
255 TEST_REQUIRES_ARM_NEON;
256 for (size_t n = 1; n <= 5; n += 2) {
257 for (size_t kc = 4; kc < 64; kc += 12) {
258 AvgPoolMicrokernelTester()
259 .mr(9)
260 .n(n)
261 .kh(3)
262 .kw(3)
263 .kc(kc)
264 .qmin(128)
265 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
266 }
267 }
268 }
269
270 TEST(F32_PAVGPOOL_UP9__NEON, small_n) {
271 TEST_REQUIRES_ARM_NEON;
272 for (size_t n = 2; n < 5; n++) {
273 for (size_t ks : std::vector<size_t>{{2, 3}}) {
274 for (size_t kc = 8; kc < 25; kc += 5) {
275 AvgPoolMicrokernelTester()
276 .mr(9)
277 .n(n)
278 .kh(ks)
279 .kw(ks)
280 .kc(kc)
281 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
282 }
283 }
284 }
285 }
286
287 TEST(F32_PAVGPOOL_UP9__NEON, small_n_with_x_stride) {
288 TEST_REQUIRES_ARM_NEON;
289 for (size_t n = 2; n < 5; n++) {
290 for (size_t ks : std::vector<size_t>{{2, 3}}) {
291 for (size_t kc = 8; kc < 25; kc += 5) {
292 AvgPoolMicrokernelTester()
293 .mr(9)
294 .n(n)
295 .kh(ks)
296 .kw(ks)
297 .kc(kc)
298 .x_stride(29)
299 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
300 }
301 }
302 }
303 }
304
305 TEST(F32_PAVGPOOL_UP9__NEON, small_n_with_y_stride) {
306 TEST_REQUIRES_ARM_NEON;
307 for (size_t n = 2; n < 5; n++) {
308 for (size_t ks : std::vector<size_t>{{2, 3}}) {
309 for (size_t kc = 8; kc < 25; kc += 5) {
310 AvgPoolMicrokernelTester()
311 .mr(9)
312 .n(n)
313 .kh(ks)
314 .kw(ks)
315 .kc(kc)
316 .y_stride(31)
317 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
318 }
319 }
320 }
321 }
322
323 TEST(F32_PAVGPOOL_UP9__NEON, small_n_with_s) {
324 TEST_REQUIRES_ARM_NEON;
325 for (size_t n = 2; n < 5; n++) {
326 for (size_t ks : std::vector<size_t>{{2, 3}}) {
327 for (size_t kc = 8; kc < 25; kc += 5) {
328 for (size_t s = 2; s <= ks; s++) {
329 AvgPoolMicrokernelTester()
330 .mr(9)
331 .n(n)
332 .kh(ks)
333 .kw(ks)
334 .kc(kc)
335 .s(s)
336 .Test(xnn_f32_pavgpool_ukernel_up9__neon);
337 }
338 }
339 }
340 }
341 }
342
343 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_eq_4_twopass_fulltile) {
344 TEST_REQUIRES_ARM_NEON;
345 auto tester = AvgPoolMicrokernelTester()
346 .mr(9)
347 .qr(8)
348 .kc(4);
349 const size_t ks = tester.mr() + tester.qr();
350 for (size_t kh = 1; kh <= ks; kh++) {
351 for (size_t kw = 1; kw <= ks; kw++) {
352 if (kh * kw == ks) {
353 tester
354 .kh(kh)
355 .kw(kw)
356 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
357 }
358 }
359 }
360 }
361
362 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_eq_4_twopass_subtile) {
363 TEST_REQUIRES_ARM_NEON;
364 auto tester = AvgPoolMicrokernelTester()
365 .mr(9)
366 .qr(8)
367 .kc(4);
368 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
369 tester
370 .kh(ks)
371 .kw(1)
372 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
373 tester
374 .kh(1)
375 .kw(ks)
376 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
377 }
378 }
379
380 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_eq_4_multipass_fulltile) {
381 TEST_REQUIRES_ARM_NEON;
382 for (size_t ks : std::vector<size_t>{{25, 49}}) {
383 auto tester = AvgPoolMicrokernelTester()
384 .mr(9)
385 .qr(8)
386 .kc(4);
387 for (size_t kh = 1; kh <= ks; kh++) {
388 for (size_t kw = 1; kw <= ks; kw++) {
389 if (kh * kw == ks) {
390 tester
391 .kh(kh)
392 .kw(kw)
393 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
394 }
395 }
396 }
397 }
398 }
399
400 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_eq_4_multipass_subtile) {
401 TEST_REQUIRES_ARM_NEON;
402 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
403 auto tester = AvgPoolMicrokernelTester()
404 .mr(9)
405 .qr(8)
406 .kc(4);
407 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
408 tester
409 .kh(ks)
410 .kw(1)
411 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
412 tester
413 .kh(1)
414 .kw(ks)
415 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
416 }
417 }
418 }
419
420 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_twopass_fulltile) {
421 TEST_REQUIRES_ARM_NEON;
422 auto tester = AvgPoolMicrokernelTester()
423 .mr(9)
424 .qr(8)
425 .iterations(3);
426 const size_t ks = 17;
427 for (size_t kc = 4; kc < 64; kc += 12) {
428 tester
429 .kc(kc)
430 .kh(ks)
431 .kw(1)
432 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
433 tester
434 .kc(kc)
435 .kh(1)
436 .kw(ks)
437 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
438 }
439 }
440
441 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_twopass_subtile) {
442 TEST_REQUIRES_ARM_NEON;
443 auto tester = AvgPoolMicrokernelTester()
444 .mr(9)
445 .qr(8)
446 .iterations(3);
447 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
448 for (size_t kc = 4; kc < 64; kc += 12) {
449 tester
450 .kc(kc)
451 .kh(ks)
452 .kw(1)
453 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
454 tester
455 .kc(kc)
456 .kh(1)
457 .kw(ks)
458 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
459 }
460 }
461 }
462
463 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_twopass_fulltile_with_x_stride) {
464 TEST_REQUIRES_ARM_NEON;
465 auto tester = AvgPoolMicrokernelTester()
466 .mr(9)
467 .qr(8)
468 .iterations(3);
469 const size_t ks = tester.mr() + tester.qr();
470 for (size_t kh = 1; kh <= ks; kh++) {
471 for (size_t kw = 1; kw <= ks; kw++) {
472 if (kh * kw == ks) {
473 for (size_t kc = 4; kc < 64; kc += 12) {
474 tester
475 .kh(kh)
476 .kw(kw)
477 .kc(kc)
478 .x_stride(131)
479 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
480 }
481 }
482 }
483 }
484 }
485
486 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_multipass_fulltile) {
487 TEST_REQUIRES_ARM_NEON;
488 for (size_t ks : std::vector<size_t>{{25, 49}}) {
489 auto tester = AvgPoolMicrokernelTester()
490 .mr(9)
491 .qr(8)
492 .iterations(3);
493 for (size_t kh = 1; kh <= ks; kh++) {
494 for (size_t kw = 1; kw <= ks; kw++) {
495 if (kh * kw == ks) {
496 for (size_t kc = 4; kc < 64; kc += 12) {
497 tester
498 .kh(kh)
499 .kw(kw)
500 .kc(kc)
501 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
502 }
503 }
504 }
505 }
506 }
507 }
508
509 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_multipass_subtile) {
510 TEST_REQUIRES_ARM_NEON;
511 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
512 auto tester = AvgPoolMicrokernelTester()
513 .mr(9)
514 .qr(8)
515 .iterations(3);
516 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
517 for (size_t kc = 4; kc < 64; kc += 12) {
518 tester
519 .kc(kc)
520 .kh(ks)
521 .kw(1)
522 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
523 tester
524 .kc(kc)
525 .kh(1)
526 .kw(ks)
527 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
528 }
529 }
530 }
531 }
532
533 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_multipass_fulltile_with_x_stride) {
534 TEST_REQUIRES_ARM_NEON;
535 for (size_t ks : std::vector<size_t>{{25, 49}}) {
536 auto tester = AvgPoolMicrokernelTester()
537 .mr(9)
538 .qr(8)
539 .iterations(3);
540 for (size_t kh = 1; kh <= ks; kh++) {
541 for (size_t kw = 1; kw <= ks; kw++) {
542 if (kh * kw == ks) {
543 for (size_t kc = 4; kc < 64; kc += 12) {
544 tester
545 .kh(kh)
546 .kw(kw)
547 .kc(kc)
548 .x_stride(131)
549 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
550 }
551 }
552 }
553 }
554 }
555 }
556
557 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_twopass_fulltile) {
558 TEST_REQUIRES_ARM_NEON;
559 auto tester = AvgPoolMicrokernelTester()
560 .mr(9)
561 .qr(8)
562 .iterations(3);
563 const size_t ks = tester.mr() + tester.qr();
564 for (size_t kh = 1; kh <= ks; kh++) {
565 for (size_t kw = 1; kw <= ks; kw++) {
566 if (kh * kw == ks) {
567 for (size_t kc = 1; kc < 4; kc++) {
568 tester
569 .kh(kh)
570 .kw(kw)
571 .kc(kc)
572 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
573 }
574 }
575 }
576 }
577 }
578
579 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_twopass_subtile) {
580 TEST_REQUIRES_ARM_NEON;
581 auto tester = AvgPoolMicrokernelTester()
582 .mr(9)
583 .qr(8)
584 .iterations(3);
585 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
586 for (size_t kc = 1; kc < 4; kc++) {
587 tester
588 .kc(kc)
589 .kh(ks)
590 .kw(1)
591 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
592 tester
593 .kc(kc)
594 .kh(1)
595 .kw(ks)
596 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
597 }
598 }
599 }
600
601 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_twopass_fulltile_with_x_stride) {
602 TEST_REQUIRES_ARM_NEON;
603 auto tester = AvgPoolMicrokernelTester()
604 .mr(9)
605 .qr(8)
606 .iterations(3);
607 const size_t ks = tester.mr() + tester.qr();
608 for (size_t kh = 1; kh <= ks; kh++) {
609 for (size_t kw = 1; kw <= ks; kw++) {
610 if (kh * kw == ks) {
611 for (size_t kc = 1; kc < 4; kc++) {
612 tester
613 .kh(kh)
614 .kw(kw)
615 .kc(kc)
616 .x_stride(23)
617 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
618 }
619 }
620 }
621 }
622 }
623
624 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_multipass_fulltile) {
625 TEST_REQUIRES_ARM_NEON;
626 for (size_t ks : std::vector<size_t>{{25, 49}}) {
627 auto tester = AvgPoolMicrokernelTester()
628 .mr(9)
629 .qr(8)
630 .iterations(3);
631 for (size_t kh = 1; kh <= ks; kh++) {
632 for (size_t kw = 1; kw <= ks; kw++) {
633 if (kh * kw == ks) {
634 for (size_t kc = 1; kc < 4; kc++) {
635 tester
636 .kh(kh)
637 .kw(kw)
638 .kc(kc)
639 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
640 }
641 }
642 }
643 }
644 }
645 }
646
647 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_multipass_subtile) {
648 TEST_REQUIRES_ARM_NEON;
649 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
650 auto tester = AvgPoolMicrokernelTester()
651 .mr(9)
652 .qr(8)
653 .iterations(3);
654 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
655 for (size_t kc = 1; kc < 4; kc++) {
656 tester
657 .kc(kc)
658 .kh(ks)
659 .kw(1)
660 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
661 tester
662 .kc(kc)
663 .kh(1)
664 .kw(ks)
665 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
666 }
667 }
668 }
669 }
670
671 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_lt_4_multipass_fulltile_with_x_stride) {
672 TEST_REQUIRES_ARM_NEON;
673 for (size_t ks : std::vector<size_t>{{25, 49}}) {
674 auto tester = AvgPoolMicrokernelTester()
675 .mr(9)
676 .qr(8)
677 .iterations(3);
678 for (size_t kh = 1; kh <= ks; kh++) {
679 for (size_t kw = 1; kw <= ks; kw++) {
680 if (kh * kw == ks) {
681 for (size_t kc = 1; kc < 4; kc++) {
682 tester
683 .kh(kh)
684 .kw(kw)
685 .kc(kc)
686 .x_stride(23)
687 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
688 }
689 }
690 }
691 }
692 }
693 }
694
695 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_twopass_fulltile) {
696 TEST_REQUIRES_ARM_NEON;
697 auto tester = AvgPoolMicrokernelTester()
698 .mr(9)
699 .qr(8)
700 .iterations(3);
701 const size_t ks = tester.mr() + tester.qr();
702 for (size_t kh = 1; kh <= ks; kh++) {
703 for (size_t kw = 1; kw <= ks; kw++) {
704 if (kh * kw == ks) {
705 for (size_t kc = 5; kc < 8; kc++) {
706 tester
707 .kh(kh)
708 .kw(kw)
709 .kc(kc)
710 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
711 }
712 }
713 }
714 }
715 }
716
717 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_twopass_subtile) {
718 TEST_REQUIRES_ARM_NEON;
719 auto tester = AvgPoolMicrokernelTester()
720 .mr(9)
721 .qr(8)
722 .iterations(3);
723 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
724 for (size_t kc = 5; kc < 8; kc++) {
725 tester
726 .kc(kc)
727 .kh(ks)
728 .kw(1)
729 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
730 tester
731 .kc(kc)
732 .kh(1)
733 .kw(ks)
734 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
735 }
736 }
737 }
738
739 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_twopass_fulltile_with_x_stride) {
740 TEST_REQUIRES_ARM_NEON;
741 auto tester = AvgPoolMicrokernelTester()
742 .mr(9)
743 .qr(8)
744 .iterations(3);
745 const size_t ks = tester.mr() + tester.qr();
746 for (size_t kh = 1; kh <= ks; kh++) {
747 for (size_t kw = 1; kw <= ks; kw++) {
748 if (kh * kw == ks) {
749 for (size_t kc = 5; kc < 8; kc++) {
750 tester
751 .kh(kh)
752 .kw(kw)
753 .kc(kc)
754 .x_stride(23)
755 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
756 }
757 }
758 }
759 }
760 }
761
762 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_multipass_fulltile) {
763 TEST_REQUIRES_ARM_NEON;
764 for (size_t ks : std::vector<size_t>{{25, 49}}) {
765 auto tester = AvgPoolMicrokernelTester()
766 .mr(9)
767 .qr(8)
768 .iterations(3);
769 for (size_t kh = 1; kh <= ks; kh++) {
770 for (size_t kw = 1; kw <= ks; kw++) {
771 if (kh * kw == ks) {
772 for (size_t kc = 5; kc < 8; kc++) {
773 tester
774 .kh(kh)
775 .kw(kw)
776 .kc(kc)
777 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
778 }
779 }
780 }
781 }
782 }
783 }
784
785 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_multipass_subtile) {
786 TEST_REQUIRES_ARM_NEON;
787 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
788 auto tester = AvgPoolMicrokernelTester()
789 .mr(9)
790 .qr(8)
791 .iterations(3);
792 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
793 for (size_t kc = 5; kc < 8; kc++) {
794 tester
795 .kc(kc)
796 .kh(ks)
797 .kw(1)
798 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
799 tester
800 .kc(kc)
801 .kh(1)
802 .kw(ks)
803 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
804 }
805 }
806 }
807 }
808
809 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_gt_4_multipass_fulltile_with_x_stride) {
810 TEST_REQUIRES_ARM_NEON;
811 for (size_t ks : std::vector<size_t>{{25, 49}}) {
812 auto tester = AvgPoolMicrokernelTester()
813 .mr(9)
814 .qr(8)
815 .iterations(3);
816 for (size_t kh = 1; kh <= ks; kh++) {
817 for (size_t kw = 1; kw <= ks; kw++) {
818 if (kh * kw == ks) {
819 for (size_t kc = 5; kc < 8; kc++) {
820 tester
821 .kh(kh)
822 .kw(kw)
823 .kc(kc)
824 .x_stride(23)
825 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
826 }
827 }
828 }
829 }
830 }
831 }
832
833 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_with_qmax) {
834 TEST_REQUIRES_ARM_NEON;
835 for (size_t n = 1; n <= 5; n += 2) {
836 for (size_t kc = 4; kc < 64; kc += 12) {
837 AvgPoolMicrokernelTester()
838 .mr(9)
839 .qr(8)
840 .n(n)
841 .kh(5)
842 .kw(5)
843 .kc(kc)
844 .qmax(128)
845 .iterations(3)
846 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
847 }
848 }
849 }
850
851 TEST(F32_PAVGPOOL_MP9P8Q__NEON, kc_div_4_with_qmin) {
852 TEST_REQUIRES_ARM_NEON;
853 for (size_t n = 1; n <= 5; n += 2) {
854 for (size_t kc = 4; kc < 64; kc += 12) {
855 AvgPoolMicrokernelTester()
856 .mr(9)
857 .qr(8)
858 .n(n)
859 .kh(5)
860 .kw(5)
861 .kc(kc)
862 .qmin(128)
863 .iterations(3)
864 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
865 }
866 }
867 }
868
869 TEST(F32_PAVGPOOL_MP9P8Q__NEON, small_n) {
870 TEST_REQUIRES_ARM_NEON;
871 for (size_t n = 2; n < 5; n++) {
872 for (size_t ks : std::vector<size_t>{{5, 7}}) {
873 for (size_t kc = 8; kc < 25; kc += 5) {
874 AvgPoolMicrokernelTester()
875 .mr(9)
876 .qr(8)
877 .n(n)
878 .kh(ks)
879 .kw(ks)
880 .kc(kc)
881 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
882 }
883 }
884 }
885 }
886
887 TEST(F32_PAVGPOOL_MP9P8Q__NEON, small_n_with_x_stride) {
888 TEST_REQUIRES_ARM_NEON;
889 for (size_t n = 2; n < 5; n++) {
890 for (size_t ks : std::vector<size_t>{{5, 7}}) {
891 for (size_t kc = 8; kc < 25; kc += 5) {
892 AvgPoolMicrokernelTester()
893 .mr(9)
894 .qr(8)
895 .n(n)
896 .kh(ks)
897 .kw(ks)
898 .kc(kc)
899 .x_stride(29)
900 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
901 }
902 }
903 }
904 }
905
906 TEST(F32_PAVGPOOL_MP9P8Q__NEON, small_n_with_y_stride) {
907 TEST_REQUIRES_ARM_NEON;
908 for (size_t n = 2; n < 5; n++) {
909 for (size_t ks : std::vector<size_t>{{5, 7}}) {
910 for (size_t kc = 8; kc < 25; kc += 5) {
911 AvgPoolMicrokernelTester()
912 .mr(9)
913 .qr(8)
914 .n(n)
915 .kh(ks)
916 .kw(ks)
917 .kc(kc)
918 .y_stride(31)
919 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
920 }
921 }
922 }
923 }
924
925 TEST(F32_PAVGPOOL_MP9P8Q__NEON, small_n_with_s) {
926 TEST_REQUIRES_ARM_NEON;
927 for (size_t n = 2; n < 5; n++) {
928 for (size_t ks : std::vector<size_t>{{5, 7}}) {
929 for (size_t s = 2; s <= 5; s++) {
930 for (size_t kc = 8; kc < 25; kc += 5) {
931 AvgPoolMicrokernelTester()
932 .mr(9)
933 .qr(8)
934 .n(n)
935 .kh(ks)
936 .kw(ks)
937 .kc(kc)
938 .s(s)
939 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__neon);
940 }
941 }
942 }
943 }
944 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700945#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700946
947
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700948#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700949 TEST(F32_PAVGPOOL_UP9__SSE2, kc_eq_4_fulltile) {
950 TEST_REQUIRES_X86_SSE2;
951 auto tester = AvgPoolMicrokernelTester()
952 .mr(9)
953 .kc(4);
954 for (size_t kh = 1; kh <= tester.mr(); kh++) {
955 for (size_t kw = 1; kw <= tester.mr(); kw++) {
956 if (kh * kw == tester.mr()) {
957 tester
958 .kh(kh)
959 .kw(kw)
960 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
961 }
962 }
963 }
964 }
965
966 TEST(F32_PAVGPOOL_UP9__SSE2, kc_eq_4_subtile) {
967 TEST_REQUIRES_X86_SSE2;
968 auto tester = AvgPoolMicrokernelTester()
969 .mr(9)
970 .kc(4);
971 for (size_t ks = 2; ks < tester.mr(); ks++) {
972 for (size_t kh = 1; kh <= ks; kh++) {
973 for (size_t kw = 1; kw <= ks; kw++) {
974 if (kh * kw == ks) {
975 tester
976 .kh(kh)
977 .kw(kw)
978 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
979 }
980 }
981 }
982 }
983 }
984
985 TEST(F32_PAVGPOOL_UP9__SSE2, kc_div_4_fulltile) {
986 TEST_REQUIRES_X86_SSE2;
987 auto tester = AvgPoolMicrokernelTester()
988 .mr(9);
989 for (size_t kh = 1; kh <= tester.mr(); kh++) {
990 for (size_t kw = 1; kw <= tester.mr(); kw++) {
991 if (kh * kw == tester.mr()) {
992 for (size_t kc = 4; kc < 64; kc += 12) {
993 tester
994 .kh(kh)
995 .kw(kw)
996 .kc(kc)
997 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
998 }
999 }
1000 }
1001 }
1002 }
1003
1004 TEST(F32_PAVGPOOL_UP9__SSE2, kc_div_4_subtile) {
1005 TEST_REQUIRES_X86_SSE2;
1006 auto tester = AvgPoolMicrokernelTester()
1007 .mr(9)
1008 .iterations(3);
1009 for (size_t ks = 2; ks < tester.mr(); ks++) {
1010 for (size_t kh = 1; kh <= ks; kh++) {
1011 for (size_t kw = 1; kw <= ks; kw++) {
1012 if (kh * kw == ks) {
1013 for (size_t kc = 4; kc < 64; kc += 12) {
1014 tester
1015 .kh(kh)
1016 .kw(kw)
1017 .kc(kc)
1018 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1019 }
1020 }
1021 }
1022 }
1023 }
1024 }
1025
1026 TEST(F32_PAVGPOOL_UP9__SSE2, kc_div_4_fulltile_with_x_stride) {
1027 TEST_REQUIRES_X86_SSE2;
1028 auto tester = AvgPoolMicrokernelTester()
1029 .mr(9)
1030 .iterations(3);
1031 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1032 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1033 if (kh * kw == tester.mr()) {
1034 for (size_t kc = 4; kc < 64; kc += 12) {
1035 tester
1036 .kh(kh)
1037 .kw(kw)
1038 .kc(kc)
1039 .x_stride(131)
1040 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1041 }
1042 }
1043 }
1044 }
1045 }
1046
1047 TEST(F32_PAVGPOOL_UP9__SSE2, kc_lt_4_fulltile) {
1048 TEST_REQUIRES_X86_SSE2;
1049 auto tester = AvgPoolMicrokernelTester()
1050 .mr(9);
1051 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1052 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1053 if (kh * kw == tester.mr()) {
1054 for (size_t kc = 1; kc < 4; kc++) {
1055 tester
1056 .kh(kh)
1057 .kw(kw)
1058 .kc(kc)
1059 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1060 }
1061 }
1062 }
1063 }
1064 }
1065
1066 TEST(F32_PAVGPOOL_UP9__SSE2, kc_lt_4_subtile) {
1067 TEST_REQUIRES_X86_SSE2;
1068 auto tester = AvgPoolMicrokernelTester()
1069 .mr(9)
1070 .iterations(3);
1071 for (size_t ks = 2; ks < tester.mr(); ks++) {
1072 for (size_t kh = 1; kh <= ks; kh++) {
1073 for (size_t kw = 1; kw <= ks; kw++) {
1074 if (kh * kw == ks) {
1075 for (size_t kc = 1; kc < 4; kc++) {
1076 tester
1077 .kh(kh)
1078 .kw(kw)
1079 .kc(kc)
1080 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1081 }
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 TEST(F32_PAVGPOOL_UP9__SSE2, kc_lt_4_fulltile_with_x_stride) {
1089 TEST_REQUIRES_X86_SSE2;
1090 auto tester = AvgPoolMicrokernelTester()
1091 .mr(9)
1092 .iterations(3);
1093 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1094 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1095 if (kh * kw == tester.mr()) {
1096 for (size_t kc = 1; kc < 4; kc++) {
1097 tester
1098 .kh(kh)
1099 .kw(kw)
1100 .kc(kc)
1101 .x_stride(23)
1102 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1103 }
1104 }
1105 }
1106 }
1107 }
1108
1109 TEST(F32_PAVGPOOL_UP9__SSE2, kc_gt_4_fulltile) {
1110 TEST_REQUIRES_X86_SSE2;
1111 auto tester = AvgPoolMicrokernelTester()
1112 .mr(9);
1113 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1114 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1115 if (kh * kw == tester.mr()) {
1116 for (size_t kc = 5; kc < 8; kc++) {
1117 tester
1118 .kh(kh)
1119 .kw(kw)
1120 .kc(kc)
1121 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1122 }
1123 }
1124 }
1125 }
1126 }
1127
1128 TEST(F32_PAVGPOOL_UP9__SSE2, kc_gt_4_subtile) {
1129 TEST_REQUIRES_X86_SSE2;
1130 auto tester = AvgPoolMicrokernelTester()
1131 .mr(9)
1132 .iterations(3);
1133 for (size_t ks = 2; ks < tester.mr(); ks++) {
1134 for (size_t kh = 1; kh <= ks; kh++) {
1135 for (size_t kw = 1; kw <= ks; kw++) {
1136 if (kh * kw == ks) {
1137 for (size_t kc = 5; kc < 8; kc++) {
1138 tester
1139 .kh(kh)
1140 .kw(kw)
1141 .kc(kc)
1142 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1143 }
1144 }
1145 }
1146 }
1147 }
1148 }
1149
1150 TEST(F32_PAVGPOOL_UP9__SSE2, kc_gt_4_fulltile_with_x_stride) {
1151 TEST_REQUIRES_X86_SSE2;
1152 auto tester = AvgPoolMicrokernelTester()
1153 .mr(9)
1154 .iterations(3);
1155 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1156 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1157 if (kh * kw == tester.mr()) {
1158 for (size_t kc = 5; kc < 8; kc++) {
1159 tester
1160 .kh(kh)
1161 .kw(kw)
1162 .kc(kc)
1163 .x_stride(23)
1164 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1165 }
1166 }
1167 }
1168 }
1169 }
1170
1171 TEST(F32_PAVGPOOL_UP9__SSE2, kc_div_4_with_qmax) {
1172 TEST_REQUIRES_X86_SSE2;
1173 for (size_t n = 1; n <= 5; n += 2) {
1174 for (size_t kc = 4; kc < 64; kc += 12) {
1175 AvgPoolMicrokernelTester()
1176 .mr(9)
1177 .n(n)
1178 .kh(3)
1179 .kw(3)
1180 .kc(kc)
1181 .qmax(128)
1182 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1183 }
1184 }
1185 }
1186
1187 TEST(F32_PAVGPOOL_UP9__SSE2, kc_div_4_with_qmin) {
1188 TEST_REQUIRES_X86_SSE2;
1189 for (size_t n = 1; n <= 5; n += 2) {
1190 for (size_t kc = 4; kc < 64; kc += 12) {
1191 AvgPoolMicrokernelTester()
1192 .mr(9)
1193 .n(n)
1194 .kh(3)
1195 .kw(3)
1196 .kc(kc)
1197 .qmin(128)
1198 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1199 }
1200 }
1201 }
1202
1203 TEST(F32_PAVGPOOL_UP9__SSE2, small_n) {
1204 TEST_REQUIRES_X86_SSE2;
1205 for (size_t n = 2; n < 5; n++) {
1206 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1207 for (size_t kc = 8; kc < 25; kc += 5) {
1208 AvgPoolMicrokernelTester()
1209 .mr(9)
1210 .n(n)
1211 .kh(ks)
1212 .kw(ks)
1213 .kc(kc)
1214 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1215 }
1216 }
1217 }
1218 }
1219
1220 TEST(F32_PAVGPOOL_UP9__SSE2, small_n_with_x_stride) {
1221 TEST_REQUIRES_X86_SSE2;
1222 for (size_t n = 2; n < 5; n++) {
1223 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1224 for (size_t kc = 8; kc < 25; kc += 5) {
1225 AvgPoolMicrokernelTester()
1226 .mr(9)
1227 .n(n)
1228 .kh(ks)
1229 .kw(ks)
1230 .kc(kc)
1231 .x_stride(29)
1232 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1233 }
1234 }
1235 }
1236 }
1237
1238 TEST(F32_PAVGPOOL_UP9__SSE2, small_n_with_y_stride) {
1239 TEST_REQUIRES_X86_SSE2;
1240 for (size_t n = 2; n < 5; n++) {
1241 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1242 for (size_t kc = 8; kc < 25; kc += 5) {
1243 AvgPoolMicrokernelTester()
1244 .mr(9)
1245 .n(n)
1246 .kh(ks)
1247 .kw(ks)
1248 .kc(kc)
1249 .y_stride(31)
1250 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1251 }
1252 }
1253 }
1254 }
1255
1256 TEST(F32_PAVGPOOL_UP9__SSE2, small_n_with_s) {
1257 TEST_REQUIRES_X86_SSE2;
1258 for (size_t n = 2; n < 5; n++) {
1259 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1260 for (size_t kc = 8; kc < 25; kc += 5) {
1261 for (size_t s = 2; s <= ks; s++) {
1262 AvgPoolMicrokernelTester()
1263 .mr(9)
1264 .n(n)
1265 .kh(ks)
1266 .kw(ks)
1267 .kc(kc)
1268 .s(s)
1269 .Test(xnn_f32_pavgpool_ukernel_up9__sse);
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_eq_4_twopass_fulltile) {
1277 TEST_REQUIRES_X86_SSE2;
1278 auto tester = AvgPoolMicrokernelTester()
1279 .mr(9)
1280 .qr(8)
1281 .kc(4);
1282 const size_t ks = tester.mr() + tester.qr();
1283 for (size_t kh = 1; kh <= ks; kh++) {
1284 for (size_t kw = 1; kw <= ks; kw++) {
1285 if (kh * kw == ks) {
1286 tester
1287 .kh(kh)
1288 .kw(kw)
1289 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1290 }
1291 }
1292 }
1293 }
1294
1295 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_eq_4_twopass_subtile) {
1296 TEST_REQUIRES_X86_SSE2;
1297 auto tester = AvgPoolMicrokernelTester()
1298 .mr(9)
1299 .qr(8)
1300 .kc(4);
1301 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1302 tester
1303 .kh(ks)
1304 .kw(1)
1305 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1306 tester
1307 .kh(1)
1308 .kw(ks)
1309 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1310 }
1311 }
1312
1313 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_eq_4_multipass_fulltile) {
1314 TEST_REQUIRES_X86_SSE2;
1315 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1316 auto tester = AvgPoolMicrokernelTester()
1317 .mr(9)
1318 .qr(8)
1319 .kc(4);
1320 for (size_t kh = 1; kh <= ks; kh++) {
1321 for (size_t kw = 1; kw <= ks; kw++) {
1322 if (kh * kw == ks) {
1323 tester
1324 .kh(kh)
1325 .kw(kw)
1326 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1327 }
1328 }
1329 }
1330 }
1331 }
1332
1333 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_eq_4_multipass_subtile) {
1334 TEST_REQUIRES_X86_SSE2;
1335 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1336 auto tester = AvgPoolMicrokernelTester()
1337 .mr(9)
1338 .qr(8)
1339 .kc(4);
1340 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1341 tester
1342 .kh(ks)
1343 .kw(1)
1344 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1345 tester
1346 .kh(1)
1347 .kw(ks)
1348 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1349 }
1350 }
1351 }
1352
1353 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_twopass_fulltile) {
1354 TEST_REQUIRES_X86_SSE2;
1355 auto tester = AvgPoolMicrokernelTester()
1356 .mr(9)
1357 .qr(8)
1358 .iterations(3);
1359 const size_t ks = 17;
1360 for (size_t kc = 4; kc < 64; kc += 12) {
1361 tester
1362 .kc(kc)
1363 .kh(ks)
1364 .kw(1)
1365 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1366 tester
1367 .kc(kc)
1368 .kh(1)
1369 .kw(ks)
1370 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1371 }
1372 }
1373
1374 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_twopass_subtile) {
1375 TEST_REQUIRES_X86_SSE2;
1376 auto tester = AvgPoolMicrokernelTester()
1377 .mr(9)
1378 .qr(8)
1379 .iterations(3);
1380 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1381 for (size_t kc = 4; kc < 64; kc += 12) {
1382 tester
1383 .kc(kc)
1384 .kh(ks)
1385 .kw(1)
1386 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1387 tester
1388 .kc(kc)
1389 .kh(1)
1390 .kw(ks)
1391 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1392 }
1393 }
1394 }
1395
1396 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_twopass_fulltile_with_x_stride) {
1397 TEST_REQUIRES_X86_SSE2;
1398 auto tester = AvgPoolMicrokernelTester()
1399 .mr(9)
1400 .qr(8)
1401 .iterations(3);
1402 const size_t ks = tester.mr() + tester.qr();
1403 for (size_t kh = 1; kh <= ks; kh++) {
1404 for (size_t kw = 1; kw <= ks; kw++) {
1405 if (kh * kw == ks) {
1406 for (size_t kc = 4; kc < 64; kc += 12) {
1407 tester
1408 .kh(kh)
1409 .kw(kw)
1410 .kc(kc)
1411 .x_stride(131)
1412 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1413 }
1414 }
1415 }
1416 }
1417 }
1418
1419 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_multipass_fulltile) {
1420 TEST_REQUIRES_X86_SSE2;
1421 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1422 auto tester = AvgPoolMicrokernelTester()
1423 .mr(9)
1424 .qr(8)
1425 .iterations(3);
1426 for (size_t kh = 1; kh <= ks; kh++) {
1427 for (size_t kw = 1; kw <= ks; kw++) {
1428 if (kh * kw == ks) {
1429 for (size_t kc = 4; kc < 64; kc += 12) {
1430 tester
1431 .kh(kh)
1432 .kw(kw)
1433 .kc(kc)
1434 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1435 }
1436 }
1437 }
1438 }
1439 }
1440 }
1441
1442 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_multipass_subtile) {
1443 TEST_REQUIRES_X86_SSE2;
1444 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1445 auto tester = AvgPoolMicrokernelTester()
1446 .mr(9)
1447 .qr(8)
1448 .iterations(3);
1449 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1450 for (size_t kc = 4; kc < 64; kc += 12) {
1451 tester
1452 .kc(kc)
1453 .kh(ks)
1454 .kw(1)
1455 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1456 tester
1457 .kc(kc)
1458 .kh(1)
1459 .kw(ks)
1460 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1461 }
1462 }
1463 }
1464 }
1465
1466 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_multipass_fulltile_with_x_stride) {
1467 TEST_REQUIRES_X86_SSE2;
1468 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1469 auto tester = AvgPoolMicrokernelTester()
1470 .mr(9)
1471 .qr(8)
1472 .iterations(3);
1473 for (size_t kh = 1; kh <= ks; kh++) {
1474 for (size_t kw = 1; kw <= ks; kw++) {
1475 if (kh * kw == ks) {
1476 for (size_t kc = 4; kc < 64; kc += 12) {
1477 tester
1478 .kh(kh)
1479 .kw(kw)
1480 .kc(kc)
1481 .x_stride(131)
1482 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1483 }
1484 }
1485 }
1486 }
1487 }
1488 }
1489
1490 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_twopass_fulltile) {
1491 TEST_REQUIRES_X86_SSE2;
1492 auto tester = AvgPoolMicrokernelTester()
1493 .mr(9)
1494 .qr(8)
1495 .iterations(3);
1496 const size_t ks = tester.mr() + tester.qr();
1497 for (size_t kh = 1; kh <= ks; kh++) {
1498 for (size_t kw = 1; kw <= ks; kw++) {
1499 if (kh * kw == ks) {
1500 for (size_t kc = 1; kc < 4; kc++) {
1501 tester
1502 .kh(kh)
1503 .kw(kw)
1504 .kc(kc)
1505 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1506 }
1507 }
1508 }
1509 }
1510 }
1511
1512 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_twopass_subtile) {
1513 TEST_REQUIRES_X86_SSE2;
1514 auto tester = AvgPoolMicrokernelTester()
1515 .mr(9)
1516 .qr(8)
1517 .iterations(3);
1518 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1519 for (size_t kc = 1; kc < 4; kc++) {
1520 tester
1521 .kc(kc)
1522 .kh(ks)
1523 .kw(1)
1524 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1525 tester
1526 .kc(kc)
1527 .kh(1)
1528 .kw(ks)
1529 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1530 }
1531 }
1532 }
1533
1534 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_twopass_fulltile_with_x_stride) {
1535 TEST_REQUIRES_X86_SSE2;
1536 auto tester = AvgPoolMicrokernelTester()
1537 .mr(9)
1538 .qr(8)
1539 .iterations(3);
1540 const size_t ks = tester.mr() + tester.qr();
1541 for (size_t kh = 1; kh <= ks; kh++) {
1542 for (size_t kw = 1; kw <= ks; kw++) {
1543 if (kh * kw == ks) {
1544 for (size_t kc = 1; kc < 4; kc++) {
1545 tester
1546 .kh(kh)
1547 .kw(kw)
1548 .kc(kc)
1549 .x_stride(23)
1550 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1551 }
1552 }
1553 }
1554 }
1555 }
1556
1557 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_multipass_fulltile) {
1558 TEST_REQUIRES_X86_SSE2;
1559 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1560 auto tester = AvgPoolMicrokernelTester()
1561 .mr(9)
1562 .qr(8)
1563 .iterations(3);
1564 for (size_t kh = 1; kh <= ks; kh++) {
1565 for (size_t kw = 1; kw <= ks; kw++) {
1566 if (kh * kw == ks) {
1567 for (size_t kc = 1; kc < 4; kc++) {
1568 tester
1569 .kh(kh)
1570 .kw(kw)
1571 .kc(kc)
1572 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1573 }
1574 }
1575 }
1576 }
1577 }
1578 }
1579
1580 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_multipass_subtile) {
1581 TEST_REQUIRES_X86_SSE2;
1582 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1583 auto tester = AvgPoolMicrokernelTester()
1584 .mr(9)
1585 .qr(8)
1586 .iterations(3);
1587 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1588 for (size_t kc = 1; kc < 4; kc++) {
1589 tester
1590 .kc(kc)
1591 .kh(ks)
1592 .kw(1)
1593 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1594 tester
1595 .kc(kc)
1596 .kh(1)
1597 .kw(ks)
1598 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1599 }
1600 }
1601 }
1602 }
1603
1604 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_lt_4_multipass_fulltile_with_x_stride) {
1605 TEST_REQUIRES_X86_SSE2;
1606 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1607 auto tester = AvgPoolMicrokernelTester()
1608 .mr(9)
1609 .qr(8)
1610 .iterations(3);
1611 for (size_t kh = 1; kh <= ks; kh++) {
1612 for (size_t kw = 1; kw <= ks; kw++) {
1613 if (kh * kw == ks) {
1614 for (size_t kc = 1; kc < 4; kc++) {
1615 tester
1616 .kh(kh)
1617 .kw(kw)
1618 .kc(kc)
1619 .x_stride(23)
1620 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1621 }
1622 }
1623 }
1624 }
1625 }
1626 }
1627
1628 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_twopass_fulltile) {
1629 TEST_REQUIRES_X86_SSE2;
1630 auto tester = AvgPoolMicrokernelTester()
1631 .mr(9)
1632 .qr(8)
1633 .iterations(3);
1634 const size_t ks = tester.mr() + tester.qr();
1635 for (size_t kh = 1; kh <= ks; kh++) {
1636 for (size_t kw = 1; kw <= ks; kw++) {
1637 if (kh * kw == ks) {
1638 for (size_t kc = 5; kc < 8; kc++) {
1639 tester
1640 .kh(kh)
1641 .kw(kw)
1642 .kc(kc)
1643 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1644 }
1645 }
1646 }
1647 }
1648 }
1649
1650 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_twopass_subtile) {
1651 TEST_REQUIRES_X86_SSE2;
1652 auto tester = AvgPoolMicrokernelTester()
1653 .mr(9)
1654 .qr(8)
1655 .iterations(3);
1656 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1657 for (size_t kc = 5; kc < 8; kc++) {
1658 tester
1659 .kc(kc)
1660 .kh(ks)
1661 .kw(1)
1662 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1663 tester
1664 .kc(kc)
1665 .kh(1)
1666 .kw(ks)
1667 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1668 }
1669 }
1670 }
1671
1672 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_twopass_fulltile_with_x_stride) {
1673 TEST_REQUIRES_X86_SSE2;
1674 auto tester = AvgPoolMicrokernelTester()
1675 .mr(9)
1676 .qr(8)
1677 .iterations(3);
1678 const size_t ks = tester.mr() + tester.qr();
1679 for (size_t kh = 1; kh <= ks; kh++) {
1680 for (size_t kw = 1; kw <= ks; kw++) {
1681 if (kh * kw == ks) {
1682 for (size_t kc = 5; kc < 8; kc++) {
1683 tester
1684 .kh(kh)
1685 .kw(kw)
1686 .kc(kc)
1687 .x_stride(23)
1688 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1689 }
1690 }
1691 }
1692 }
1693 }
1694
1695 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_multipass_fulltile) {
1696 TEST_REQUIRES_X86_SSE2;
1697 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1698 auto tester = AvgPoolMicrokernelTester()
1699 .mr(9)
1700 .qr(8)
1701 .iterations(3);
1702 for (size_t kh = 1; kh <= ks; kh++) {
1703 for (size_t kw = 1; kw <= ks; kw++) {
1704 if (kh * kw == ks) {
1705 for (size_t kc = 5; kc < 8; kc++) {
1706 tester
1707 .kh(kh)
1708 .kw(kw)
1709 .kc(kc)
1710 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1711 }
1712 }
1713 }
1714 }
1715 }
1716 }
1717
1718 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_multipass_subtile) {
1719 TEST_REQUIRES_X86_SSE2;
1720 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1721 auto tester = AvgPoolMicrokernelTester()
1722 .mr(9)
1723 .qr(8)
1724 .iterations(3);
1725 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1726 for (size_t kc = 5; kc < 8; kc++) {
1727 tester
1728 .kc(kc)
1729 .kh(ks)
1730 .kw(1)
1731 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1732 tester
1733 .kc(kc)
1734 .kh(1)
1735 .kw(ks)
1736 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1737 }
1738 }
1739 }
1740 }
1741
1742 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_gt_4_multipass_fulltile_with_x_stride) {
1743 TEST_REQUIRES_X86_SSE2;
1744 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1745 auto tester = AvgPoolMicrokernelTester()
1746 .mr(9)
1747 .qr(8)
1748 .iterations(3);
1749 for (size_t kh = 1; kh <= ks; kh++) {
1750 for (size_t kw = 1; kw <= ks; kw++) {
1751 if (kh * kw == ks) {
1752 for (size_t kc = 5; kc < 8; kc++) {
1753 tester
1754 .kh(kh)
1755 .kw(kw)
1756 .kc(kc)
1757 .x_stride(23)
1758 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1759 }
1760 }
1761 }
1762 }
1763 }
1764 }
1765
1766 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_with_qmax) {
1767 TEST_REQUIRES_X86_SSE2;
1768 for (size_t n = 1; n <= 5; n += 2) {
1769 for (size_t kc = 4; kc < 64; kc += 12) {
1770 AvgPoolMicrokernelTester()
1771 .mr(9)
1772 .qr(8)
1773 .n(n)
1774 .kh(5)
1775 .kw(5)
1776 .kc(kc)
1777 .qmax(128)
1778 .iterations(3)
1779 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1780 }
1781 }
1782 }
1783
1784 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, kc_div_4_with_qmin) {
1785 TEST_REQUIRES_X86_SSE2;
1786 for (size_t n = 1; n <= 5; n += 2) {
1787 for (size_t kc = 4; kc < 64; kc += 12) {
1788 AvgPoolMicrokernelTester()
1789 .mr(9)
1790 .qr(8)
1791 .n(n)
1792 .kh(5)
1793 .kw(5)
1794 .kc(kc)
1795 .qmin(128)
1796 .iterations(3)
1797 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1798 }
1799 }
1800 }
1801
1802 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, small_n) {
1803 TEST_REQUIRES_X86_SSE2;
1804 for (size_t n = 2; n < 5; n++) {
1805 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1806 for (size_t kc = 8; kc < 25; kc += 5) {
1807 AvgPoolMicrokernelTester()
1808 .mr(9)
1809 .qr(8)
1810 .n(n)
1811 .kh(ks)
1812 .kw(ks)
1813 .kc(kc)
1814 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1815 }
1816 }
1817 }
1818 }
1819
1820 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, small_n_with_x_stride) {
1821 TEST_REQUIRES_X86_SSE2;
1822 for (size_t n = 2; n < 5; n++) {
1823 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1824 for (size_t kc = 8; kc < 25; kc += 5) {
1825 AvgPoolMicrokernelTester()
1826 .mr(9)
1827 .qr(8)
1828 .n(n)
1829 .kh(ks)
1830 .kw(ks)
1831 .kc(kc)
1832 .x_stride(29)
1833 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1834 }
1835 }
1836 }
1837 }
1838
1839 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, small_n_with_y_stride) {
1840 TEST_REQUIRES_X86_SSE2;
1841 for (size_t n = 2; n < 5; n++) {
1842 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1843 for (size_t kc = 8; kc < 25; kc += 5) {
1844 AvgPoolMicrokernelTester()
1845 .mr(9)
1846 .qr(8)
1847 .n(n)
1848 .kh(ks)
1849 .kw(ks)
1850 .kc(kc)
1851 .y_stride(31)
1852 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1853 }
1854 }
1855 }
1856 }
1857
1858 TEST(F32_PAVGPOOL_MP9P8Q__SSE2, small_n_with_s) {
1859 TEST_REQUIRES_X86_SSE2;
1860 for (size_t n = 2; n < 5; n++) {
1861 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1862 for (size_t s = 2; s <= 5; s++) {
1863 for (size_t kc = 8; kc < 25; kc += 5) {
1864 AvgPoolMicrokernelTester()
1865 .mr(9)
1866 .qr(8)
1867 .n(n)
1868 .kh(ks)
1869 .kw(ks)
1870 .kc(kc)
1871 .s(s)
1872 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__sse);
1873 }
1874 }
1875 }
1876 }
1877 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001878#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001879
1880
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001881#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001882 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_eq_4_fulltile) {
1883 TEST_REQUIRES_PSIMD;
1884 auto tester = AvgPoolMicrokernelTester()
1885 .mr(9)
1886 .kc(4);
1887 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1888 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1889 if (kh * kw == tester.mr()) {
1890 tester
1891 .kh(kh)
1892 .kw(kw)
1893 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1894 }
1895 }
1896 }
1897 }
1898
1899 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_eq_4_subtile) {
1900 TEST_REQUIRES_PSIMD;
1901 auto tester = AvgPoolMicrokernelTester()
1902 .mr(9)
1903 .kc(4);
1904 for (size_t ks = 2; ks < tester.mr(); ks++) {
1905 for (size_t kh = 1; kh <= ks; kh++) {
1906 for (size_t kw = 1; kw <= ks; kw++) {
1907 if (kh * kw == ks) {
1908 tester
1909 .kh(kh)
1910 .kw(kw)
1911 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1912 }
1913 }
1914 }
1915 }
1916 }
1917
1918 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_div_4_fulltile) {
1919 TEST_REQUIRES_PSIMD;
1920 auto tester = AvgPoolMicrokernelTester()
1921 .mr(9);
1922 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1923 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1924 if (kh * kw == tester.mr()) {
1925 for (size_t kc = 4; kc < 64; kc += 12) {
1926 tester
1927 .kh(kh)
1928 .kw(kw)
1929 .kc(kc)
1930 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1931 }
1932 }
1933 }
1934 }
1935 }
1936
1937 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_div_4_subtile) {
1938 TEST_REQUIRES_PSIMD;
1939 auto tester = AvgPoolMicrokernelTester()
1940 .mr(9)
1941 .iterations(3);
1942 for (size_t ks = 2; ks < tester.mr(); ks++) {
1943 for (size_t kh = 1; kh <= ks; kh++) {
1944 for (size_t kw = 1; kw <= ks; kw++) {
1945 if (kh * kw == ks) {
1946 for (size_t kc = 4; kc < 64; kc += 12) {
1947 tester
1948 .kh(kh)
1949 .kw(kw)
1950 .kc(kc)
1951 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1952 }
1953 }
1954 }
1955 }
1956 }
1957 }
1958
1959 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_div_4_fulltile_with_x_stride) {
1960 TEST_REQUIRES_PSIMD;
1961 auto tester = AvgPoolMicrokernelTester()
1962 .mr(9)
1963 .iterations(3);
1964 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1965 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1966 if (kh * kw == tester.mr()) {
1967 for (size_t kc = 4; kc < 64; kc += 12) {
1968 tester
1969 .kh(kh)
1970 .kw(kw)
1971 .kc(kc)
1972 .x_stride(131)
1973 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1974 }
1975 }
1976 }
1977 }
1978 }
1979
1980 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_lt_4_fulltile) {
1981 TEST_REQUIRES_PSIMD;
1982 auto tester = AvgPoolMicrokernelTester()
1983 .mr(9);
1984 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1985 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1986 if (kh * kw == tester.mr()) {
1987 for (size_t kc = 1; kc < 4; kc++) {
1988 tester
1989 .kh(kh)
1990 .kw(kw)
1991 .kc(kc)
1992 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
1993 }
1994 }
1995 }
1996 }
1997 }
1998
1999 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_lt_4_subtile) {
2000 TEST_REQUIRES_PSIMD;
2001 auto tester = AvgPoolMicrokernelTester()
2002 .mr(9)
2003 .iterations(3);
2004 for (size_t ks = 2; ks < tester.mr(); ks++) {
2005 for (size_t kh = 1; kh <= ks; kh++) {
2006 for (size_t kw = 1; kw <= ks; kw++) {
2007 if (kh * kw == ks) {
2008 for (size_t kc = 1; kc < 4; kc++) {
2009 tester
2010 .kh(kh)
2011 .kw(kw)
2012 .kc(kc)
2013 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2014 }
2015 }
2016 }
2017 }
2018 }
2019 }
2020
2021 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_lt_4_fulltile_with_x_stride) {
2022 TEST_REQUIRES_PSIMD;
2023 auto tester = AvgPoolMicrokernelTester()
2024 .mr(9)
2025 .iterations(3);
2026 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2027 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2028 if (kh * kw == tester.mr()) {
2029 for (size_t kc = 1; kc < 4; kc++) {
2030 tester
2031 .kh(kh)
2032 .kw(kw)
2033 .kc(kc)
2034 .x_stride(23)
2035 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2036 }
2037 }
2038 }
2039 }
2040 }
2041
2042 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_gt_4_fulltile) {
2043 TEST_REQUIRES_PSIMD;
2044 auto tester = AvgPoolMicrokernelTester()
2045 .mr(9);
2046 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2047 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2048 if (kh * kw == tester.mr()) {
2049 for (size_t kc = 5; kc < 8; kc++) {
2050 tester
2051 .kh(kh)
2052 .kw(kw)
2053 .kc(kc)
2054 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2055 }
2056 }
2057 }
2058 }
2059 }
2060
2061 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_gt_4_subtile) {
2062 TEST_REQUIRES_PSIMD;
2063 auto tester = AvgPoolMicrokernelTester()
2064 .mr(9)
2065 .iterations(3);
2066 for (size_t ks = 2; ks < tester.mr(); ks++) {
2067 for (size_t kh = 1; kh <= ks; kh++) {
2068 for (size_t kw = 1; kw <= ks; kw++) {
2069 if (kh * kw == ks) {
2070 for (size_t kc = 5; kc < 8; kc++) {
2071 tester
2072 .kh(kh)
2073 .kw(kw)
2074 .kc(kc)
2075 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2076 }
2077 }
2078 }
2079 }
2080 }
2081 }
2082
2083 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_gt_4_fulltile_with_x_stride) {
2084 TEST_REQUIRES_PSIMD;
2085 auto tester = AvgPoolMicrokernelTester()
2086 .mr(9)
2087 .iterations(3);
2088 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2089 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2090 if (kh * kw == tester.mr()) {
2091 for (size_t kc = 5; kc < 8; kc++) {
2092 tester
2093 .kh(kh)
2094 .kw(kw)
2095 .kc(kc)
2096 .x_stride(23)
2097 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2098 }
2099 }
2100 }
2101 }
2102 }
2103
2104 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_div_4_with_qmax) {
2105 TEST_REQUIRES_PSIMD;
2106 for (size_t n = 1; n <= 5; n += 2) {
2107 for (size_t kc = 4; kc < 64; kc += 12) {
2108 AvgPoolMicrokernelTester()
2109 .mr(9)
2110 .n(n)
2111 .kh(3)
2112 .kw(3)
2113 .kc(kc)
2114 .qmax(128)
2115 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2116 }
2117 }
2118 }
2119
2120 TEST(F32_PAVGPOOL_UP9__PSIMD, kc_div_4_with_qmin) {
2121 TEST_REQUIRES_PSIMD;
2122 for (size_t n = 1; n <= 5; n += 2) {
2123 for (size_t kc = 4; kc < 64; kc += 12) {
2124 AvgPoolMicrokernelTester()
2125 .mr(9)
2126 .n(n)
2127 .kh(3)
2128 .kw(3)
2129 .kc(kc)
2130 .qmin(128)
2131 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2132 }
2133 }
2134 }
2135
2136 TEST(F32_PAVGPOOL_UP9__PSIMD, small_n) {
2137 TEST_REQUIRES_PSIMD;
2138 for (size_t n = 2; n < 5; n++) {
2139 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2140 for (size_t kc = 8; kc < 25; kc += 5) {
2141 AvgPoolMicrokernelTester()
2142 .mr(9)
2143 .n(n)
2144 .kh(ks)
2145 .kw(ks)
2146 .kc(kc)
2147 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2148 }
2149 }
2150 }
2151 }
2152
2153 TEST(F32_PAVGPOOL_UP9__PSIMD, small_n_with_x_stride) {
2154 TEST_REQUIRES_PSIMD;
2155 for (size_t n = 2; n < 5; n++) {
2156 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2157 for (size_t kc = 8; kc < 25; kc += 5) {
2158 AvgPoolMicrokernelTester()
2159 .mr(9)
2160 .n(n)
2161 .kh(ks)
2162 .kw(ks)
2163 .kc(kc)
2164 .x_stride(29)
2165 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2166 }
2167 }
2168 }
2169 }
2170
2171 TEST(F32_PAVGPOOL_UP9__PSIMD, small_n_with_y_stride) {
2172 TEST_REQUIRES_PSIMD;
2173 for (size_t n = 2; n < 5; n++) {
2174 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2175 for (size_t kc = 8; kc < 25; kc += 5) {
2176 AvgPoolMicrokernelTester()
2177 .mr(9)
2178 .n(n)
2179 .kh(ks)
2180 .kw(ks)
2181 .kc(kc)
2182 .y_stride(31)
2183 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2184 }
2185 }
2186 }
2187 }
2188
2189 TEST(F32_PAVGPOOL_UP9__PSIMD, small_n_with_s) {
2190 TEST_REQUIRES_PSIMD;
2191 for (size_t n = 2; n < 5; n++) {
2192 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2193 for (size_t kc = 8; kc < 25; kc += 5) {
2194 for (size_t s = 2; s <= ks; s++) {
2195 AvgPoolMicrokernelTester()
2196 .mr(9)
2197 .n(n)
2198 .kh(ks)
2199 .kw(ks)
2200 .kc(kc)
2201 .s(s)
2202 .Test(xnn_f32_pavgpool_ukernel_up9__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2203 }
2204 }
2205 }
2206 }
2207 }
2208
2209 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_eq_4_twopass_fulltile) {
2210 TEST_REQUIRES_PSIMD;
2211 auto tester = AvgPoolMicrokernelTester()
2212 .mr(9)
2213 .qr(8)
2214 .kc(4);
2215 const size_t ks = tester.mr() + tester.qr();
2216 for (size_t kh = 1; kh <= ks; kh++) {
2217 for (size_t kw = 1; kw <= ks; kw++) {
2218 if (kh * kw == ks) {
2219 tester
2220 .kh(kh)
2221 .kw(kw)
2222 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2223 }
2224 }
2225 }
2226 }
2227
2228 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_eq_4_twopass_subtile) {
2229 TEST_REQUIRES_PSIMD;
2230 auto tester = AvgPoolMicrokernelTester()
2231 .mr(9)
2232 .qr(8)
2233 .kc(4);
2234 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2235 tester
2236 .kh(ks)
2237 .kw(1)
2238 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2239 tester
2240 .kh(1)
2241 .kw(ks)
2242 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2243 }
2244 }
2245
2246 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_eq_4_multipass_fulltile) {
2247 TEST_REQUIRES_PSIMD;
2248 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2249 auto tester = AvgPoolMicrokernelTester()
2250 .mr(9)
2251 .qr(8)
2252 .kc(4);
2253 for (size_t kh = 1; kh <= ks; kh++) {
2254 for (size_t kw = 1; kw <= ks; kw++) {
2255 if (kh * kw == ks) {
2256 tester
2257 .kh(kh)
2258 .kw(kw)
2259 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2260 }
2261 }
2262 }
2263 }
2264 }
2265
2266 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_eq_4_multipass_subtile) {
2267 TEST_REQUIRES_PSIMD;
2268 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2269 auto tester = AvgPoolMicrokernelTester()
2270 .mr(9)
2271 .qr(8)
2272 .kc(4);
2273 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2274 tester
2275 .kh(ks)
2276 .kw(1)
2277 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2278 tester
2279 .kh(1)
2280 .kw(ks)
2281 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2282 }
2283 }
2284 }
2285
2286 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_twopass_fulltile) {
2287 TEST_REQUIRES_PSIMD;
2288 auto tester = AvgPoolMicrokernelTester()
2289 .mr(9)
2290 .qr(8)
2291 .iterations(3);
2292 const size_t ks = 17;
2293 for (size_t kc = 4; kc < 64; kc += 12) {
2294 tester
2295 .kc(kc)
2296 .kh(ks)
2297 .kw(1)
2298 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2299 tester
2300 .kc(kc)
2301 .kh(1)
2302 .kw(ks)
2303 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2304 }
2305 }
2306
2307 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_twopass_subtile) {
2308 TEST_REQUIRES_PSIMD;
2309 auto tester = AvgPoolMicrokernelTester()
2310 .mr(9)
2311 .qr(8)
2312 .iterations(3);
2313 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2314 for (size_t kc = 4; kc < 64; kc += 12) {
2315 tester
2316 .kc(kc)
2317 .kh(ks)
2318 .kw(1)
2319 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2320 tester
2321 .kc(kc)
2322 .kh(1)
2323 .kw(ks)
2324 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2325 }
2326 }
2327 }
2328
2329 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_twopass_fulltile_with_x_stride) {
2330 TEST_REQUIRES_PSIMD;
2331 auto tester = AvgPoolMicrokernelTester()
2332 .mr(9)
2333 .qr(8)
2334 .iterations(3);
2335 const size_t ks = tester.mr() + tester.qr();
2336 for (size_t kh = 1; kh <= ks; kh++) {
2337 for (size_t kw = 1; kw <= ks; kw++) {
2338 if (kh * kw == ks) {
2339 for (size_t kc = 4; kc < 64; kc += 12) {
2340 tester
2341 .kh(kh)
2342 .kw(kw)
2343 .kc(kc)
2344 .x_stride(131)
2345 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2346 }
2347 }
2348 }
2349 }
2350 }
2351
2352 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_multipass_fulltile) {
2353 TEST_REQUIRES_PSIMD;
2354 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2355 auto tester = AvgPoolMicrokernelTester()
2356 .mr(9)
2357 .qr(8)
2358 .iterations(3);
2359 for (size_t kh = 1; kh <= ks; kh++) {
2360 for (size_t kw = 1; kw <= ks; kw++) {
2361 if (kh * kw == ks) {
2362 for (size_t kc = 4; kc < 64; kc += 12) {
2363 tester
2364 .kh(kh)
2365 .kw(kw)
2366 .kc(kc)
2367 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2368 }
2369 }
2370 }
2371 }
2372 }
2373 }
2374
2375 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_multipass_subtile) {
2376 TEST_REQUIRES_PSIMD;
2377 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2378 auto tester = AvgPoolMicrokernelTester()
2379 .mr(9)
2380 .qr(8)
2381 .iterations(3);
2382 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2383 for (size_t kc = 4; kc < 64; kc += 12) {
2384 tester
2385 .kc(kc)
2386 .kh(ks)
2387 .kw(1)
2388 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2389 tester
2390 .kc(kc)
2391 .kh(1)
2392 .kw(ks)
2393 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2394 }
2395 }
2396 }
2397 }
2398
2399 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_multipass_fulltile_with_x_stride) {
2400 TEST_REQUIRES_PSIMD;
2401 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2402 auto tester = AvgPoolMicrokernelTester()
2403 .mr(9)
2404 .qr(8)
2405 .iterations(3);
2406 for (size_t kh = 1; kh <= ks; kh++) {
2407 for (size_t kw = 1; kw <= ks; kw++) {
2408 if (kh * kw == ks) {
2409 for (size_t kc = 4; kc < 64; kc += 12) {
2410 tester
2411 .kh(kh)
2412 .kw(kw)
2413 .kc(kc)
2414 .x_stride(131)
2415 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2416 }
2417 }
2418 }
2419 }
2420 }
2421 }
2422
2423 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_twopass_fulltile) {
2424 TEST_REQUIRES_PSIMD;
2425 auto tester = AvgPoolMicrokernelTester()
2426 .mr(9)
2427 .qr(8)
2428 .iterations(3);
2429 const size_t ks = tester.mr() + tester.qr();
2430 for (size_t kh = 1; kh <= ks; kh++) {
2431 for (size_t kw = 1; kw <= ks; kw++) {
2432 if (kh * kw == ks) {
2433 for (size_t kc = 1; kc < 4; kc++) {
2434 tester
2435 .kh(kh)
2436 .kw(kw)
2437 .kc(kc)
2438 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2439 }
2440 }
2441 }
2442 }
2443 }
2444
2445 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_twopass_subtile) {
2446 TEST_REQUIRES_PSIMD;
2447 auto tester = AvgPoolMicrokernelTester()
2448 .mr(9)
2449 .qr(8)
2450 .iterations(3);
2451 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2452 for (size_t kc = 1; kc < 4; kc++) {
2453 tester
2454 .kc(kc)
2455 .kh(ks)
2456 .kw(1)
2457 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2458 tester
2459 .kc(kc)
2460 .kh(1)
2461 .kw(ks)
2462 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2463 }
2464 }
2465 }
2466
2467 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_twopass_fulltile_with_x_stride) {
2468 TEST_REQUIRES_PSIMD;
2469 auto tester = AvgPoolMicrokernelTester()
2470 .mr(9)
2471 .qr(8)
2472 .iterations(3);
2473 const size_t ks = tester.mr() + tester.qr();
2474 for (size_t kh = 1; kh <= ks; kh++) {
2475 for (size_t kw = 1; kw <= ks; kw++) {
2476 if (kh * kw == ks) {
2477 for (size_t kc = 1; kc < 4; kc++) {
2478 tester
2479 .kh(kh)
2480 .kw(kw)
2481 .kc(kc)
2482 .x_stride(23)
2483 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2484 }
2485 }
2486 }
2487 }
2488 }
2489
2490 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_multipass_fulltile) {
2491 TEST_REQUIRES_PSIMD;
2492 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2493 auto tester = AvgPoolMicrokernelTester()
2494 .mr(9)
2495 .qr(8)
2496 .iterations(3);
2497 for (size_t kh = 1; kh <= ks; kh++) {
2498 for (size_t kw = 1; kw <= ks; kw++) {
2499 if (kh * kw == ks) {
2500 for (size_t kc = 1; kc < 4; kc++) {
2501 tester
2502 .kh(kh)
2503 .kw(kw)
2504 .kc(kc)
2505 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2506 }
2507 }
2508 }
2509 }
2510 }
2511 }
2512
2513 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_multipass_subtile) {
2514 TEST_REQUIRES_PSIMD;
2515 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2516 auto tester = AvgPoolMicrokernelTester()
2517 .mr(9)
2518 .qr(8)
2519 .iterations(3);
2520 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2521 for (size_t kc = 1; kc < 4; kc++) {
2522 tester
2523 .kc(kc)
2524 .kh(ks)
2525 .kw(1)
2526 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2527 tester
2528 .kc(kc)
2529 .kh(1)
2530 .kw(ks)
2531 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2532 }
2533 }
2534 }
2535 }
2536
2537 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_lt_4_multipass_fulltile_with_x_stride) {
2538 TEST_REQUIRES_PSIMD;
2539 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2540 auto tester = AvgPoolMicrokernelTester()
2541 .mr(9)
2542 .qr(8)
2543 .iterations(3);
2544 for (size_t kh = 1; kh <= ks; kh++) {
2545 for (size_t kw = 1; kw <= ks; kw++) {
2546 if (kh * kw == ks) {
2547 for (size_t kc = 1; kc < 4; kc++) {
2548 tester
2549 .kh(kh)
2550 .kw(kw)
2551 .kc(kc)
2552 .x_stride(23)
2553 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2554 }
2555 }
2556 }
2557 }
2558 }
2559 }
2560
2561 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_twopass_fulltile) {
2562 TEST_REQUIRES_PSIMD;
2563 auto tester = AvgPoolMicrokernelTester()
2564 .mr(9)
2565 .qr(8)
2566 .iterations(3);
2567 const size_t ks = tester.mr() + tester.qr();
2568 for (size_t kh = 1; kh <= ks; kh++) {
2569 for (size_t kw = 1; kw <= ks; kw++) {
2570 if (kh * kw == ks) {
2571 for (size_t kc = 5; kc < 8; kc++) {
2572 tester
2573 .kh(kh)
2574 .kw(kw)
2575 .kc(kc)
2576 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2577 }
2578 }
2579 }
2580 }
2581 }
2582
2583 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_twopass_subtile) {
2584 TEST_REQUIRES_PSIMD;
2585 auto tester = AvgPoolMicrokernelTester()
2586 .mr(9)
2587 .qr(8)
2588 .iterations(3);
2589 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2590 for (size_t kc = 5; kc < 8; kc++) {
2591 tester
2592 .kc(kc)
2593 .kh(ks)
2594 .kw(1)
2595 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2596 tester
2597 .kc(kc)
2598 .kh(1)
2599 .kw(ks)
2600 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2601 }
2602 }
2603 }
2604
2605 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_twopass_fulltile_with_x_stride) {
2606 TEST_REQUIRES_PSIMD;
2607 auto tester = AvgPoolMicrokernelTester()
2608 .mr(9)
2609 .qr(8)
2610 .iterations(3);
2611 const size_t ks = tester.mr() + tester.qr();
2612 for (size_t kh = 1; kh <= ks; kh++) {
2613 for (size_t kw = 1; kw <= ks; kw++) {
2614 if (kh * kw == ks) {
2615 for (size_t kc = 5; kc < 8; kc++) {
2616 tester
2617 .kh(kh)
2618 .kw(kw)
2619 .kc(kc)
2620 .x_stride(23)
2621 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2622 }
2623 }
2624 }
2625 }
2626 }
2627
2628 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_multipass_fulltile) {
2629 TEST_REQUIRES_PSIMD;
2630 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2631 auto tester = AvgPoolMicrokernelTester()
2632 .mr(9)
2633 .qr(8)
2634 .iterations(3);
2635 for (size_t kh = 1; kh <= ks; kh++) {
2636 for (size_t kw = 1; kw <= ks; kw++) {
2637 if (kh * kw == ks) {
2638 for (size_t kc = 5; kc < 8; kc++) {
2639 tester
2640 .kh(kh)
2641 .kw(kw)
2642 .kc(kc)
2643 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2644 }
2645 }
2646 }
2647 }
2648 }
2649 }
2650
2651 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_multipass_subtile) {
2652 TEST_REQUIRES_PSIMD;
2653 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2654 auto tester = AvgPoolMicrokernelTester()
2655 .mr(9)
2656 .qr(8)
2657 .iterations(3);
2658 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2659 for (size_t kc = 5; kc < 8; kc++) {
2660 tester
2661 .kc(kc)
2662 .kh(ks)
2663 .kw(1)
2664 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2665 tester
2666 .kc(kc)
2667 .kh(1)
2668 .kw(ks)
2669 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2670 }
2671 }
2672 }
2673 }
2674
2675 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_gt_4_multipass_fulltile_with_x_stride) {
2676 TEST_REQUIRES_PSIMD;
2677 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2678 auto tester = AvgPoolMicrokernelTester()
2679 .mr(9)
2680 .qr(8)
2681 .iterations(3);
2682 for (size_t kh = 1; kh <= ks; kh++) {
2683 for (size_t kw = 1; kw <= ks; kw++) {
2684 if (kh * kw == ks) {
2685 for (size_t kc = 5; kc < 8; kc++) {
2686 tester
2687 .kh(kh)
2688 .kw(kw)
2689 .kc(kc)
2690 .x_stride(23)
2691 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2692 }
2693 }
2694 }
2695 }
2696 }
2697 }
2698
2699 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_with_qmax) {
2700 TEST_REQUIRES_PSIMD;
2701 for (size_t n = 1; n <= 5; n += 2) {
2702 for (size_t kc = 4; kc < 64; kc += 12) {
2703 AvgPoolMicrokernelTester()
2704 .mr(9)
2705 .qr(8)
2706 .n(n)
2707 .kh(5)
2708 .kw(5)
2709 .kc(kc)
2710 .qmax(128)
2711 .iterations(3)
2712 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2713 }
2714 }
2715 }
2716
2717 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, kc_div_4_with_qmin) {
2718 TEST_REQUIRES_PSIMD;
2719 for (size_t n = 1; n <= 5; n += 2) {
2720 for (size_t kc = 4; kc < 64; kc += 12) {
2721 AvgPoolMicrokernelTester()
2722 .mr(9)
2723 .qr(8)
2724 .n(n)
2725 .kh(5)
2726 .kw(5)
2727 .kc(kc)
2728 .qmin(128)
2729 .iterations(3)
2730 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2731 }
2732 }
2733 }
2734
2735 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, small_n) {
2736 TEST_REQUIRES_PSIMD;
2737 for (size_t n = 2; n < 5; n++) {
2738 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2739 for (size_t kc = 8; kc < 25; kc += 5) {
2740 AvgPoolMicrokernelTester()
2741 .mr(9)
2742 .qr(8)
2743 .n(n)
2744 .kh(ks)
2745 .kw(ks)
2746 .kc(kc)
2747 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2748 }
2749 }
2750 }
2751 }
2752
2753 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, small_n_with_x_stride) {
2754 TEST_REQUIRES_PSIMD;
2755 for (size_t n = 2; n < 5; n++) {
2756 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2757 for (size_t kc = 8; kc < 25; kc += 5) {
2758 AvgPoolMicrokernelTester()
2759 .mr(9)
2760 .qr(8)
2761 .n(n)
2762 .kh(ks)
2763 .kw(ks)
2764 .kc(kc)
2765 .x_stride(29)
2766 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2767 }
2768 }
2769 }
2770 }
2771
2772 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, small_n_with_y_stride) {
2773 TEST_REQUIRES_PSIMD;
2774 for (size_t n = 2; n < 5; n++) {
2775 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2776 for (size_t kc = 8; kc < 25; kc += 5) {
2777 AvgPoolMicrokernelTester()
2778 .mr(9)
2779 .qr(8)
2780 .n(n)
2781 .kh(ks)
2782 .kw(ks)
2783 .kc(kc)
2784 .y_stride(31)
2785 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2786 }
2787 }
2788 }
2789 }
2790
2791 TEST(F32_PAVGPOOL_MP9P8Q__PSIMD, small_n_with_s) {
2792 TEST_REQUIRES_PSIMD;
2793 for (size_t n = 2; n < 5; n++) {
2794 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2795 for (size_t s = 2; s <= 5; s++) {
2796 for (size_t kc = 8; kc < 25; kc += 5) {
2797 AvgPoolMicrokernelTester()
2798 .mr(9)
2799 .qr(8)
2800 .n(n)
2801 .kh(ks)
2802 .kw(ks)
2803 .kc(kc)
2804 .s(s)
2805 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__psimd, AvgPoolMicrokernelTester::Variant::Scalar);
2806 }
2807 }
2808 }
2809 }
2810 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002811#endif // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002812
2813
Marat Dukhan436ebe62019-12-04 15:10:12 -08002814#if XNN_ARCH_WASM
2815 TEST(F32_PAVGPOOL_UP9__WASM, kc_eq_1_fulltile) {
2816 auto tester = AvgPoolMicrokernelTester()
2817 .mr(9)
2818 .kc(1);
2819 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2820 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2821 if (kh * kw == tester.mr()) {
2822 tester
2823 .kh(kh)
2824 .kw(kw)
2825 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2826 }
2827 }
2828 }
2829 }
2830
2831 TEST(F32_PAVGPOOL_UP9__WASM, kc_eq_1_subtile) {
2832 auto tester = AvgPoolMicrokernelTester()
2833 .mr(9)
2834 .kc(1);
2835 for (size_t ks = 2; ks < tester.mr(); ks++) {
2836 for (size_t kh = 1; kh <= ks; kh++) {
2837 for (size_t kw = 1; kw <= ks; kw++) {
2838 if (kh * kw == ks) {
2839 tester
2840 .kh(kh)
2841 .kw(kw)
2842 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2843 }
2844 }
2845 }
2846 }
2847 }
2848
2849 TEST(F32_PAVGPOOL_UP9__WASM, kc_gt_1_fulltile) {
2850 auto tester = AvgPoolMicrokernelTester()
2851 .mr(9);
2852 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2853 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2854 if (kh * kw == tester.mr()) {
2855 for (size_t kc = 2; kc < 8; kc++) {
2856 tester
2857 .kh(kh)
2858 .kw(kw)
2859 .kc(kc)
2860 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2861 }
2862 }
2863 }
2864 }
2865 }
2866
2867 TEST(F32_PAVGPOOL_UP9__WASM, kc_gt_1_subtile) {
2868 auto tester = AvgPoolMicrokernelTester()
2869 .mr(9)
2870 .iterations(3);
2871 for (size_t ks = 2; ks < tester.mr(); ks++) {
2872 for (size_t kh = 1; kh <= ks; kh++) {
2873 for (size_t kw = 1; kw <= ks; kw++) {
2874 if (kh * kw == ks) {
2875 for (size_t kc = 2; kc < 8; kc++) {
2876 tester
2877 .kh(kh)
2878 .kw(kw)
2879 .kc(kc)
2880 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2881 }
2882 }
2883 }
2884 }
2885 }
2886 }
2887
2888 TEST(F32_PAVGPOOL_UP9__WASM, kc_gt_1_fulltile_with_x_stride) {
2889 auto tester = AvgPoolMicrokernelTester()
2890 .mr(9)
2891 .iterations(3);
2892 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2893 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2894 if (kh * kw == tester.mr()) {
2895 for (size_t kc = 2; kc < 8; kc++) {
2896 tester
2897 .kh(kh)
2898 .kw(kw)
2899 .kc(kc)
2900 .x_stride(23)
2901 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2902 }
2903 }
2904 }
2905 }
2906 }
2907
2908 TEST(F32_PAVGPOOL_UP9__WASM, qmax) {
2909 for (size_t n = 1; n <= 5; n += 2) {
2910 for (size_t kc = 1; kc < 8; kc += 3) {
2911 AvgPoolMicrokernelTester()
2912 .mr(9)
2913 .n(n)
2914 .kh(3)
2915 .kw(3)
2916 .kc(kc)
2917 .qmax(128)
2918 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2919 }
2920 }
2921 }
2922
2923 TEST(F32_PAVGPOOL_UP9__WASM, qmin) {
2924 for (size_t n = 1; n <= 5; n += 2) {
2925 for (size_t kc = 1; kc < 8; kc += 3) {
2926 AvgPoolMicrokernelTester()
2927 .mr(9)
2928 .n(n)
2929 .kh(3)
2930 .kw(3)
2931 .kc(kc)
2932 .qmin(128)
2933 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2934 }
2935 }
2936 }
2937
2938 TEST(F32_PAVGPOOL_UP9__WASM, small_n) {
2939 for (size_t n = 2; n < 5; n++) {
2940 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2941 for (size_t kc = 1; kc < 8; kc += 3) {
2942 AvgPoolMicrokernelTester()
2943 .mr(9)
2944 .n(n)
2945 .kh(ks)
2946 .kw(ks)
2947 .kc(kc)
2948 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2949 }
2950 }
2951 }
2952 }
2953
2954 TEST(F32_PAVGPOOL_UP9__WASM, small_n_with_x_stride) {
2955 for (size_t n = 2; n < 5; n++) {
2956 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2957 for (size_t kc = 1; kc < 8; kc += 3) {
2958 AvgPoolMicrokernelTester()
2959 .mr(9)
2960 .n(n)
2961 .kh(ks)
2962 .kw(ks)
2963 .kc(kc)
2964 .x_stride(29)
2965 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2966 }
2967 }
2968 }
2969 }
2970
2971 TEST(F32_PAVGPOOL_UP9__WASM, small_n_with_y_stride) {
2972 for (size_t n = 2; n < 5; n++) {
2973 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2974 for (size_t kc = 1; kc < 8; kc += 3) {
2975 AvgPoolMicrokernelTester()
2976 .mr(9)
2977 .n(n)
2978 .kh(ks)
2979 .kw(ks)
2980 .kc(kc)
2981 .y_stride(31)
2982 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
2983 }
2984 }
2985 }
2986 }
2987
2988 TEST(F32_PAVGPOOL_UP9__WASM, small_n_with_s) {
2989 for (size_t n = 2; n < 5; n++) {
2990 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2991 for (size_t kc = 1; kc < 8; kc += 3) {
2992 for (size_t s = 2; s <= ks; s++) {
2993 AvgPoolMicrokernelTester()
2994 .mr(9)
2995 .n(n)
2996 .kh(ks)
2997 .kw(ks)
2998 .kc(kc)
2999 .s(s)
3000 .Test(xnn_f32_pavgpool_ukernel_up9__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3001 }
3002 }
3003 }
3004 }
3005 }
3006
3007 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_eq_1_twopass_fulltile) {
3008 auto tester = AvgPoolMicrokernelTester()
3009 .mr(9)
3010 .qr(8)
3011 .kc(1);
3012 const size_t ks = tester.mr() + tester.qr();
3013 for (size_t kh = 1; kh <= ks; kh++) {
3014 for (size_t kw = 1; kw <= ks; kw++) {
3015 if (kh * kw == ks) {
3016 tester
3017 .kh(kh)
3018 .kw(kw)
3019 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3020 }
3021 }
3022 }
3023 }
3024
3025 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_eq_1_twopass_subtile) {
3026 auto tester = AvgPoolMicrokernelTester()
3027 .mr(9)
3028 .qr(8)
3029 .kc(1);
3030 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
3031 tester
3032 .kh(ks)
3033 .kw(1)
3034 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3035 tester
3036 .kh(1)
3037 .kw(ks)
3038 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3039 }
3040 }
3041
3042 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_eq_1_multipass_fulltile) {
3043 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3044 auto tester = AvgPoolMicrokernelTester()
3045 .mr(9)
3046 .qr(8)
3047 .kc(1);
3048 for (size_t kh = 1; kh <= ks; kh++) {
3049 for (size_t kw = 1; kw <= ks; kw++) {
3050 if (kh * kw == ks) {
3051 tester
3052 .kh(kh)
3053 .kw(kw)
3054 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3055 }
3056 }
3057 }
3058 }
3059 }
3060
3061 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_eq_1_multipass_subtile) {
3062 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
3063 auto tester = AvgPoolMicrokernelTester()
3064 .mr(9)
3065 .qr(8)
3066 .kc(1);
3067 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
3068 tester
3069 .kh(ks)
3070 .kw(1)
3071 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3072 tester
3073 .kh(1)
3074 .kw(ks)
3075 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3076 }
3077 }
3078 }
3079
3080 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_twopass_fulltile) {
3081 auto tester = AvgPoolMicrokernelTester()
3082 .mr(9)
3083 .qr(8)
3084 .iterations(3);
3085 const size_t ks = tester.mr() + tester.qr();
3086 for (size_t kh = 1; kh <= ks; kh++) {
3087 for (size_t kw = 1; kw <= ks; kw++) {
3088 if (kh * kw == ks) {
3089 for (size_t kc = 2; kc < 8; kc++) {
3090 tester
3091 .kh(kh)
3092 .kw(kw)
3093 .kc(kc)
3094 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3095 }
3096 }
3097 }
3098 }
3099 }
3100
3101 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_twopass_subtile) {
3102 auto tester = AvgPoolMicrokernelTester()
3103 .mr(9)
3104 .qr(8)
3105 .iterations(3);
3106 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
3107 for (size_t kc = 2; kc < 8; kc++) {
3108 tester
3109 .kc(kc)
3110 .kh(ks)
3111 .kw(1)
3112 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3113 tester
3114 .kc(kc)
3115 .kh(1)
3116 .kw(ks)
3117 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3118 }
3119 }
3120 }
3121
3122 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_twopass_fulltile_with_x_stride) {
3123 auto tester = AvgPoolMicrokernelTester()
3124 .mr(9)
3125 .qr(8)
3126 .iterations(3);
3127 const size_t ks = tester.mr() + tester.qr();
3128 for (size_t kh = 1; kh <= ks; kh++) {
3129 for (size_t kw = 1; kw <= ks; kw++) {
3130 if (kh * kw == ks) {
3131 for (size_t kc = 2; kc < 8; kc++) {
3132 tester
3133 .kh(kh)
3134 .kw(kw)
3135 .kc(kc)
3136 .x_stride(23)
3137 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3138 }
3139 }
3140 }
3141 }
3142 }
3143
3144 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_multipass_fulltile) {
3145 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3146 auto tester = AvgPoolMicrokernelTester()
3147 .mr(9)
3148 .qr(8)
3149 .iterations(3);
3150 for (size_t kh = 1; kh <= ks; kh++) {
3151 for (size_t kw = 1; kw <= ks; kw++) {
3152 if (kh * kw == ks) {
3153 for (size_t kc = 2; kc < 8; kc++) {
3154 tester
3155 .kh(kh)
3156 .kw(kw)
3157 .kc(kc)
3158 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3159 }
3160 }
3161 }
3162 }
3163 }
3164 }
3165
3166 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_multipass_subtile) {
3167 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
3168 auto tester = AvgPoolMicrokernelTester()
3169 .mr(9)
3170 .qr(8)
3171 .iterations(3);
3172 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
3173 for (size_t kc = 2; kc < 8; kc++) {
3174 tester
3175 .kc(kc)
3176 .kh(ks)
3177 .kw(1)
3178 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3179 tester
3180 .kc(kc)
3181 .kh(1)
3182 .kw(ks)
3183 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3184 }
3185 }
3186 }
3187 }
3188
3189 TEST(F32_PAVGPOOL_MP9P8Q__WASM, kc_gt_1_multipass_fulltile_with_x_stride) {
3190 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3191 auto tester = AvgPoolMicrokernelTester()
3192 .mr(9)
3193 .qr(8)
3194 .iterations(3);
3195 for (size_t kh = 1; kh <= ks; kh++) {
3196 for (size_t kw = 1; kw <= ks; kw++) {
3197 if (kh * kw == ks) {
3198 for (size_t kc = 2; kc < 8; kc++) {
3199 tester
3200 .kh(kh)
3201 .kw(kw)
3202 .kc(kc)
3203 .x_stride(23)
3204 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3205 }
3206 }
3207 }
3208 }
3209 }
3210 }
3211
3212 TEST(F32_PAVGPOOL_MP9P8Q__WASM, qmax) {
3213 for (size_t n = 1; n <= 5; n += 2) {
3214 for (size_t kc = 1; kc < 8; kc += 3) {
3215 AvgPoolMicrokernelTester()
3216 .mr(9)
3217 .qr(8)
3218 .n(n)
3219 .kh(5)
3220 .kw(5)
3221 .kc(kc)
3222 .qmax(128)
3223 .iterations(3)
3224 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3225 }
3226 }
3227 }
3228
3229 TEST(F32_PAVGPOOL_MP9P8Q__WASM, qmin) {
3230 for (size_t n = 1; n <= 5; n += 2) {
3231 for (size_t kc = 1; kc < 8; kc += 3) {
3232 AvgPoolMicrokernelTester()
3233 .mr(9)
3234 .qr(8)
3235 .n(n)
3236 .kh(5)
3237 .kw(5)
3238 .kc(kc)
3239 .qmin(128)
3240 .iterations(3)
3241 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3242 }
3243 }
3244 }
3245
3246 TEST(F32_PAVGPOOL_MP9P8Q__WASM, small_n) {
3247 for (size_t n = 2; n < 5; n++) {
3248 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3249 for (size_t kc = 1; kc < 8; kc += 3) {
3250 AvgPoolMicrokernelTester()
3251 .mr(9)
3252 .qr(8)
3253 .n(n)
3254 .kh(ks)
3255 .kw(ks)
3256 .kc(kc)
3257 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3258 }
3259 }
3260 }
3261 }
3262
3263 TEST(F32_PAVGPOOL_MP9P8Q__WASM, small_n_with_x_stride) {
3264 for (size_t n = 2; n < 5; n++) {
3265 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3266 for (size_t kc = 1; kc < 8; kc += 3) {
3267 AvgPoolMicrokernelTester()
3268 .mr(9)
3269 .qr(8)
3270 .n(n)
3271 .kh(ks)
3272 .kw(ks)
3273 .kc(kc)
3274 .x_stride(29)
3275 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3276 }
3277 }
3278 }
3279 }
3280
3281 TEST(F32_PAVGPOOL_MP9P8Q__WASM, small_n_with_y_stride) {
3282 for (size_t n = 2; n < 5; n++) {
3283 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3284 for (size_t kc = 1; kc < 8; kc += 3) {
3285 AvgPoolMicrokernelTester()
3286 .mr(9)
3287 .qr(8)
3288 .n(n)
3289 .kh(ks)
3290 .kw(ks)
3291 .kc(kc)
3292 .y_stride(31)
3293 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3294 }
3295 }
3296 }
3297 }
3298
3299 TEST(F32_PAVGPOOL_MP9P8Q__WASM, small_n_with_s) {
3300 for (size_t n = 2; n < 5; n++) {
3301 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3302 for (size_t s = 2; s <= 5; s++) {
3303 for (size_t kc = 1; kc < 8; kc += 3) {
3304 AvgPoolMicrokernelTester()
3305 .mr(9)
3306 .qr(8)
3307 .n(n)
3308 .kh(ks)
3309 .kw(ks)
3310 .kc(kc)
3311 .s(s)
3312 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__wasm, AvgPoolMicrokernelTester::Variant::Scalar);
3313 }
3314 }
3315 }
3316 }
3317 }
3318#endif // XNN_ARCH_WASM
3319
3320
XNNPACK Teamb455b122019-09-27 18:10:33 -07003321TEST(F32_PAVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
3322 auto tester = AvgPoolMicrokernelTester()
3323 .mr(9)
3324 .kc(1);
3325 for (size_t kh = 1; kh <= tester.mr(); kh++) {
3326 for (size_t kw = 1; kw <= tester.mr(); kw++) {
3327 if (kh * kw == tester.mr()) {
3328 tester
3329 .kh(kh)
3330 .kw(kw)
3331 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3332 }
3333 }
3334 }
3335}
3336
3337TEST(F32_PAVGPOOL_UP9__SCALAR, kc_eq_1_subtile) {
3338 auto tester = AvgPoolMicrokernelTester()
3339 .mr(9)
3340 .kc(1);
3341 for (size_t ks = 2; ks < tester.mr(); ks++) {
3342 for (size_t kh = 1; kh <= ks; kh++) {
3343 for (size_t kw = 1; kw <= ks; kw++) {
3344 if (kh * kw == ks) {
3345 tester
3346 .kh(kh)
3347 .kw(kw)
3348 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3349 }
3350 }
3351 }
3352 }
3353}
3354
3355TEST(F32_PAVGPOOL_UP9__SCALAR, kc_gt_1_fulltile) {
3356 auto tester = AvgPoolMicrokernelTester()
3357 .mr(9);
3358 for (size_t kh = 1; kh <= tester.mr(); kh++) {
3359 for (size_t kw = 1; kw <= tester.mr(); kw++) {
3360 if (kh * kw == tester.mr()) {
3361 for (size_t kc = 2; kc < 8; kc++) {
3362 tester
3363 .kh(kh)
3364 .kw(kw)
3365 .kc(kc)
3366 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3367 }
3368 }
3369 }
3370 }
3371}
3372
3373TEST(F32_PAVGPOOL_UP9__SCALAR, kc_gt_1_subtile) {
3374 auto tester = AvgPoolMicrokernelTester()
3375 .mr(9)
3376 .iterations(3);
3377 for (size_t ks = 2; ks < tester.mr(); ks++) {
3378 for (size_t kh = 1; kh <= ks; kh++) {
3379 for (size_t kw = 1; kw <= ks; kw++) {
3380 if (kh * kw == ks) {
3381 for (size_t kc = 2; kc < 8; kc++) {
3382 tester
3383 .kh(kh)
3384 .kw(kw)
3385 .kc(kc)
3386 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3387 }
3388 }
3389 }
3390 }
3391 }
3392}
3393
3394TEST(F32_PAVGPOOL_UP9__SCALAR, kc_gt_1_fulltile_with_x_stride) {
3395 auto tester = AvgPoolMicrokernelTester()
3396 .mr(9)
3397 .iterations(3);
3398 for (size_t kh = 1; kh <= tester.mr(); kh++) {
3399 for (size_t kw = 1; kw <= tester.mr(); kw++) {
3400 if (kh * kw == tester.mr()) {
3401 for (size_t kc = 2; kc < 8; kc++) {
3402 tester
3403 .kh(kh)
3404 .kw(kw)
3405 .kc(kc)
3406 .x_stride(23)
3407 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3408 }
3409 }
3410 }
3411 }
3412}
3413
3414TEST(F32_PAVGPOOL_UP9__SCALAR, qmax) {
3415 for (size_t n = 1; n <= 5; n += 2) {
3416 for (size_t kc = 1; kc < 8; kc += 3) {
3417 AvgPoolMicrokernelTester()
3418 .mr(9)
3419 .n(n)
3420 .kh(3)
3421 .kw(3)
3422 .kc(kc)
3423 .qmax(128)
3424 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3425 }
3426 }
3427}
3428
3429TEST(F32_PAVGPOOL_UP9__SCALAR, qmin) {
3430 for (size_t n = 1; n <= 5; n += 2) {
3431 for (size_t kc = 1; kc < 8; kc += 3) {
3432 AvgPoolMicrokernelTester()
3433 .mr(9)
3434 .n(n)
3435 .kh(3)
3436 .kw(3)
3437 .kc(kc)
3438 .qmin(128)
3439 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3440 }
3441 }
3442}
3443
3444TEST(F32_PAVGPOOL_UP9__SCALAR, small_n) {
3445 for (size_t n = 2; n < 5; n++) {
3446 for (size_t ks : std::vector<size_t>{{2, 3}}) {
3447 for (size_t kc = 1; kc < 8; kc += 3) {
3448 AvgPoolMicrokernelTester()
3449 .mr(9)
3450 .n(n)
3451 .kh(ks)
3452 .kw(ks)
3453 .kc(kc)
3454 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3455 }
3456 }
3457 }
3458}
3459
3460TEST(F32_PAVGPOOL_UP9__SCALAR, small_n_with_x_stride) {
3461 for (size_t n = 2; n < 5; n++) {
3462 for (size_t ks : std::vector<size_t>{{2, 3}}) {
3463 for (size_t kc = 1; kc < 8; kc += 3) {
3464 AvgPoolMicrokernelTester()
3465 .mr(9)
3466 .n(n)
3467 .kh(ks)
3468 .kw(ks)
3469 .kc(kc)
3470 .x_stride(29)
3471 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3472 }
3473 }
3474 }
3475}
3476
3477TEST(F32_PAVGPOOL_UP9__SCALAR, small_n_with_y_stride) {
3478 for (size_t n = 2; n < 5; n++) {
3479 for (size_t ks : std::vector<size_t>{{2, 3}}) {
3480 for (size_t kc = 1; kc < 8; kc += 3) {
3481 AvgPoolMicrokernelTester()
3482 .mr(9)
3483 .n(n)
3484 .kh(ks)
3485 .kw(ks)
3486 .kc(kc)
3487 .y_stride(31)
3488 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3489 }
3490 }
3491 }
3492}
3493
3494TEST(F32_PAVGPOOL_UP9__SCALAR, small_n_with_s) {
3495 for (size_t n = 2; n < 5; n++) {
3496 for (size_t ks : std::vector<size_t>{{2, 3}}) {
3497 for (size_t kc = 1; kc < 8; kc += 3) {
3498 for (size_t s = 2; s <= ks; s++) {
3499 AvgPoolMicrokernelTester()
3500 .mr(9)
3501 .n(n)
3502 .kh(ks)
3503 .kw(ks)
3504 .kc(kc)
3505 .s(s)
3506 .Test(xnn_f32_pavgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3507 }
3508 }
3509 }
3510 }
3511}
3512
3513TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_fulltile) {
3514 auto tester = AvgPoolMicrokernelTester()
3515 .mr(9)
3516 .qr(8)
3517 .kc(1);
3518 const size_t ks = tester.mr() + tester.qr();
3519 for (size_t kh = 1; kh <= ks; kh++) {
3520 for (size_t kw = 1; kw <= ks; kw++) {
3521 if (kh * kw == ks) {
3522 tester
3523 .kh(kh)
3524 .kw(kw)
3525 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3526 }
3527 }
3528 }
3529}
3530
3531TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_subtile) {
3532 auto tester = AvgPoolMicrokernelTester()
3533 .mr(9)
3534 .qr(8)
3535 .kc(1);
3536 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
3537 tester
3538 .kh(ks)
3539 .kw(1)
3540 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3541 tester
3542 .kh(1)
3543 .kw(ks)
3544 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3545 }
3546}
3547
3548TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_fulltile) {
3549 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3550 auto tester = AvgPoolMicrokernelTester()
3551 .mr(9)
3552 .qr(8)
3553 .kc(1);
3554 for (size_t kh = 1; kh <= ks; kh++) {
3555 for (size_t kw = 1; kw <= ks; kw++) {
3556 if (kh * kw == ks) {
3557 tester
3558 .kh(kh)
3559 .kw(kw)
3560 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3561 }
3562 }
3563 }
3564 }
3565}
3566
3567TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_subtile) {
3568 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
3569 auto tester = AvgPoolMicrokernelTester()
3570 .mr(9)
3571 .qr(8)
3572 .kc(1);
3573 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
3574 tester
3575 .kh(ks)
3576 .kw(1)
3577 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3578 tester
3579 .kh(1)
3580 .kw(ks)
3581 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3582 }
3583 }
3584}
3585
3586TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile) {
3587 auto tester = AvgPoolMicrokernelTester()
3588 .mr(9)
3589 .qr(8)
3590 .iterations(3);
3591 const size_t ks = tester.mr() + tester.qr();
3592 for (size_t kh = 1; kh <= ks; kh++) {
3593 for (size_t kw = 1; kw <= ks; kw++) {
3594 if (kh * kw == ks) {
3595 for (size_t kc = 2; kc < 8; kc++) {
3596 tester
3597 .kh(kh)
3598 .kw(kw)
3599 .kc(kc)
3600 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3601 }
3602 }
3603 }
3604 }
3605}
3606
3607TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_subtile) {
3608 auto tester = AvgPoolMicrokernelTester()
3609 .mr(9)
3610 .qr(8)
3611 .iterations(3);
3612 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
3613 for (size_t kc = 2; kc < 8; kc++) {
3614 tester
3615 .kc(kc)
3616 .kh(ks)
3617 .kw(1)
3618 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3619 tester
3620 .kc(kc)
3621 .kh(1)
3622 .kw(ks)
3623 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3624 }
3625 }
3626}
3627
3628TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_x_stride) {
3629 auto tester = AvgPoolMicrokernelTester()
3630 .mr(9)
3631 .qr(8)
3632 .iterations(3);
3633 const size_t ks = tester.mr() + tester.qr();
3634 for (size_t kh = 1; kh <= ks; kh++) {
3635 for (size_t kw = 1; kw <= ks; kw++) {
3636 if (kh * kw == ks) {
3637 for (size_t kc = 2; kc < 8; kc++) {
3638 tester
3639 .kh(kh)
3640 .kw(kw)
3641 .kc(kc)
3642 .x_stride(23)
3643 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3644 }
3645 }
3646 }
3647 }
3648}
3649
3650TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile) {
3651 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3652 auto tester = AvgPoolMicrokernelTester()
3653 .mr(9)
3654 .qr(8)
3655 .iterations(3);
3656 for (size_t kh = 1; kh <= ks; kh++) {
3657 for (size_t kw = 1; kw <= ks; kw++) {
3658 if (kh * kw == ks) {
3659 for (size_t kc = 2; kc < 8; kc++) {
3660 tester
3661 .kh(kh)
3662 .kw(kw)
3663 .kc(kc)
3664 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3665 }
3666 }
3667 }
3668 }
3669 }
3670}
3671
3672TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_subtile) {
3673 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
3674 auto tester = AvgPoolMicrokernelTester()
3675 .mr(9)
3676 .qr(8)
3677 .iterations(3);
3678 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
3679 for (size_t kc = 2; kc < 8; kc++) {
3680 tester
3681 .kc(kc)
3682 .kh(ks)
3683 .kw(1)
3684 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3685 tester
3686 .kc(kc)
3687 .kh(1)
3688 .kw(ks)
3689 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3690 }
3691 }
3692 }
3693}
3694
3695TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile_with_x_stride) {
3696 for (size_t ks : std::vector<size_t>{{25, 49}}) {
3697 auto tester = AvgPoolMicrokernelTester()
3698 .mr(9)
3699 .qr(8)
3700 .iterations(3);
3701 for (size_t kh = 1; kh <= ks; kh++) {
3702 for (size_t kw = 1; kw <= ks; kw++) {
3703 if (kh * kw == ks) {
3704 for (size_t kc = 2; kc < 8; kc++) {
3705 tester
3706 .kh(kh)
3707 .kw(kw)
3708 .kc(kc)
3709 .x_stride(23)
3710 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3711 }
3712 }
3713 }
3714 }
3715 }
3716}
3717
3718TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, qmax) {
3719 for (size_t n = 1; n <= 5; n += 2) {
3720 for (size_t kc = 1; kc < 8; kc += 3) {
3721 AvgPoolMicrokernelTester()
3722 .mr(9)
3723 .qr(8)
3724 .n(n)
3725 .kh(5)
3726 .kw(5)
3727 .kc(kc)
3728 .qmax(128)
3729 .iterations(3)
3730 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3731 }
3732 }
3733}
3734
3735TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, qmin) {
3736 for (size_t n = 1; n <= 5; n += 2) {
3737 for (size_t kc = 1; kc < 8; kc += 3) {
3738 AvgPoolMicrokernelTester()
3739 .mr(9)
3740 .qr(8)
3741 .n(n)
3742 .kh(5)
3743 .kw(5)
3744 .kc(kc)
3745 .qmin(128)
3746 .iterations(3)
3747 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3748 }
3749 }
3750}
3751
3752TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, small_n) {
3753 for (size_t n = 2; n < 5; n++) {
3754 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3755 for (size_t kc = 1; kc < 8; kc += 3) {
3756 AvgPoolMicrokernelTester()
3757 .mr(9)
3758 .qr(8)
3759 .n(n)
3760 .kh(ks)
3761 .kw(ks)
3762 .kc(kc)
3763 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3764 }
3765 }
3766 }
3767}
3768
3769TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, small_n_with_x_stride) {
3770 for (size_t n = 2; n < 5; n++) {
3771 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3772 for (size_t kc = 1; kc < 8; kc += 3) {
3773 AvgPoolMicrokernelTester()
3774 .mr(9)
3775 .qr(8)
3776 .n(n)
3777 .kh(ks)
3778 .kw(ks)
3779 .kc(kc)
3780 .x_stride(29)
3781 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3782 }
3783 }
3784 }
3785}
3786
3787TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, small_n_with_y_stride) {
3788 for (size_t n = 2; n < 5; n++) {
3789 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3790 for (size_t kc = 1; kc < 8; kc += 3) {
3791 AvgPoolMicrokernelTester()
3792 .mr(9)
3793 .qr(8)
3794 .n(n)
3795 .kh(ks)
3796 .kw(ks)
3797 .kc(kc)
3798 .y_stride(31)
3799 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3800 }
3801 }
3802 }
3803}
3804
3805TEST(F32_PAVGPOOL_MP9P8Q__SCALAR, small_n_with_s) {
3806 for (size_t n = 2; n < 5; n++) {
3807 for (size_t ks : std::vector<size_t>{{5, 7}}) {
3808 for (size_t s = 2; s <= 5; s++) {
3809 for (size_t kc = 1; kc < 8; kc += 3) {
3810 AvgPoolMicrokernelTester()
3811 .mr(9)
3812 .qr(8)
3813 .n(n)
3814 .kh(ks)
3815 .kw(ks)
3816 .kc(kc)
3817 .s(s)
3818 .Test(xnn_f32_pavgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
3819 }
3820 }
3821 }
3822 }
3823}