blob: 86104cd30c0efbec8f403cf5b5f65bff9249aac6 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <gtest/gtest.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
Marat Dukhan1dadbf72019-10-01 10:46:20 -070014#include <xnnpack/avgpool.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015#include "avgpool-microkernel-tester.h"
16
17
Marat Dukhan1dadbf72019-10-01 10:46:20 -070018#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070019 TEST(Q8_AVGPOOL_UP9__NEON, kc_eq_8_fulltile) {
20 TEST_REQUIRES_ARM_NEON;
21 auto tester = AvgPoolMicrokernelTester()
22 .mr(9)
23 .kc(8);
24 for (size_t kh = 1; kh <= tester.mr(); kh++) {
25 for (size_t kw = 1; kw <= tester.mr(); kw++) {
26 if (kh * kw == tester.mr()) {
27 tester
28 .kh(kh)
29 .kw(kw)
30 .Test(xnn_q8_avgpool_ukernel_up9__neon);
31 }
32 }
33 }
34 }
35
36 TEST(Q8_AVGPOOL_UP9__NEON, kc_eq_8_subtile) {
37 TEST_REQUIRES_ARM_NEON;
38 auto tester = AvgPoolMicrokernelTester()
39 .mr(9)
40 .kc(8);
41 for (size_t ks = 2; ks < tester.mr(); ks++) {
42 for (size_t kh = 1; kh <= ks; kh++) {
43 for (size_t kw = 1; kw <= ks; kw++) {
44 if (kh * kw == ks) {
45 tester
46 .kh(kh)
47 .kw(kw)
48 .Test(xnn_q8_avgpool_ukernel_up9__neon);
49 }
50 }
51 }
52 }
53 }
54
55 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_fulltile) {
56 TEST_REQUIRES_ARM_NEON;
57 auto tester = AvgPoolMicrokernelTester()
58 .mr(9);
59 for (size_t kh = 1; kh <= tester.mr(); kh++) {
60 for (size_t kw = 1; kw <= tester.mr(); kw++) {
61 if (kh * kw == tester.mr()) {
62 for (size_t kc = 8; kc < 128; kc += 24) {
63 tester
64 .kh(kh)
65 .kw(kw)
66 .kc(kc)
67 .Test(xnn_q8_avgpool_ukernel_up9__neon);
68 }
69 }
70 }
71 }
72 }
73
74 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_subtile) {
75 TEST_REQUIRES_ARM_NEON;
76 auto tester = AvgPoolMicrokernelTester()
77 .mr(9)
78 .iterations(3);
79 for (size_t ks = 2; ks < tester.mr(); ks++) {
80 for (size_t kh = 1; kh <= ks; kh++) {
81 for (size_t kw = 1; kw <= ks; kw++) {
82 if (kh * kw == ks) {
83 for (size_t kc = 8; kc < 128; kc += 24) {
84 tester
85 .kh(kh)
86 .kw(kw)
87 .kc(kc)
88 .Test(xnn_q8_avgpool_ukernel_up9__neon);
89 }
90 }
91 }
92 }
93 }
94 }
95
96 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_fulltile_with_x_stride) {
97 TEST_REQUIRES_ARM_NEON;
98 auto tester = AvgPoolMicrokernelTester()
99 .mr(9)
100 .iterations(3);
101 for (size_t kh = 1; kh <= tester.mr(); kh++) {
102 for (size_t kw = 1; kw <= tester.mr(); kw++) {
103 if (kh * kw == tester.mr()) {
104 for (size_t kc = 8; kc < 128; kc += 24) {
105 tester
106 .kh(kh)
107 .kw(kw)
108 .kc(kc)
109 .x_stride(131)
110 .Test(xnn_q8_avgpool_ukernel_up9__neon);
111 }
112 }
113 }
114 }
115 }
116
117 TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_fulltile) {
118 TEST_REQUIRES_ARM_NEON;
119 auto tester = AvgPoolMicrokernelTester()
120 .mr(9);
121 for (size_t kh = 1; kh <= tester.mr(); kh++) {
122 for (size_t kw = 1; kw <= tester.mr(); kw++) {
123 if (kh * kw == tester.mr()) {
124 for (size_t kc = 1; kc < 8; kc++) {
125 tester
126 .kh(kh)
127 .kw(kw)
128 .kc(kc)
129 .Test(xnn_q8_avgpool_ukernel_up9__neon);
130 }
131 }
132 }
133 }
134 }
135
136 TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_subtile) {
137 TEST_REQUIRES_ARM_NEON;
138 auto tester = AvgPoolMicrokernelTester()
139 .mr(9)
140 .iterations(3);
141 for (size_t ks = 2; ks < tester.mr(); ks++) {
142 for (size_t kh = 1; kh <= ks; kh++) {
143 for (size_t kw = 1; kw <= ks; kw++) {
144 if (kh * kw == ks) {
145 for (size_t kc = 1; kc < 8; kc++) {
146 tester
147 .kh(kh)
148 .kw(kw)
149 .kc(kc)
150 .Test(xnn_q8_avgpool_ukernel_up9__neon);
151 }
152 }
153 }
154 }
155 }
156 }
157
158 TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_fulltile_with_x_stride) {
159 TEST_REQUIRES_ARM_NEON;
160 auto tester = AvgPoolMicrokernelTester()
161 .mr(9)
162 .iterations(3);
163 for (size_t kh = 1; kh <= tester.mr(); kh++) {
164 for (size_t kw = 1; kw <= tester.mr(); kw++) {
165 if (kh * kw == tester.mr()) {
166 for (size_t kc = 1; kc < 8; kc++) {
167 tester
168 .kh(kh)
169 .kw(kw)
170 .kc(kc)
171 .x_stride(23)
172 .Test(xnn_q8_avgpool_ukernel_up9__neon);
173 }
174 }
175 }
176 }
177 }
178
179 TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_fulltile) {
180 TEST_REQUIRES_ARM_NEON;
181 auto tester = AvgPoolMicrokernelTester()
182 .mr(9);
183 for (size_t kh = 1; kh <= tester.mr(); kh++) {
184 for (size_t kw = 1; kw <= tester.mr(); kw++) {
185 if (kh * kw == tester.mr()) {
186 for (size_t kc = 9; kc < 16; kc++) {
187 tester
188 .kh(kh)
189 .kw(kw)
190 .kc(kc)
191 .Test(xnn_q8_avgpool_ukernel_up9__neon);
192 }
193 }
194 }
195 }
196 }
197
198 TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_subtile) {
199 TEST_REQUIRES_ARM_NEON;
200 auto tester = AvgPoolMicrokernelTester()
201 .mr(9)
202 .iterations(3);
203 for (size_t ks = 2; ks < tester.mr(); ks++) {
204 for (size_t kh = 1; kh <= ks; kh++) {
205 for (size_t kw = 1; kw <= ks; kw++) {
206 if (kh * kw == ks) {
207 for (size_t kc = 9; kc < 16; kc++) {
208 tester
209 .kh(kh)
210 .kw(kw)
211 .kc(kc)
212 .Test(xnn_q8_avgpool_ukernel_up9__neon);
213 }
214 }
215 }
216 }
217 }
218 }
219
220 TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_fulltile_with_x_stride) {
221 TEST_REQUIRES_ARM_NEON;
222 auto tester = AvgPoolMicrokernelTester()
223 .mr(9)
224 .iterations(3);
225 for (size_t kh = 1; kh <= tester.mr(); kh++) {
226 for (size_t kw = 1; kw <= tester.mr(); kw++) {
227 if (kh * kw == tester.mr()) {
228 for (size_t kc = 9; kc < 16; kc++) {
229 tester
230 .kh(kh)
231 .kw(kw)
232 .kc(kc)
233 .x_stride(23)
234 .Test(xnn_q8_avgpool_ukernel_up9__neon);
235 }
236 }
237 }
238 }
239 }
240
241 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_x_scale) {
242 TEST_REQUIRES_ARM_NEON;
243 for (size_t n = 1; n <= 5; n += 2) {
244 for (size_t kc = 8; kc < 128; kc += 24) {
245 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
246 AvgPoolMicrokernelTester()
247 .mr(9)
248 .n(n)
249 .kh(3)
250 .kw(3)
251 .kc(kc)
252 .x_scale(x_scale)
253 .iterations(2)
254 .Test(xnn_q8_avgpool_ukernel_up9__neon);
255 }
256 }
257 }
258 }
259
260 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_x_zero_point) {
261 TEST_REQUIRES_ARM_NEON;
262 for (size_t n = 1; n <= 5; n += 2) {
263 for (size_t kc = 8; kc < 128; kc += 24) {
264 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
265 AvgPoolMicrokernelTester()
266 .mr(9)
267 .n(n)
268 .kh(3)
269 .kw(3)
270 .kc(kc)
271 .x_zero_point(uint8_t(x_zero_point))
272 .iterations(3)
273 .Test(xnn_q8_avgpool_ukernel_up9__neon);
274 }
275 }
276 }
277 }
278
279 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_y_scale) {
280 TEST_REQUIRES_ARM_NEON;
281 for (size_t n = 1; n <= 5; n += 2) {
282 for (size_t kc = 8; kc < 128; kc += 24) {
283 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
284 AvgPoolMicrokernelTester()
285 .mr(9)
286 .n(n)
287 .kh(3)
288 .kw(3)
289 .kc(kc)
290 .y_scale(y_scale)
291 .iterations(2)
292 .Test(xnn_q8_avgpool_ukernel_up9__neon);
293 }
294 }
295 }
296 }
297
298 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_y_zero_point) {
299 TEST_REQUIRES_ARM_NEON;
300 for (size_t n = 1; n <= 5; n += 2) {
301 for (size_t kc = 8; kc < 128; kc += 24) {
302 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
303 AvgPoolMicrokernelTester()
304 .mr(9)
305 .n(n)
306 .kh(3)
307 .kw(3)
308 .kc(kc)
309 .y_zero_point(uint8_t(y_zero_point))
310 .iterations(3)
311 .Test(xnn_q8_avgpool_ukernel_up9__neon);
312 }
313 }
314 }
315 }
316
317 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_qmax) {
318 TEST_REQUIRES_ARM_NEON;
319 for (size_t n = 1; n <= 5; n += 2) {
320 for (size_t kc = 8; kc < 128; kc += 24) {
321 AvgPoolMicrokernelTester()
322 .mr(9)
323 .n(n)
324 .kh(3)
325 .kw(3)
326 .kc(kc)
327 .x_zero_point(128)
328 .y_zero_point(128)
329 .x_scale(1.0f)
330 .y_scale(1.0f)
331 .qmax(128)
332 .Test(xnn_q8_avgpool_ukernel_up9__neon);
333 }
334 }
335 }
336
337 TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_qmin) {
338 TEST_REQUIRES_ARM_NEON;
339 for (size_t n = 1; n <= 5; n += 2) {
340 for (size_t kc = 8; kc < 128; kc += 24) {
341 AvgPoolMicrokernelTester()
342 .mr(9)
343 .n(n)
344 .kh(3)
345 .kw(3)
346 .kc(kc)
347 .x_zero_point(128)
348 .y_zero_point(128)
349 .x_scale(1.0f)
350 .y_scale(1.0f)
351 .qmin(128)
352 .Test(xnn_q8_avgpool_ukernel_up9__neon);
353 }
354 }
355 }
356
357 TEST(Q8_AVGPOOL_UP9__NEON, small_n) {
358 TEST_REQUIRES_ARM_NEON;
359 for (size_t n = 2; n < 5; n++) {
360 for (size_t ks : std::vector<size_t>{{2, 3}}) {
361 for (size_t kc = 8; kc < 25; kc += 5) {
362 AvgPoolMicrokernelTester()
363 .mr(9)
364 .n(n)
365 .kh(ks)
366 .kw(ks)
367 .kc(kc)
368 .Test(xnn_q8_avgpool_ukernel_up9__neon);
369 }
370 }
371 }
372 }
373
374 TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_x_stride) {
375 TEST_REQUIRES_ARM_NEON;
376 for (size_t n = 2; n < 5; n++) {
377 for (size_t ks : std::vector<size_t>{{2, 3}}) {
378 for (size_t kc = 8; kc < 25; kc += 5) {
379 AvgPoolMicrokernelTester()
380 .mr(9)
381 .n(n)
382 .kh(ks)
383 .kw(ks)
384 .kc(kc)
385 .x_stride(29)
386 .Test(xnn_q8_avgpool_ukernel_up9__neon);
387 }
388 }
389 }
390 }
391
392 TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_y_stride) {
393 TEST_REQUIRES_ARM_NEON;
394 for (size_t n = 2; n < 5; n++) {
395 for (size_t ks : std::vector<size_t>{{2, 3}}) {
396 for (size_t kc = 8; kc < 25; kc += 5) {
397 AvgPoolMicrokernelTester()
398 .mr(9)
399 .n(n)
400 .kh(ks)
401 .kw(ks)
402 .kc(kc)
403 .y_stride(31)
404 .Test(xnn_q8_avgpool_ukernel_up9__neon);
405 }
406 }
407 }
408 }
409
410 TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_s) {
411 TEST_REQUIRES_ARM_NEON;
412 for (size_t n = 2; n < 5; n++) {
413 for (size_t ks : std::vector<size_t>{{2, 3}}) {
414 for (size_t kc = 8; kc < 25; kc += 5) {
415 for (size_t s = 2; s <= ks; s++) {
416 AvgPoolMicrokernelTester()
417 .mr(9)
418 .n(n)
419 .kh(ks)
420 .kw(ks)
421 .kc(kc)
422 .s(s)
423 .Test(xnn_q8_avgpool_ukernel_up9__neon);
424 }
425 }
426 }
427 }
428 }
429
430 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_twopass_fulltile) {
431 TEST_REQUIRES_ARM_NEON;
432 auto tester = AvgPoolMicrokernelTester()
433 .mr(9)
434 .qr(8)
435 .kc(8);
436 const size_t ks = tester.mr() + tester.qr();
437 for (size_t kh = 1; kh <= ks; kh++) {
438 for (size_t kw = 1; kw <= ks; kw++) {
439 if (kh * kw == ks) {
440 tester
441 .kh(kh)
442 .kw(kw)
443 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
444 }
445 }
446 }
447 }
448
449 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_twopass_subtile) {
450 TEST_REQUIRES_ARM_NEON;
451 auto tester = AvgPoolMicrokernelTester()
452 .mr(9)
453 .qr(8)
454 .kc(8);
455 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
456 tester
457 .kh(ks)
458 .kw(1)
459 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
460 tester
461 .kh(1)
462 .kw(ks)
463 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
464 }
465 }
466
467 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_multipass_fulltile) {
468 TEST_REQUIRES_ARM_NEON;
469 for (size_t ks : std::vector<size_t>{{25, 49}}) {
470 auto tester = AvgPoolMicrokernelTester()
471 .mr(9)
472 .qr(8)
473 .kc(8);
474 for (size_t kh = 1; kh <= ks; kh++) {
475 for (size_t kw = 1; kw <= ks; kw++) {
476 if (kh * kw == ks) {
477 tester
478 .kh(kh)
479 .kw(kw)
480 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
481 }
482 }
483 }
484 }
485 }
486
487 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_multipass_subtile) {
488 TEST_REQUIRES_ARM_NEON;
489 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
490 auto tester = AvgPoolMicrokernelTester()
491 .mr(9)
492 .qr(8)
493 .kc(8);
494 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
495 tester
496 .kh(ks)
497 .kw(1)
498 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
499 tester
500 .kh(1)
501 .kw(ks)
502 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
503 }
504 }
505 }
506
507 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_fulltile) {
508 TEST_REQUIRES_ARM_NEON;
509 auto tester = AvgPoolMicrokernelTester()
510 .mr(9)
511 .qr(8)
512 .iterations(3);
513 const size_t ks = 17;
514 for (size_t kc = 8; kc < 128; kc += 24) {
515 tester
516 .kc(kc)
517 .kh(ks)
518 .kw(1)
519 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
520 tester
521 .kc(kc)
522 .kh(1)
523 .kw(ks)
524 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
525 }
526 }
527
528 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_subtile) {
529 TEST_REQUIRES_ARM_NEON;
530 auto tester = AvgPoolMicrokernelTester()
531 .mr(9)
532 .qr(8)
533 .iterations(3);
534 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
535 for (size_t kc = 8; kc < 128; kc += 24) {
536 tester
537 .kc(kc)
538 .kh(ks)
539 .kw(1)
540 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
541 tester
542 .kc(kc)
543 .kh(1)
544 .kw(ks)
545 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
546 }
547 }
548 }
549
550 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_fulltile_with_x_stride) {
551 TEST_REQUIRES_ARM_NEON;
552 auto tester = AvgPoolMicrokernelTester()
553 .mr(9)
554 .qr(8)
555 .iterations(3);
556 const size_t ks = tester.mr() + tester.qr();
557 for (size_t kh = 1; kh <= ks; kh++) {
558 for (size_t kw = 1; kw <= ks; kw++) {
559 if (kh * kw == ks) {
560 for (size_t kc = 8; kc < 128; kc += 24) {
561 tester
562 .kh(kh)
563 .kw(kw)
564 .kc(kc)
565 .x_stride(131)
566 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
567 }
568 }
569 }
570 }
571 }
572
573 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_fulltile) {
574 TEST_REQUIRES_ARM_NEON;
575 for (size_t ks : std::vector<size_t>{{25, 49}}) {
576 auto tester = AvgPoolMicrokernelTester()
577 .mr(9)
578 .qr(8)
579 .iterations(3);
580 for (size_t kh = 1; kh <= ks; kh++) {
581 for (size_t kw = 1; kw <= ks; kw++) {
582 if (kh * kw == ks) {
583 for (size_t kc = 8; kc < 128; kc += 24) {
584 tester
585 .kh(kh)
586 .kw(kw)
587 .kc(kc)
588 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
589 }
590 }
591 }
592 }
593 }
594 }
595
596 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_subtile) {
597 TEST_REQUIRES_ARM_NEON;
598 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
599 auto tester = AvgPoolMicrokernelTester()
600 .mr(9)
601 .qr(8)
602 .iterations(3);
603 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
604 for (size_t kc = 8; kc < 128; kc += 24) {
605 tester
606 .kc(kc)
607 .kh(ks)
608 .kw(1)
609 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
610 tester
611 .kc(kc)
612 .kh(1)
613 .kw(ks)
614 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
615 }
616 }
617 }
618 }
619
620 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_fulltile_with_x_stride) {
621 TEST_REQUIRES_ARM_NEON;
622 for (size_t ks : std::vector<size_t>{{25, 49}}) {
623 auto tester = AvgPoolMicrokernelTester()
624 .mr(9)
625 .qr(8)
626 .iterations(3);
627 for (size_t kh = 1; kh <= ks; kh++) {
628 for (size_t kw = 1; kw <= ks; kw++) {
629 if (kh * kw == ks) {
630 for (size_t kc = 8; kc < 128; kc += 24) {
631 tester
632 .kh(kh)
633 .kw(kw)
634 .kc(kc)
635 .x_stride(131)
636 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
637 }
638 }
639 }
640 }
641 }
642 }
643
644 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_fulltile) {
645 TEST_REQUIRES_ARM_NEON;
646 auto tester = AvgPoolMicrokernelTester()
647 .mr(9)
648 .qr(8)
649 .iterations(3);
650 const size_t ks = tester.mr() + tester.qr();
651 for (size_t kh = 1; kh <= ks; kh++) {
652 for (size_t kw = 1; kw <= ks; kw++) {
653 if (kh * kw == ks) {
654 for (size_t kc = 1; kc < 8; kc++) {
655 tester
656 .kh(kh)
657 .kw(kw)
658 .kc(kc)
659 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
660 }
661 }
662 }
663 }
664 }
665
666 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_subtile) {
667 TEST_REQUIRES_ARM_NEON;
668 auto tester = AvgPoolMicrokernelTester()
669 .mr(9)
670 .qr(8)
671 .iterations(3);
672 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
673 for (size_t kc = 1; kc < 8; kc++) {
674 tester
675 .kc(kc)
676 .kh(ks)
677 .kw(1)
678 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
679 tester
680 .kc(kc)
681 .kh(1)
682 .kw(ks)
683 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
684 }
685 }
686 }
687
688 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_fulltile_with_x_stride) {
689 TEST_REQUIRES_ARM_NEON;
690 auto tester = AvgPoolMicrokernelTester()
691 .mr(9)
692 .qr(8)
693 .iterations(3);
694 const size_t ks = tester.mr() + tester.qr();
695 for (size_t kh = 1; kh <= ks; kh++) {
696 for (size_t kw = 1; kw <= ks; kw++) {
697 if (kh * kw == ks) {
698 for (size_t kc = 1; kc < 8; kc++) {
699 tester
700 .kh(kh)
701 .kw(kw)
702 .kc(kc)
703 .x_stride(23)
704 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
705 }
706 }
707 }
708 }
709 }
710
711 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_fulltile) {
712 TEST_REQUIRES_ARM_NEON;
713 for (size_t ks : std::vector<size_t>{{25, 49}}) {
714 auto tester = AvgPoolMicrokernelTester()
715 .mr(9)
716 .qr(8)
717 .iterations(3);
718 for (size_t kh = 1; kh <= ks; kh++) {
719 for (size_t kw = 1; kw <= ks; kw++) {
720 if (kh * kw == ks) {
721 for (size_t kc = 1; kc < 8; kc++) {
722 tester
723 .kh(kh)
724 .kw(kw)
725 .kc(kc)
726 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
727 }
728 }
729 }
730 }
731 }
732 }
733
734 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_subtile) {
735 TEST_REQUIRES_ARM_NEON;
736 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
737 auto tester = AvgPoolMicrokernelTester()
738 .mr(9)
739 .qr(8)
740 .iterations(3);
741 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
742 for (size_t kc = 1; kc < 8; kc++) {
743 tester
744 .kc(kc)
745 .kh(ks)
746 .kw(1)
747 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
748 tester
749 .kc(kc)
750 .kh(1)
751 .kw(ks)
752 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
753 }
754 }
755 }
756 }
757
758 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_fulltile_with_x_stride) {
759 TEST_REQUIRES_ARM_NEON;
760 for (size_t ks : std::vector<size_t>{{25, 49}}) {
761 auto tester = AvgPoolMicrokernelTester()
762 .mr(9)
763 .qr(8)
764 .iterations(3);
765 for (size_t kh = 1; kh <= ks; kh++) {
766 for (size_t kw = 1; kw <= ks; kw++) {
767 if (kh * kw == ks) {
768 for (size_t kc = 1; kc < 8; kc++) {
769 tester
770 .kh(kh)
771 .kw(kw)
772 .kc(kc)
773 .x_stride(23)
774 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
775 }
776 }
777 }
778 }
779 }
780 }
781
782 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_fulltile) {
783 TEST_REQUIRES_ARM_NEON;
784 auto tester = AvgPoolMicrokernelTester()
785 .mr(9)
786 .qr(8)
787 .iterations(3);
788 const size_t ks = tester.mr() + tester.qr();
789 for (size_t kh = 1; kh <= ks; kh++) {
790 for (size_t kw = 1; kw <= ks; kw++) {
791 if (kh * kw == ks) {
792 for (size_t kc = 9; kc < 16; kc++) {
793 tester
794 .kh(kh)
795 .kw(kw)
796 .kc(kc)
797 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
798 }
799 }
800 }
801 }
802 }
803
804 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_subtile) {
805 TEST_REQUIRES_ARM_NEON;
806 auto tester = AvgPoolMicrokernelTester()
807 .mr(9)
808 .qr(8)
809 .iterations(3);
810 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
811 for (size_t kc = 9; kc < 16; kc++) {
812 tester
813 .kc(kc)
814 .kh(ks)
815 .kw(1)
816 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
817 tester
818 .kc(kc)
819 .kh(1)
820 .kw(ks)
821 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
822 }
823 }
824 }
825
826 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_fulltile_with_x_stride) {
827 TEST_REQUIRES_ARM_NEON;
828 auto tester = AvgPoolMicrokernelTester()
829 .mr(9)
830 .qr(8)
831 .iterations(3);
832 const size_t ks = tester.mr() + tester.qr();
833 for (size_t kh = 1; kh <= ks; kh++) {
834 for (size_t kw = 1; kw <= ks; kw++) {
835 if (kh * kw == ks) {
836 for (size_t kc = 9; kc < 16; kc++) {
837 tester
838 .kh(kh)
839 .kw(kw)
840 .kc(kc)
841 .x_stride(23)
842 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
843 }
844 }
845 }
846 }
847 }
848
849 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_fulltile) {
850 TEST_REQUIRES_ARM_NEON;
851 for (size_t ks : std::vector<size_t>{{25, 49}}) {
852 auto tester = AvgPoolMicrokernelTester()
853 .mr(9)
854 .qr(8)
855 .iterations(3);
856 for (size_t kh = 1; kh <= ks; kh++) {
857 for (size_t kw = 1; kw <= ks; kw++) {
858 if (kh * kw == ks) {
859 for (size_t kc = 9; kc < 16; kc++) {
860 tester
861 .kh(kh)
862 .kw(kw)
863 .kc(kc)
864 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
865 }
866 }
867 }
868 }
869 }
870 }
871
872 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_subtile) {
873 TEST_REQUIRES_ARM_NEON;
874 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
875 auto tester = AvgPoolMicrokernelTester()
876 .mr(9)
877 .qr(8)
878 .iterations(3);
879 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
880 for (size_t kc = 9; kc < 16; kc++) {
881 tester
882 .kc(kc)
883 .kh(ks)
884 .kw(1)
885 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
886 tester
887 .kc(kc)
888 .kh(1)
889 .kw(ks)
890 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
891 }
892 }
893 }
894 }
895
896 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_fulltile_with_x_stride) {
897 TEST_REQUIRES_ARM_NEON;
898 for (size_t ks : std::vector<size_t>{{25, 49}}) {
899 auto tester = AvgPoolMicrokernelTester()
900 .mr(9)
901 .qr(8)
902 .iterations(3);
903 for (size_t kh = 1; kh <= ks; kh++) {
904 for (size_t kw = 1; kw <= ks; kw++) {
905 if (kh * kw == ks) {
906 for (size_t kc = 9; kc < 16; kc++) {
907 tester
908 .kh(kh)
909 .kw(kw)
910 .kc(kc)
911 .x_stride(23)
912 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
913 }
914 }
915 }
916 }
917 }
918 }
919
920 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_x_scale) {
921 TEST_REQUIRES_ARM_NEON;
922 for (size_t n = 1; n <= 5; n += 2) {
923 for (size_t kc = 8; kc < 128; kc += 24) {
924 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
925 AvgPoolMicrokernelTester()
926 .mr(9)
927 .qr(8)
928 .n(n)
929 .kh(5)
930 .kw(5)
931 .kc(kc)
932 .x_scale(x_scale)
933 .iterations(1)
934 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
935 }
936 }
937 }
938 }
939
940 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_x_zero_point) {
941 TEST_REQUIRES_ARM_NEON;
942 for (size_t n = 1; n <= 5; n += 2) {
943 for (size_t kc = 8; kc < 128; kc += 24) {
944 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
945 AvgPoolMicrokernelTester()
946 .mr(9)
947 .qr(8)
948 .n(n)
949 .kh(5)
950 .kw(5)
951 .kc(kc)
952 .x_zero_point(uint8_t(x_zero_point))
953 .iterations(1)
954 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
955 }
956 }
957 }
958 }
959
960 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_y_scale) {
961 TEST_REQUIRES_ARM_NEON;
962 for (size_t n = 1; n <= 5; n += 2) {
963 for (size_t kc = 8; kc < 128; kc += 24) {
964 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
965 AvgPoolMicrokernelTester()
966 .mr(9)
967 .qr(8)
968 .n(n)
969 .kh(5)
970 .kw(5)
971 .kc(kc)
972 .y_scale(y_scale)
973 .iterations(1)
974 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
975 }
976 }
977 }
978 }
979
980 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_y_zero_point) {
981 TEST_REQUIRES_ARM_NEON;
982 for (size_t n = 1; n <= 5; n += 2) {
983 for (size_t kc = 8; kc < 128; kc += 24) {
984 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
985 AvgPoolMicrokernelTester()
986 .mr(9)
987 .qr(8)
988 .n(n)
989 .kh(5)
990 .kw(5)
991 .kc(kc)
992 .y_zero_point(uint8_t(y_zero_point))
993 .iterations(1)
994 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
995 }
996 }
997 }
998 }
999
1000 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_qmax) {
1001 TEST_REQUIRES_ARM_NEON;
1002 for (size_t n = 1; n <= 5; n += 2) {
1003 for (size_t kc = 8; kc < 128; kc += 24) {
1004 AvgPoolMicrokernelTester()
1005 .mr(9)
1006 .qr(8)
1007 .n(n)
1008 .kh(5)
1009 .kw(5)
1010 .kc(kc)
1011 .x_zero_point(128)
1012 .y_zero_point(128)
1013 .x_scale(1.0f)
1014 .y_scale(1.0f)
1015 .qmax(128)
1016 .iterations(3)
1017 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1018 }
1019 }
1020 }
1021
1022 TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_qmin) {
1023 TEST_REQUIRES_ARM_NEON;
1024 for (size_t n = 1; n <= 5; n += 2) {
1025 for (size_t kc = 8; kc < 128; kc += 24) {
1026 AvgPoolMicrokernelTester()
1027 .mr(9)
1028 .qr(8)
1029 .n(n)
1030 .kh(5)
1031 .kw(5)
1032 .kc(kc)
1033 .x_zero_point(128)
1034 .y_zero_point(128)
1035 .x_scale(1.0f)
1036 .y_scale(1.0f)
1037 .qmin(128)
1038 .iterations(3)
1039 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1040 }
1041 }
1042 }
1043
1044 TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n) {
1045 TEST_REQUIRES_ARM_NEON;
1046 for (size_t n = 2; n < 5; n++) {
1047 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1048 for (size_t kc = 8; kc < 25; kc += 5) {
1049 AvgPoolMicrokernelTester()
1050 .mr(9)
1051 .qr(8)
1052 .n(n)
1053 .kh(ks)
1054 .kw(ks)
1055 .kc(kc)
1056 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1057 }
1058 }
1059 }
1060 }
1061
1062 TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_x_stride) {
1063 TEST_REQUIRES_ARM_NEON;
1064 for (size_t n = 2; n < 5; n++) {
1065 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1066 for (size_t kc = 8; kc < 25; kc += 5) {
1067 AvgPoolMicrokernelTester()
1068 .mr(9)
1069 .qr(8)
1070 .n(n)
1071 .kh(ks)
1072 .kw(ks)
1073 .kc(kc)
1074 .x_stride(29)
1075 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1076 }
1077 }
1078 }
1079 }
1080
1081 TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_y_stride) {
1082 TEST_REQUIRES_ARM_NEON;
1083 for (size_t n = 2; n < 5; n++) {
1084 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1085 for (size_t kc = 8; kc < 25; kc += 5) {
1086 AvgPoolMicrokernelTester()
1087 .mr(9)
1088 .qr(8)
1089 .n(n)
1090 .kh(ks)
1091 .kw(ks)
1092 .kc(kc)
1093 .y_stride(31)
1094 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1095 }
1096 }
1097 }
1098 }
1099
1100 TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_s) {
1101 TEST_REQUIRES_ARM_NEON;
1102 for (size_t n = 2; n < 5; n++) {
1103 for (size_t ks : std::vector<size_t>{{5, 7}}) {
1104 for (size_t s = 2; s <= 5; s++) {
1105 for (size_t kc = 8; kc < 25; kc += 5) {
1106 AvgPoolMicrokernelTester()
1107 .mr(9)
1108 .qr(8)
1109 .n(n)
1110 .kh(ks)
1111 .kw(ks)
1112 .kc(kc)
1113 .s(s)
1114 .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
1115 }
1116 }
1117 }
1118 }
1119 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001120#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001121
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001122#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001123 TEST(Q8_AVGPOOL_UP9__SSE2, kc_eq_8_fulltile) {
1124 TEST_REQUIRES_X86_SSE2;
1125 auto tester = AvgPoolMicrokernelTester()
1126 .mr(9)
1127 .kc(8);
1128 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1129 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1130 if (kh * kw == tester.mr()) {
1131 tester
1132 .kh(kh)
1133 .kw(kw)
1134 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1135 }
1136 }
1137 }
1138 }
1139
1140 TEST(Q8_AVGPOOL_UP9__SSE2, kc_eq_8_subtile) {
1141 TEST_REQUIRES_X86_SSE2;
1142 auto tester = AvgPoolMicrokernelTester()
1143 .mr(9)
1144 .kc(8);
1145 for (size_t ks = 2; ks < tester.mr(); ks++) {
1146 for (size_t kh = 1; kh <= ks; kh++) {
1147 for (size_t kw = 1; kw <= ks; kw++) {
1148 if (kh * kw == ks) {
1149 tester
1150 .kh(kh)
1151 .kw(kw)
1152 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1153 }
1154 }
1155 }
1156 }
1157 }
1158
1159 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_fulltile) {
1160 TEST_REQUIRES_X86_SSE2;
1161 auto tester = AvgPoolMicrokernelTester()
1162 .mr(9);
1163 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1164 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1165 if (kh * kw == tester.mr()) {
1166 for (size_t kc = 8; kc < 128; kc += 24) {
1167 tester
1168 .kh(kh)
1169 .kw(kw)
1170 .kc(kc)
1171 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1172 }
1173 }
1174 }
1175 }
1176 }
1177
1178 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_subtile) {
1179 TEST_REQUIRES_X86_SSE2;
1180 auto tester = AvgPoolMicrokernelTester()
1181 .mr(9)
1182 .iterations(3);
1183 for (size_t ks = 2; ks < tester.mr(); ks++) {
1184 for (size_t kh = 1; kh <= ks; kh++) {
1185 for (size_t kw = 1; kw <= ks; kw++) {
1186 if (kh * kw == ks) {
1187 for (size_t kc = 8; kc < 128; kc += 24) {
1188 tester
1189 .kh(kh)
1190 .kw(kw)
1191 .kc(kc)
1192 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1193 }
1194 }
1195 }
1196 }
1197 }
1198 }
1199
1200 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_fulltile_with_x_stride) {
1201 TEST_REQUIRES_X86_SSE2;
1202 auto tester = AvgPoolMicrokernelTester()
1203 .mr(9)
1204 .iterations(3);
1205 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1206 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1207 if (kh * kw == tester.mr()) {
1208 for (size_t kc = 8; kc < 128; kc += 24) {
1209 tester
1210 .kh(kh)
1211 .kw(kw)
1212 .kc(kc)
1213 .x_stride(131)
1214 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1215 }
1216 }
1217 }
1218 }
1219 }
1220
1221 TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_fulltile) {
1222 TEST_REQUIRES_X86_SSE2;
1223 auto tester = AvgPoolMicrokernelTester()
1224 .mr(9);
1225 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1226 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1227 if (kh * kw == tester.mr()) {
1228 for (size_t kc = 1; kc < 8; kc++) {
1229 tester
1230 .kh(kh)
1231 .kw(kw)
1232 .kc(kc)
1233 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1234 }
1235 }
1236 }
1237 }
1238 }
1239
1240 TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_subtile) {
1241 TEST_REQUIRES_X86_SSE2;
1242 auto tester = AvgPoolMicrokernelTester()
1243 .mr(9)
1244 .iterations(3);
1245 for (size_t ks = 2; ks < tester.mr(); ks++) {
1246 for (size_t kh = 1; kh <= ks; kh++) {
1247 for (size_t kw = 1; kw <= ks; kw++) {
1248 if (kh * kw == ks) {
1249 for (size_t kc = 1; kc < 8; kc++) {
1250 tester
1251 .kh(kh)
1252 .kw(kw)
1253 .kc(kc)
1254 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1255 }
1256 }
1257 }
1258 }
1259 }
1260 }
1261
1262 TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_fulltile_with_x_stride) {
1263 TEST_REQUIRES_X86_SSE2;
1264 auto tester = AvgPoolMicrokernelTester()
1265 .mr(9)
1266 .iterations(3);
1267 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1268 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1269 if (kh * kw == tester.mr()) {
1270 for (size_t kc = 1; kc < 8; kc++) {
1271 tester
1272 .kh(kh)
1273 .kw(kw)
1274 .kc(kc)
1275 .x_stride(23)
1276 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1277 }
1278 }
1279 }
1280 }
1281 }
1282
1283 TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_fulltile) {
1284 TEST_REQUIRES_X86_SSE2;
1285 auto tester = AvgPoolMicrokernelTester()
1286 .mr(9);
1287 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1288 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1289 if (kh * kw == tester.mr()) {
1290 for (size_t kc = 9; kc < 16; kc++) {
1291 tester
1292 .kh(kh)
1293 .kw(kw)
1294 .kc(kc)
1295 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1296 }
1297 }
1298 }
1299 }
1300 }
1301
1302 TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_subtile) {
1303 TEST_REQUIRES_X86_SSE2;
1304 auto tester = AvgPoolMicrokernelTester()
1305 .mr(9)
1306 .iterations(3);
1307 for (size_t ks = 2; ks < tester.mr(); ks++) {
1308 for (size_t kh = 1; kh <= ks; kh++) {
1309 for (size_t kw = 1; kw <= ks; kw++) {
1310 if (kh * kw == ks) {
1311 for (size_t kc = 9; kc < 16; kc++) {
1312 tester
1313 .kh(kh)
1314 .kw(kw)
1315 .kc(kc)
1316 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1317 }
1318 }
1319 }
1320 }
1321 }
1322 }
1323
1324 TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_fulltile_with_x_stride) {
1325 TEST_REQUIRES_X86_SSE2;
1326 auto tester = AvgPoolMicrokernelTester()
1327 .mr(9)
1328 .iterations(3);
1329 for (size_t kh = 1; kh <= tester.mr(); kh++) {
1330 for (size_t kw = 1; kw <= tester.mr(); kw++) {
1331 if (kh * kw == tester.mr()) {
1332 for (size_t kc = 9; kc < 16; kc++) {
1333 tester
1334 .kh(kh)
1335 .kw(kw)
1336 .kc(kc)
1337 .x_stride(23)
1338 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1339 }
1340 }
1341 }
1342 }
1343 }
1344
1345 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_x_scale) {
1346 TEST_REQUIRES_X86_SSE2;
1347 for (size_t n = 1; n <= 5; n += 2) {
1348 for (size_t kc = 8; kc < 128; kc += 24) {
1349 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
1350 AvgPoolMicrokernelTester()
1351 .mr(9)
1352 .n(n)
1353 .kh(3)
1354 .kw(3)
1355 .kc(kc)
1356 .x_scale(x_scale)
1357 .iterations(2)
1358 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1359 }
1360 }
1361 }
1362 }
1363
1364 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_x_zero_point) {
1365 TEST_REQUIRES_X86_SSE2;
1366 for (size_t n = 1; n <= 5; n += 2) {
1367 for (size_t kc = 8; kc < 128; kc += 24) {
1368 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
1369 AvgPoolMicrokernelTester()
1370 .mr(9)
1371 .n(n)
1372 .kh(3)
1373 .kw(3)
1374 .kc(kc)
1375 .x_zero_point(uint8_t(x_zero_point))
1376 .iterations(3)
1377 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1378 }
1379 }
1380 }
1381 }
1382
1383 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_y_scale) {
1384 TEST_REQUIRES_X86_SSE2;
1385 for (size_t n = 1; n <= 5; n += 2) {
1386 for (size_t kc = 8; kc < 128; kc += 24) {
1387 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
1388 AvgPoolMicrokernelTester()
1389 .mr(9)
1390 .n(n)
1391 .kh(3)
1392 .kw(3)
1393 .kc(kc)
1394 .y_scale(y_scale)
1395 .iterations(2)
1396 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1397 }
1398 }
1399 }
1400 }
1401
1402 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_y_zero_point) {
1403 TEST_REQUIRES_X86_SSE2;
1404 for (size_t n = 1; n <= 5; n += 2) {
1405 for (size_t kc = 8; kc < 128; kc += 24) {
1406 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
1407 AvgPoolMicrokernelTester()
1408 .mr(9)
1409 .n(n)
1410 .kh(3)
1411 .kw(3)
1412 .kc(kc)
1413 .y_zero_point(uint8_t(y_zero_point))
1414 .iterations(3)
1415 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1416 }
1417 }
1418 }
1419 }
1420
1421 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_qmax) {
1422 TEST_REQUIRES_X86_SSE2;
1423 for (size_t n = 1; n <= 5; n += 2) {
1424 for (size_t kc = 8; kc < 128; kc += 24) {
1425 AvgPoolMicrokernelTester()
1426 .mr(9)
1427 .n(n)
1428 .kh(3)
1429 .kw(3)
1430 .kc(kc)
1431 .x_zero_point(128)
1432 .y_zero_point(128)
1433 .x_scale(1.0f)
1434 .y_scale(1.0f)
1435 .qmax(128)
1436 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1437 }
1438 }
1439 }
1440
1441 TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_qmin) {
1442 TEST_REQUIRES_X86_SSE2;
1443 for (size_t n = 1; n <= 5; n += 2) {
1444 for (size_t kc = 8; kc < 128; kc += 24) {
1445 AvgPoolMicrokernelTester()
1446 .mr(9)
1447 .n(n)
1448 .kh(3)
1449 .kw(3)
1450 .kc(kc)
1451 .x_zero_point(128)
1452 .y_zero_point(128)
1453 .x_scale(1.0f)
1454 .y_scale(1.0f)
1455 .qmin(128)
1456 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1457 }
1458 }
1459 }
1460
1461 TEST(Q8_AVGPOOL_UP9__SSE2, small_n) {
1462 TEST_REQUIRES_X86_SSE2;
1463 for (size_t n = 2; n < 5; n++) {
1464 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1465 for (size_t kc = 8; kc < 25; kc += 5) {
1466 AvgPoolMicrokernelTester()
1467 .mr(9)
1468 .n(n)
1469 .kh(ks)
1470 .kw(ks)
1471 .kc(kc)
1472 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1473 }
1474 }
1475 }
1476 }
1477
1478 TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_x_stride) {
1479 TEST_REQUIRES_X86_SSE2;
1480 for (size_t n = 2; n < 5; n++) {
1481 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1482 for (size_t kc = 8; kc < 25; kc += 5) {
1483 AvgPoolMicrokernelTester()
1484 .mr(9)
1485 .n(n)
1486 .kh(ks)
1487 .kw(ks)
1488 .kc(kc)
1489 .x_stride(29)
1490 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1491 }
1492 }
1493 }
1494 }
1495
1496 TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_y_stride) {
1497 TEST_REQUIRES_X86_SSE2;
1498 for (size_t n = 2; n < 5; n++) {
1499 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1500 for (size_t kc = 8; kc < 25; kc += 5) {
1501 AvgPoolMicrokernelTester()
1502 .mr(9)
1503 .n(n)
1504 .kh(ks)
1505 .kw(ks)
1506 .kc(kc)
1507 .y_stride(31)
1508 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1509 }
1510 }
1511 }
1512 }
1513
1514 TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_s) {
1515 TEST_REQUIRES_X86_SSE2;
1516 for (size_t n = 2; n < 5; n++) {
1517 for (size_t ks : std::vector<size_t>{{2, 3}}) {
1518 for (size_t kc = 8; kc < 25; kc += 5) {
1519 for (size_t s = 2; s <= ks; s++) {
1520 AvgPoolMicrokernelTester()
1521 .mr(9)
1522 .n(n)
1523 .kh(ks)
1524 .kw(ks)
1525 .kc(kc)
1526 .s(s)
1527 .Test(xnn_q8_avgpool_ukernel_up9__sse2);
1528 }
1529 }
1530 }
1531 }
1532 }
1533
1534 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_twopass_fulltile) {
1535 TEST_REQUIRES_X86_SSE2;
1536 auto tester = AvgPoolMicrokernelTester()
1537 .mr(9)
1538 .qr(8)
1539 .kc(8);
1540 const size_t ks = tester.mr() + tester.qr();
1541 for (size_t kh = 1; kh <= ks; kh++) {
1542 for (size_t kw = 1; kw <= ks; kw++) {
1543 if (kh * kw == ks) {
1544 tester
1545 .kh(kh)
1546 .kw(kw)
1547 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1548 }
1549 }
1550 }
1551 }
1552
1553 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_twopass_subtile) {
1554 TEST_REQUIRES_X86_SSE2;
1555 auto tester = AvgPoolMicrokernelTester()
1556 .mr(9)
1557 .qr(8)
1558 .kc(8);
1559 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1560 tester
1561 .kh(ks)
1562 .kw(1)
1563 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1564 tester
1565 .kh(1)
1566 .kw(ks)
1567 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1568 }
1569 }
1570
1571 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_multipass_fulltile) {
1572 TEST_REQUIRES_X86_SSE2;
1573 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1574 auto tester = AvgPoolMicrokernelTester()
1575 .mr(9)
1576 .qr(8)
1577 .kc(8);
1578 for (size_t kh = 1; kh <= ks; kh++) {
1579 for (size_t kw = 1; kw <= ks; kw++) {
1580 if (kh * kw == ks) {
1581 tester
1582 .kh(kh)
1583 .kw(kw)
1584 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1585 }
1586 }
1587 }
1588 }
1589 }
1590
1591 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_multipass_subtile) {
1592 TEST_REQUIRES_X86_SSE2;
1593 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1594 auto tester = AvgPoolMicrokernelTester()
1595 .mr(9)
1596 .qr(8)
1597 .kc(8);
1598 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1599 tester
1600 .kh(ks)
1601 .kw(1)
1602 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1603 tester
1604 .kh(1)
1605 .kw(ks)
1606 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1607 }
1608 }
1609 }
1610
1611 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_fulltile) {
1612 TEST_REQUIRES_X86_SSE2;
1613 auto tester = AvgPoolMicrokernelTester()
1614 .mr(9)
1615 .qr(8)
1616 .iterations(3);
1617 const size_t ks = 17;
1618 for (size_t kc = 8; kc < 128; kc += 24) {
1619 tester
1620 .kc(kc)
1621 .kh(ks)
1622 .kw(1)
1623 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1624 tester
1625 .kc(kc)
1626 .kh(1)
1627 .kw(ks)
1628 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1629 }
1630 }
1631
1632 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_subtile) {
1633 TEST_REQUIRES_X86_SSE2;
1634 auto tester = AvgPoolMicrokernelTester()
1635 .mr(9)
1636 .qr(8)
1637 .iterations(3);
1638 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1639 for (size_t kc = 8; kc < 128; kc += 24) {
1640 tester
1641 .kc(kc)
1642 .kh(ks)
1643 .kw(1)
1644 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1645 tester
1646 .kc(kc)
1647 .kh(1)
1648 .kw(ks)
1649 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1650 }
1651 }
1652 }
1653
1654 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_fulltile_with_x_stride) {
1655 TEST_REQUIRES_X86_SSE2;
1656 auto tester = AvgPoolMicrokernelTester()
1657 .mr(9)
1658 .qr(8)
1659 .iterations(3);
1660 const size_t ks = tester.mr() + tester.qr();
1661 for (size_t kh = 1; kh <= ks; kh++) {
1662 for (size_t kw = 1; kw <= ks; kw++) {
1663 if (kh * kw == ks) {
1664 for (size_t kc = 8; kc < 128; kc += 24) {
1665 tester
1666 .kh(kh)
1667 .kw(kw)
1668 .kc(kc)
1669 .x_stride(131)
1670 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1671 }
1672 }
1673 }
1674 }
1675 }
1676
1677 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_fulltile) {
1678 TEST_REQUIRES_X86_SSE2;
1679 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1680 auto tester = AvgPoolMicrokernelTester()
1681 .mr(9)
1682 .qr(8)
1683 .iterations(3);
1684 for (size_t kh = 1; kh <= ks; kh++) {
1685 for (size_t kw = 1; kw <= ks; kw++) {
1686 if (kh * kw == ks) {
1687 for (size_t kc = 8; kc < 128; kc += 24) {
1688 tester
1689 .kh(kh)
1690 .kw(kw)
1691 .kc(kc)
1692 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1693 }
1694 }
1695 }
1696 }
1697 }
1698 }
1699
1700 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_subtile) {
1701 TEST_REQUIRES_X86_SSE2;
1702 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1703 auto tester = AvgPoolMicrokernelTester()
1704 .mr(9)
1705 .qr(8)
1706 .iterations(3);
1707 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1708 for (size_t kc = 8; kc < 128; kc += 24) {
1709 tester
1710 .kc(kc)
1711 .kh(ks)
1712 .kw(1)
1713 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1714 tester
1715 .kc(kc)
1716 .kh(1)
1717 .kw(ks)
1718 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1719 }
1720 }
1721 }
1722 }
1723
1724 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_fulltile_with_x_stride) {
1725 TEST_REQUIRES_X86_SSE2;
1726 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1727 auto tester = AvgPoolMicrokernelTester()
1728 .mr(9)
1729 .qr(8)
1730 .iterations(3);
1731 for (size_t kh = 1; kh <= ks; kh++) {
1732 for (size_t kw = 1; kw <= ks; kw++) {
1733 if (kh * kw == ks) {
1734 for (size_t kc = 8; kc < 128; kc += 24) {
1735 tester
1736 .kh(kh)
1737 .kw(kw)
1738 .kc(kc)
1739 .x_stride(131)
1740 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1741 }
1742 }
1743 }
1744 }
1745 }
1746 }
1747
1748 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_fulltile) {
1749 TEST_REQUIRES_X86_SSE2;
1750 auto tester = AvgPoolMicrokernelTester()
1751 .mr(9)
1752 .qr(8)
1753 .iterations(3);
1754 const size_t ks = tester.mr() + tester.qr();
1755 for (size_t kh = 1; kh <= ks; kh++) {
1756 for (size_t kw = 1; kw <= ks; kw++) {
1757 if (kh * kw == ks) {
1758 for (size_t kc = 1; kc < 8; kc++) {
1759 tester
1760 .kh(kh)
1761 .kw(kw)
1762 .kc(kc)
1763 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1764 }
1765 }
1766 }
1767 }
1768 }
1769
1770 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_subtile) {
1771 TEST_REQUIRES_X86_SSE2;
1772 auto tester = AvgPoolMicrokernelTester()
1773 .mr(9)
1774 .qr(8)
1775 .iterations(3);
1776 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1777 for (size_t kc = 1; kc < 8; kc++) {
1778 tester
1779 .kc(kc)
1780 .kh(ks)
1781 .kw(1)
1782 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1783 tester
1784 .kc(kc)
1785 .kh(1)
1786 .kw(ks)
1787 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1788 }
1789 }
1790 }
1791
1792 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_fulltile_with_x_stride) {
1793 TEST_REQUIRES_X86_SSE2;
1794 auto tester = AvgPoolMicrokernelTester()
1795 .mr(9)
1796 .qr(8)
1797 .iterations(3);
1798 const size_t ks = tester.mr() + tester.qr();
1799 for (size_t kh = 1; kh <= ks; kh++) {
1800 for (size_t kw = 1; kw <= ks; kw++) {
1801 if (kh * kw == ks) {
1802 for (size_t kc = 1; kc < 8; kc++) {
1803 tester
1804 .kh(kh)
1805 .kw(kw)
1806 .kc(kc)
1807 .x_stride(23)
1808 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1809 }
1810 }
1811 }
1812 }
1813 }
1814
1815 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_fulltile) {
1816 TEST_REQUIRES_X86_SSE2;
1817 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1818 auto tester = AvgPoolMicrokernelTester()
1819 .mr(9)
1820 .qr(8)
1821 .iterations(3);
1822 for (size_t kh = 1; kh <= ks; kh++) {
1823 for (size_t kw = 1; kw <= ks; kw++) {
1824 if (kh * kw == ks) {
1825 for (size_t kc = 1; kc < 8; kc++) {
1826 tester
1827 .kh(kh)
1828 .kw(kw)
1829 .kc(kc)
1830 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1831 }
1832 }
1833 }
1834 }
1835 }
1836 }
1837
1838 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_subtile) {
1839 TEST_REQUIRES_X86_SSE2;
1840 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1841 auto tester = AvgPoolMicrokernelTester()
1842 .mr(9)
1843 .qr(8)
1844 .iterations(3);
1845 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1846 for (size_t kc = 1; kc < 8; kc++) {
1847 tester
1848 .kc(kc)
1849 .kh(ks)
1850 .kw(1)
1851 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1852 tester
1853 .kc(kc)
1854 .kh(1)
1855 .kw(ks)
1856 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1857 }
1858 }
1859 }
1860 }
1861
1862 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_fulltile_with_x_stride) {
1863 TEST_REQUIRES_X86_SSE2;
1864 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1865 auto tester = AvgPoolMicrokernelTester()
1866 .mr(9)
1867 .qr(8)
1868 .iterations(3);
1869 for (size_t kh = 1; kh <= ks; kh++) {
1870 for (size_t kw = 1; kw <= ks; kw++) {
1871 if (kh * kw == ks) {
1872 for (size_t kc = 1; kc < 8; kc++) {
1873 tester
1874 .kh(kh)
1875 .kw(kw)
1876 .kc(kc)
1877 .x_stride(23)
1878 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1879 }
1880 }
1881 }
1882 }
1883 }
1884 }
1885
1886 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_fulltile) {
1887 TEST_REQUIRES_X86_SSE2;
1888 auto tester = AvgPoolMicrokernelTester()
1889 .mr(9)
1890 .qr(8)
1891 .iterations(3);
1892 const size_t ks = tester.mr() + tester.qr();
1893 for (size_t kh = 1; kh <= ks; kh++) {
1894 for (size_t kw = 1; kw <= ks; kw++) {
1895 if (kh * kw == ks) {
1896 for (size_t kc = 9; kc < 16; kc++) {
1897 tester
1898 .kh(kh)
1899 .kw(kw)
1900 .kc(kc)
1901 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1902 }
1903 }
1904 }
1905 }
1906 }
1907
1908 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_subtile) {
1909 TEST_REQUIRES_X86_SSE2;
1910 auto tester = AvgPoolMicrokernelTester()
1911 .mr(9)
1912 .qr(8)
1913 .iterations(3);
1914 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
1915 for (size_t kc = 9; kc < 16; kc++) {
1916 tester
1917 .kc(kc)
1918 .kh(ks)
1919 .kw(1)
1920 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1921 tester
1922 .kc(kc)
1923 .kh(1)
1924 .kw(ks)
1925 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1926 }
1927 }
1928 }
1929
1930 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_fulltile_with_x_stride) {
1931 TEST_REQUIRES_X86_SSE2;
1932 auto tester = AvgPoolMicrokernelTester()
1933 .mr(9)
1934 .qr(8)
1935 .iterations(3);
1936 const size_t ks = tester.mr() + tester.qr();
1937 for (size_t kh = 1; kh <= ks; kh++) {
1938 for (size_t kw = 1; kw <= ks; kw++) {
1939 if (kh * kw == ks) {
1940 for (size_t kc = 9; kc < 16; kc++) {
1941 tester
1942 .kh(kh)
1943 .kw(kw)
1944 .kc(kc)
1945 .x_stride(23)
1946 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_fulltile) {
1954 TEST_REQUIRES_X86_SSE2;
1955 for (size_t ks : std::vector<size_t>{{25, 49}}) {
1956 auto tester = AvgPoolMicrokernelTester()
1957 .mr(9)
1958 .qr(8)
1959 .iterations(3);
1960 for (size_t kh = 1; kh <= ks; kh++) {
1961 for (size_t kw = 1; kw <= ks; kw++) {
1962 if (kh * kw == ks) {
1963 for (size_t kc = 9; kc < 16; kc++) {
1964 tester
1965 .kh(kh)
1966 .kw(kw)
1967 .kc(kc)
1968 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1969 }
1970 }
1971 }
1972 }
1973 }
1974 }
1975
1976 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_subtile) {
1977 TEST_REQUIRES_X86_SSE2;
1978 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
1979 auto tester = AvgPoolMicrokernelTester()
1980 .mr(9)
1981 .qr(8)
1982 .iterations(3);
1983 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
1984 for (size_t kc = 9; kc < 16; kc++) {
1985 tester
1986 .kc(kc)
1987 .kh(ks)
1988 .kw(1)
1989 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1990 tester
1991 .kc(kc)
1992 .kh(1)
1993 .kw(ks)
1994 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
1995 }
1996 }
1997 }
1998 }
1999
2000 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_fulltile_with_x_stride) {
2001 TEST_REQUIRES_X86_SSE2;
2002 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2003 auto tester = AvgPoolMicrokernelTester()
2004 .mr(9)
2005 .qr(8)
2006 .iterations(3);
2007 for (size_t kh = 1; kh <= ks; kh++) {
2008 for (size_t kw = 1; kw <= ks; kw++) {
2009 if (kh * kw == ks) {
2010 for (size_t kc = 9; kc < 16; kc++) {
2011 tester
2012 .kh(kh)
2013 .kw(kw)
2014 .kc(kc)
2015 .x_stride(23)
2016 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2017 }
2018 }
2019 }
2020 }
2021 }
2022 }
2023
2024 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_x_scale) {
2025 TEST_REQUIRES_X86_SSE2;
2026 for (size_t n = 1; n <= 5; n += 2) {
2027 for (size_t kc = 8; kc < 128; kc += 24) {
2028 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
2029 AvgPoolMicrokernelTester()
2030 .mr(9)
2031 .qr(8)
2032 .n(n)
2033 .kh(5)
2034 .kw(5)
2035 .kc(kc)
2036 .x_scale(x_scale)
2037 .iterations(1)
2038 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2039 }
2040 }
2041 }
2042 }
2043
2044 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_x_zero_point) {
2045 TEST_REQUIRES_X86_SSE2;
2046 for (size_t n = 1; n <= 5; n += 2) {
2047 for (size_t kc = 8; kc < 128; kc += 24) {
2048 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
2049 AvgPoolMicrokernelTester()
2050 .mr(9)
2051 .qr(8)
2052 .n(n)
2053 .kh(5)
2054 .kw(5)
2055 .kc(kc)
2056 .x_zero_point(uint8_t(x_zero_point))
2057 .iterations(1)
2058 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2059 }
2060 }
2061 }
2062 }
2063
2064 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_y_scale) {
2065 TEST_REQUIRES_X86_SSE2;
2066 for (size_t n = 1; n <= 5; n += 2) {
2067 for (size_t kc = 8; kc < 128; kc += 24) {
2068 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
2069 AvgPoolMicrokernelTester()
2070 .mr(9)
2071 .qr(8)
2072 .n(n)
2073 .kh(5)
2074 .kw(5)
2075 .kc(kc)
2076 .y_scale(y_scale)
2077 .iterations(1)
2078 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2079 }
2080 }
2081 }
2082 }
2083
2084 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_y_zero_point) {
2085 TEST_REQUIRES_X86_SSE2;
2086 for (size_t n = 1; n <= 5; n += 2) {
2087 for (size_t kc = 8; kc < 128; kc += 24) {
2088 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
2089 AvgPoolMicrokernelTester()
2090 .mr(9)
2091 .qr(8)
2092 .n(n)
2093 .kh(5)
2094 .kw(5)
2095 .kc(kc)
2096 .y_zero_point(uint8_t(y_zero_point))
2097 .iterations(1)
2098 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2099 }
2100 }
2101 }
2102 }
2103
2104 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_qmax) {
2105 TEST_REQUIRES_X86_SSE2;
2106 for (size_t n = 1; n <= 5; n += 2) {
2107 for (size_t kc = 8; kc < 128; kc += 24) {
2108 AvgPoolMicrokernelTester()
2109 .mr(9)
2110 .qr(8)
2111 .n(n)
2112 .kh(5)
2113 .kw(5)
2114 .kc(kc)
2115 .x_zero_point(128)
2116 .y_zero_point(128)
2117 .x_scale(1.0f)
2118 .y_scale(1.0f)
2119 .qmax(128)
2120 .iterations(3)
2121 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2122 }
2123 }
2124 }
2125
2126 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_qmin) {
2127 TEST_REQUIRES_X86_SSE2;
2128 for (size_t n = 1; n <= 5; n += 2) {
2129 for (size_t kc = 8; kc < 128; kc += 24) {
2130 AvgPoolMicrokernelTester()
2131 .mr(9)
2132 .qr(8)
2133 .n(n)
2134 .kh(5)
2135 .kw(5)
2136 .kc(kc)
2137 .x_zero_point(128)
2138 .y_zero_point(128)
2139 .x_scale(1.0f)
2140 .y_scale(1.0f)
2141 .qmin(128)
2142 .iterations(3)
2143 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2144 }
2145 }
2146 }
2147
2148 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n) {
2149 TEST_REQUIRES_X86_SSE2;
2150 for (size_t n = 2; n < 5; n++) {
2151 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2152 for (size_t kc = 8; kc < 25; kc += 5) {
2153 AvgPoolMicrokernelTester()
2154 .mr(9)
2155 .qr(8)
2156 .n(n)
2157 .kh(ks)
2158 .kw(ks)
2159 .kc(kc)
2160 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2161 }
2162 }
2163 }
2164 }
2165
2166 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_x_stride) {
2167 TEST_REQUIRES_X86_SSE2;
2168 for (size_t n = 2; n < 5; n++) {
2169 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2170 for (size_t kc = 8; kc < 25; kc += 5) {
2171 AvgPoolMicrokernelTester()
2172 .mr(9)
2173 .qr(8)
2174 .n(n)
2175 .kh(ks)
2176 .kw(ks)
2177 .kc(kc)
2178 .x_stride(29)
2179 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2180 }
2181 }
2182 }
2183 }
2184
2185 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_y_stride) {
2186 TEST_REQUIRES_X86_SSE2;
2187 for (size_t n = 2; n < 5; n++) {
2188 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2189 for (size_t kc = 8; kc < 25; kc += 5) {
2190 AvgPoolMicrokernelTester()
2191 .mr(9)
2192 .qr(8)
2193 .n(n)
2194 .kh(ks)
2195 .kw(ks)
2196 .kc(kc)
2197 .y_stride(31)
2198 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2199 }
2200 }
2201 }
2202 }
2203
2204 TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_s) {
2205 TEST_REQUIRES_X86_SSE2;
2206 for (size_t n = 2; n < 5; n++) {
2207 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2208 for (size_t s = 2; s <= 5; s++) {
2209 for (size_t kc = 8; kc < 25; kc += 5) {
2210 AvgPoolMicrokernelTester()
2211 .mr(9)
2212 .qr(8)
2213 .n(n)
2214 .kh(ks)
2215 .kw(ks)
2216 .kc(kc)
2217 .s(s)
2218 .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
2219 }
2220 }
2221 }
2222 }
2223 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002224#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002225
2226TEST(Q8_AVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
2227 auto tester = AvgPoolMicrokernelTester()
2228 .mr(9)
2229 .kc(1);
2230 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2231 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2232 if (kh * kw == tester.mr()) {
2233 tester
2234 .kh(kh)
2235 .kw(kw)
2236 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2237 }
2238 }
2239 }
2240}
2241
2242TEST(Q8_AVGPOOL_UP9__SCALAR, kc_eq_1_subtile) {
2243 auto tester = AvgPoolMicrokernelTester()
2244 .mr(9)
2245 .kc(1);
2246 for (size_t ks = 2; ks < tester.mr(); ks++) {
2247 for (size_t kh = 1; kh <= ks; kh++) {
2248 for (size_t kw = 1; kw <= ks; kw++) {
2249 if (kh * kw == ks) {
2250 tester
2251 .kh(kh)
2252 .kw(kw)
2253 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2254 }
2255 }
2256 }
2257 }
2258}
2259
2260TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_fulltile) {
2261 auto tester = AvgPoolMicrokernelTester()
2262 .mr(9);
2263 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2264 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2265 if (kh * kw == tester.mr()) {
2266 for (size_t kc = 2; kc < 8; kc++) {
2267 tester
2268 .kh(kh)
2269 .kw(kw)
2270 .kc(kc)
2271 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2272 }
2273 }
2274 }
2275 }
2276}
2277
2278TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_subtile) {
2279 auto tester = AvgPoolMicrokernelTester()
2280 .mr(9)
2281 .iterations(3);
2282 for (size_t ks = 2; ks < tester.mr(); ks++) {
2283 for (size_t kh = 1; kh <= ks; kh++) {
2284 for (size_t kw = 1; kw <= ks; kw++) {
2285 if (kh * kw == ks) {
2286 for (size_t kc = 2; kc < 8; kc++) {
2287 tester
2288 .kh(kh)
2289 .kw(kw)
2290 .kc(kc)
2291 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2292 }
2293 }
2294 }
2295 }
2296 }
2297}
2298
2299TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_fulltile_with_x_stride) {
2300 auto tester = AvgPoolMicrokernelTester()
2301 .mr(9)
2302 .iterations(3);
2303 for (size_t kh = 1; kh <= tester.mr(); kh++) {
2304 for (size_t kw = 1; kw <= tester.mr(); kw++) {
2305 if (kh * kw == tester.mr()) {
2306 for (size_t kc = 2; kc < 8; kc++) {
2307 tester
2308 .kh(kh)
2309 .kw(kw)
2310 .kc(kc)
2311 .x_stride(23)
2312 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2313 }
2314 }
2315 }
2316 }
2317}
2318
2319TEST(Q8_AVGPOOL_UP9__SCALAR, x_scale) {
2320 for (size_t n = 1; n <= 5; n += 2) {
2321 for (size_t kc = 1; kc < 8; kc += 3) {
2322 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
2323 AvgPoolMicrokernelTester()
2324 .mr(9)
2325 .n(n)
2326 .kh(3)
2327 .kw(3)
2328 .kc(kc)
2329 .x_scale(x_scale)
2330 .iterations(2)
2331 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2332 }
2333 }
2334 }
2335}
2336
2337TEST(Q8_AVGPOOL_UP9__SCALAR, x_zero_point) {
2338 for (size_t n = 1; n <= 5; n += 2) {
2339 for (size_t kc = 1; kc < 8; kc += 3) {
2340 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
2341 AvgPoolMicrokernelTester()
2342 .mr(9)
2343 .n(n)
2344 .kh(3)
2345 .kw(3)
2346 .kc(kc)
2347 .x_zero_point(uint8_t(x_zero_point))
2348 .iterations(3)
2349 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2350 }
2351 }
2352 }
2353}
2354
2355TEST(Q8_AVGPOOL_UP9__SCALAR, y_scale) {
2356 for (size_t n = 1; n <= 5; n += 2) {
2357 for (size_t kc = 1; kc < 8; kc += 3) {
2358 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
2359 AvgPoolMicrokernelTester()
2360 .mr(9)
2361 .n(n)
2362 .kh(3)
2363 .kw(3)
2364 .kc(kc)
2365 .y_scale(y_scale)
2366 .iterations(2)
2367 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2368 }
2369 }
2370 }
2371}
2372
2373TEST(Q8_AVGPOOL_UP9__SCALAR, y_zero_point) {
2374 for (size_t n = 1; n <= 5; n += 2) {
2375 for (size_t kc = 1; kc < 8; kc += 3) {
2376 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
2377 AvgPoolMicrokernelTester()
2378 .mr(9)
2379 .n(n)
2380 .kh(3)
2381 .kw(3)
2382 .kc(kc)
2383 .y_zero_point(uint8_t(y_zero_point))
2384 .iterations(3)
2385 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2386 }
2387 }
2388 }
2389}
2390
2391TEST(Q8_AVGPOOL_UP9__SCALAR, qmax) {
2392 for (size_t n = 1; n <= 5; n += 2) {
2393 for (size_t kc = 1; kc < 8; kc += 3) {
2394 AvgPoolMicrokernelTester()
2395 .mr(9)
2396 .n(n)
2397 .kh(3)
2398 .kw(3)
2399 .kc(kc)
2400 .x_zero_point(128)
2401 .y_zero_point(128)
2402 .x_scale(1.0f)
2403 .y_scale(1.0f)
2404 .qmax(128)
2405 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2406 }
2407 }
2408}
2409
2410TEST(Q8_AVGPOOL_UP9__SCALAR, qmin) {
2411 for (size_t n = 1; n <= 5; n += 2) {
2412 for (size_t kc = 1; kc < 8; kc += 3) {
2413 AvgPoolMicrokernelTester()
2414 .mr(9)
2415 .n(n)
2416 .kh(3)
2417 .kw(3)
2418 .kc(kc)
2419 .x_zero_point(128)
2420 .y_zero_point(128)
2421 .x_scale(1.0f)
2422 .y_scale(1.0f)
2423 .qmin(128)
2424 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2425 }
2426 }
2427}
2428
2429TEST(Q8_AVGPOOL_UP9__SCALAR, small_n) {
2430 for (size_t n = 2; n < 5; n++) {
2431 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2432 for (size_t kc = 1; kc < 8; kc += 3) {
2433 AvgPoolMicrokernelTester()
2434 .mr(9)
2435 .n(n)
2436 .kh(ks)
2437 .kw(ks)
2438 .kc(kc)
2439 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2440 }
2441 }
2442 }
2443}
2444
2445TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_x_stride) {
2446 for (size_t n = 2; n < 5; n++) {
2447 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2448 for (size_t kc = 1; kc < 8; kc += 3) {
2449 AvgPoolMicrokernelTester()
2450 .mr(9)
2451 .n(n)
2452 .kh(ks)
2453 .kw(ks)
2454 .kc(kc)
2455 .x_stride(29)
2456 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2457 }
2458 }
2459 }
2460}
2461
2462TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_y_stride) {
2463 for (size_t n = 2; n < 5; n++) {
2464 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2465 for (size_t kc = 1; kc < 8; kc += 3) {
2466 AvgPoolMicrokernelTester()
2467 .mr(9)
2468 .n(n)
2469 .kh(ks)
2470 .kw(ks)
2471 .kc(kc)
2472 .y_stride(31)
2473 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2474 }
2475 }
2476 }
2477}
2478
2479TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_s) {
2480 for (size_t n = 2; n < 5; n++) {
2481 for (size_t ks : std::vector<size_t>{{2, 3}}) {
2482 for (size_t kc = 1; kc < 8; kc += 3) {
2483 for (size_t s = 2; s <= ks; s++) {
2484 AvgPoolMicrokernelTester()
2485 .mr(9)
2486 .n(n)
2487 .kh(ks)
2488 .kw(ks)
2489 .kc(kc)
2490 .s(s)
2491 .Test(xnn_q8_avgpool_ukernel_up9__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2492 }
2493 }
2494 }
2495 }
2496}
2497
2498TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_fulltile) {
2499 auto tester = AvgPoolMicrokernelTester()
2500 .mr(9)
2501 .qr(8)
2502 .kc(1);
2503 const size_t ks = tester.mr() + tester.qr();
2504 for (size_t kh = 1; kh <= ks; kh++) {
2505 for (size_t kw = 1; kw <= ks; kw++) {
2506 if (kh * kw == ks) {
2507 tester
2508 .kh(kh)
2509 .kw(kw)
2510 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2511 }
2512 }
2513 }
2514}
2515
2516TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_subtile) {
2517 auto tester = AvgPoolMicrokernelTester()
2518 .mr(9)
2519 .qr(8)
2520 .kc(1);
2521 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2522 tester
2523 .kh(ks)
2524 .kw(1)
2525 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2526 tester
2527 .kh(1)
2528 .kw(ks)
2529 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2530 }
2531}
2532
2533TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_fulltile) {
2534 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2535 auto tester = AvgPoolMicrokernelTester()
2536 .mr(9)
2537 .qr(8)
2538 .kc(1);
2539 for (size_t kh = 1; kh <= ks; kh++) {
2540 for (size_t kw = 1; kw <= ks; kw++) {
2541 if (kh * kw == ks) {
2542 tester
2543 .kh(kh)
2544 .kw(kw)
2545 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2546 }
2547 }
2548 }
2549 }
2550}
2551
2552TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_subtile) {
2553 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2554 auto tester = AvgPoolMicrokernelTester()
2555 .mr(9)
2556 .qr(8)
2557 .kc(1);
2558 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2559 tester
2560 .kh(ks)
2561 .kw(1)
2562 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2563 tester
2564 .kh(1)
2565 .kw(ks)
2566 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2567 }
2568 }
2569}
2570
2571TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile) {
2572 auto tester = AvgPoolMicrokernelTester()
2573 .mr(9)
2574 .qr(8)
2575 .iterations(3);
2576 const size_t ks = tester.mr() + tester.qr();
2577 for (size_t kh = 1; kh <= ks; kh++) {
2578 for (size_t kw = 1; kw <= ks; kw++) {
2579 if (kh * kw == ks) {
2580 for (size_t kc = 2; kc < 8; kc++) {
2581 tester
2582 .kh(kh)
2583 .kw(kw)
2584 .kc(kc)
2585 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2586 }
2587 }
2588 }
2589 }
2590}
2591
2592TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_subtile) {
2593 auto tester = AvgPoolMicrokernelTester()
2594 .mr(9)
2595 .qr(8)
2596 .iterations(3);
2597 for (size_t ks = 10; ks < tester.mr() + tester.qr(); ks++) {
2598 for (size_t kc = 2; kc < 8; kc++) {
2599 tester
2600 .kc(kc)
2601 .kh(ks)
2602 .kw(1)
2603 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2604 tester
2605 .kc(kc)
2606 .kh(1)
2607 .kw(ks)
2608 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2609 }
2610 }
2611}
2612
2613TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_x_stride) {
2614 auto tester = AvgPoolMicrokernelTester()
2615 .mr(9)
2616 .qr(8)
2617 .iterations(3);
2618 const size_t ks = tester.mr() + tester.qr();
2619 for (size_t kh = 1; kh <= ks; kh++) {
2620 for (size_t kw = 1; kw <= ks; kw++) {
2621 if (kh * kw == ks) {
2622 for (size_t kc = 2; kc < 8; kc++) {
2623 tester
2624 .kh(kh)
2625 .kw(kw)
2626 .kc(kc)
2627 .x_stride(23)
2628 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2629 }
2630 }
2631 }
2632 }
2633}
2634
2635TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile) {
2636 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2637 auto tester = AvgPoolMicrokernelTester()
2638 .mr(9)
2639 .qr(8)
2640 .iterations(3);
2641 for (size_t kh = 1; kh <= ks; kh++) {
2642 for (size_t kw = 1; kw <= ks; kw++) {
2643 if (kh * kw == ks) {
2644 for (size_t kc = 2; kc < 8; kc++) {
2645 tester
2646 .kh(kh)
2647 .kw(kw)
2648 .kc(kc)
2649 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2650 }
2651 }
2652 }
2653 }
2654 }
2655}
2656
2657TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_subtile) {
2658 for (size_t ks_max : std::vector<size_t>{{25, 49}}) {
2659 auto tester = AvgPoolMicrokernelTester()
2660 .mr(9)
2661 .qr(8)
2662 .iterations(3);
2663 for (size_t ks = ks_max - tester.qr() + 1; ks < ks_max; ks++) {
2664 for (size_t kc = 2; kc < 8; kc++) {
2665 tester
2666 .kc(kc)
2667 .kh(ks)
2668 .kw(1)
2669 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2670 tester
2671 .kc(kc)
2672 .kh(1)
2673 .kw(ks)
2674 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2675 }
2676 }
2677 }
2678}
2679
2680TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile_with_x_stride) {
2681 for (size_t ks : std::vector<size_t>{{25, 49}}) {
2682 auto tester = AvgPoolMicrokernelTester()
2683 .mr(9)
2684 .qr(8)
2685 .iterations(3);
2686 for (size_t kh = 1; kh <= ks; kh++) {
2687 for (size_t kw = 1; kw <= ks; kw++) {
2688 if (kh * kw == ks) {
2689 for (size_t kc = 2; kc < 8; kc++) {
2690 tester
2691 .kh(kh)
2692 .kw(kw)
2693 .kc(kc)
2694 .x_stride(23)
2695 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2696 }
2697 }
2698 }
2699 }
2700 }
2701}
2702
2703TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, x_scale) {
2704 for (size_t n = 1; n <= 5; n += 2) {
2705 for (size_t kc = 1; kc < 8; kc += 3) {
2706 for (float x_scale = 0.01f; x_scale < 100.0f; x_scale *= 3.14159265f) {
2707 AvgPoolMicrokernelTester()
2708 .mr(9)
2709 .qr(8)
2710 .n(n)
2711 .kh(5)
2712 .kw(5)
2713 .kc(kc)
2714 .x_scale(x_scale)
2715 .iterations(1)
2716 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2717 }
2718 }
2719 }
2720}
2721
2722TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, x_zero_point) {
2723 for (size_t n = 1; n <= 5; n += 2) {
2724 for (size_t kc = 1; kc < 8; kc += 3) {
2725 for (int32_t x_zero_point = 0; x_zero_point <= 255; x_zero_point += 51) {
2726 AvgPoolMicrokernelTester()
2727 .mr(9)
2728 .qr(8)
2729 .n(n)
2730 .kh(5)
2731 .kw(5)
2732 .kc(kc)
2733 .x_zero_point(uint8_t(x_zero_point))
2734 .iterations(1)
2735 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2736 }
2737 }
2738 }
2739}
2740
2741TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, y_scale) {
2742 for (size_t n = 1; n <= 5; n += 2) {
2743 for (size_t kc = 1; kc < 8; kc += 3) {
2744 for (float y_scale = 0.01f; y_scale < 100.0f; y_scale *= 3.14159265f) {
2745 AvgPoolMicrokernelTester()
2746 .mr(9)
2747 .qr(8)
2748 .n(n)
2749 .kh(5)
2750 .kw(5)
2751 .kc(kc)
2752 .y_scale(y_scale)
2753 .iterations(1)
2754 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2755 }
2756 }
2757 }
2758}
2759
2760TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, y_zero_point) {
2761 for (size_t n = 1; n <= 5; n += 2) {
2762 for (size_t kc = 1; kc < 8; kc += 3) {
2763 for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
2764 AvgPoolMicrokernelTester()
2765 .mr(9)
2766 .qr(8)
2767 .n(n)
2768 .kh(5)
2769 .kw(5)
2770 .kc(kc)
2771 .y_zero_point(uint8_t(y_zero_point))
2772 .iterations(1)
2773 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2774 }
2775 }
2776 }
2777}
2778
2779TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, qmax) {
2780 for (size_t n = 1; n <= 5; n += 2) {
2781 for (size_t kc = 1; kc < 8; kc += 3) {
2782 AvgPoolMicrokernelTester()
2783 .mr(9)
2784 .qr(8)
2785 .n(n)
2786 .kh(5)
2787 .kw(5)
2788 .kc(kc)
2789 .x_zero_point(128)
2790 .y_zero_point(128)
2791 .x_scale(1.0f)
2792 .y_scale(1.0f)
2793 .qmax(128)
2794 .iterations(3)
2795 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2796 }
2797 }
2798}
2799
2800TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, qmin) {
2801 for (size_t n = 1; n <= 5; n += 2) {
2802 for (size_t kc = 1; kc < 8; kc += 3) {
2803 AvgPoolMicrokernelTester()
2804 .mr(9)
2805 .qr(8)
2806 .n(n)
2807 .kh(5)
2808 .kw(5)
2809 .kc(kc)
2810 .x_zero_point(128)
2811 .y_zero_point(128)
2812 .x_scale(1.0f)
2813 .y_scale(1.0f)
2814 .qmin(128)
2815 .iterations(3)
2816 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2817 }
2818 }
2819}
2820
2821TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n) {
2822 for (size_t n = 2; n < 5; n++) {
2823 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2824 for (size_t kc = 1; kc < 8; kc += 3) {
2825 AvgPoolMicrokernelTester()
2826 .mr(9)
2827 .qr(8)
2828 .n(n)
2829 .kh(ks)
2830 .kw(ks)
2831 .kc(kc)
2832 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2833 }
2834 }
2835 }
2836}
2837
2838TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_x_stride) {
2839 for (size_t n = 2; n < 5; n++) {
2840 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2841 for (size_t kc = 1; kc < 8; kc += 3) {
2842 AvgPoolMicrokernelTester()
2843 .mr(9)
2844 .qr(8)
2845 .n(n)
2846 .kh(ks)
2847 .kw(ks)
2848 .kc(kc)
2849 .x_stride(29)
2850 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2851 }
2852 }
2853 }
2854}
2855
2856TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_y_stride) {
2857 for (size_t n = 2; n < 5; n++) {
2858 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2859 for (size_t kc = 1; kc < 8; kc += 3) {
2860 AvgPoolMicrokernelTester()
2861 .mr(9)
2862 .qr(8)
2863 .n(n)
2864 .kh(ks)
2865 .kw(ks)
2866 .kc(kc)
2867 .y_stride(31)
2868 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2869 }
2870 }
2871 }
2872}
2873
2874TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_s) {
2875 for (size_t n = 2; n < 5; n++) {
2876 for (size_t ks : std::vector<size_t>{{5, 7}}) {
2877 for (size_t s = 2; s <= 5; s++) {
2878 for (size_t kc = 1; kc < 8; kc += 3) {
2879 AvgPoolMicrokernelTester()
2880 .mr(9)
2881 .qr(8)
2882 .n(n)
2883 .kh(ks)
2884 .kw(ks)
2885 .kc(kc)
2886 .s(s)
2887 .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
2888 }
2889 }
2890 }
2891 }
2892}