blob: 38f4a61f776e8c57f38f737b432798d9b8aa6de9 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-dwconv2d-chw.yaml
//   Generator: tools/generate-dwconv2d-chw-test.py
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/dwconv.h>
Marat Dukhanbf715f92020-10-23 20:17:00 -070017#include "dwconv2d-microkernel-tester.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
XNNPACK Teamb455b122019-09-27 18:10:33 -070019
Marat Dukhanc581e482020-10-24 01:28:11 -070020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
21 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4, output_width_eq_4) {
22 TEST_REQUIRES_ARM_NEON;
23 DWConv2DMicrokernelTester()
24 .input_width(4)
25 .input_height(1)
26 .kernel_height(3)
27 .kernel_width(3)
28 .subsampling(1)
29 .padding_left(1)
30 .padding_right(1)
31 .padding_top(1)
32 .padding_bottom(1)
33 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4);
34 }
35
36 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4, output_width_div_4) {
37 TEST_REQUIRES_ARM_NEON;
38 for (size_t input_width = 8; input_width < 32; input_width += 4) {
39 DWConv2DMicrokernelTester()
40 .input_width(input_width)
41 .input_height(1)
42 .kernel_height(3)
43 .kernel_width(3)
44 .subsampling(1)
45 .padding_left(1)
46 .padding_right(1)
47 .padding_top(1)
48 .padding_bottom(1)
49 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4);
50 }
51 }
52
53 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4, output_width_lt_4) {
54 TEST_REQUIRES_ARM_NEON;
55 for (size_t input_width = 1; input_width < 4; input_width++) {
56 DWConv2DMicrokernelTester()
57 .input_width(4)
58 .input_height(1)
59 .kernel_height(3)
60 .kernel_width(3)
61 .subsampling(1)
62 .padding_left(1)
63 .padding_right(1)
64 .padding_top(1)
65 .padding_bottom(1)
66 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4);
67 }
68 }
69
70 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4, output_width_gt_4) {
71 TEST_REQUIRES_ARM_NEON;
72 for (size_t input_width = 5; input_width < 9; input_width++) {
73 DWConv2DMicrokernelTester()
74 .input_width(input_width)
75 .input_height(1)
76 .kernel_height(3)
77 .kernel_width(3)
78 .subsampling(1)
79 .padding_left(1)
80 .padding_right(1)
81 .padding_top(1)
82 .padding_bottom(1)
83 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4);
84 }
85 }
86
87 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4, output_height_gt_1) {
88 TEST_REQUIRES_ARM_NEON;
89 for (size_t input_height = 2; input_height < 3; input_height++) {
90 for (size_t input_width = 1; input_width < 21; input_width += 3) {
91 DWConv2DMicrokernelTester()
92 .input_width(input_width)
93 .input_height(input_height)
94 .kernel_height(3)
95 .kernel_width(3)
96 .subsampling(1)
97 .padding_left(1)
98 .padding_right(1)
99 .padding_top(1)
100 .padding_bottom(1)
101 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4);
102 }
103 }
104 }
105#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
106
107
108#if XNN_ARCH_ARM || XNN_ARCH_ARM64
109 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_width_eq_4) {
110 TEST_REQUIRES_ARM_NEON;
111 DWConv2DMicrokernelTester()
112 .input_width(4)
113 .input_height(2)
114 .kernel_height(3)
115 .kernel_width(3)
116 .subsampling(1)
117 .padding_left(1)
118 .padding_right(1)
119 .padding_top(1)
120 .padding_bottom(1)
121 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
122 }
123
124 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_width_div_4) {
125 TEST_REQUIRES_ARM_NEON;
126 for (size_t input_width = 8; input_width < 32; input_width += 4) {
127 DWConv2DMicrokernelTester()
128 .input_width(input_width)
129 .input_height(2)
130 .kernel_height(3)
131 .kernel_width(3)
132 .subsampling(1)
133 .padding_left(1)
134 .padding_right(1)
135 .padding_top(1)
136 .padding_bottom(1)
137 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
138 }
139 }
140
141 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_width_lt_4) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t input_width = 1; input_width < 4; input_width++) {
144 DWConv2DMicrokernelTester()
145 .input_width(4)
146 .input_height(2)
147 .kernel_height(3)
148 .kernel_width(3)
149 .subsampling(1)
150 .padding_left(1)
151 .padding_right(1)
152 .padding_top(1)
153 .padding_bottom(1)
154 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
155 }
156 }
157
158 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_width_gt_4) {
159 TEST_REQUIRES_ARM_NEON;
160 for (size_t input_width = 5; input_width < 9; input_width++) {
161 DWConv2DMicrokernelTester()
162 .input_width(input_width)
163 .input_height(2)
164 .kernel_height(3)
165 .kernel_width(3)
166 .subsampling(1)
167 .padding_left(1)
168 .padding_right(1)
169 .padding_top(1)
170 .padding_bottom(1)
171 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
172 }
173 }
174
175 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_height_div_2) {
176 TEST_REQUIRES_ARM_NEON;
177 for (size_t input_height = 4; input_height < 16; input_height += 2) {
178 for (size_t input_width = 1; input_width < 21; input_width += 3) {
179 DWConv2DMicrokernelTester()
180 .input_width(input_width)
181 .input_height(input_height)
182 .kernel_height(3)
183 .kernel_width(3)
184 .subsampling(1)
185 .padding_left(1)
186 .padding_right(1)
187 .padding_top(1)
188 .padding_bottom(1)
189 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
190 }
191 }
192 }
193
194 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_height_lt_2) {
195 TEST_REQUIRES_ARM_NEON;
196 for (size_t input_height = 1; input_height < 2; input_height++) {
197 for (size_t input_width = 1; input_width < 21; input_width += 3) {
198 DWConv2DMicrokernelTester()
199 .input_width(input_width)
200 .input_height(input_height)
201 .kernel_height(3)
202 .kernel_width(3)
203 .subsampling(1)
204 .padding_left(1)
205 .padding_right(1)
206 .padding_top(1)
207 .padding_bottom(1)
208 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
209 }
210 }
211 }
212
213 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4, output_height_gt_2) {
214 TEST_REQUIRES_ARM_NEON;
215 for (size_t input_height = 3; input_height < 5; input_height++) {
216 for (size_t input_width = 1; input_width < 21; input_width += 3) {
217 DWConv2DMicrokernelTester()
218 .input_width(input_width)
219 .input_height(input_height)
220 .kernel_height(3)
221 .kernel_width(3)
222 .subsampling(1)
223 .padding_left(1)
224 .padding_right(1)
225 .padding_top(1)
226 .padding_bottom(1)
227 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4);
228 }
229 }
230 }
231#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
232
233
234#if XNN_ARCH_ARM || XNN_ARCH_ARM64
235 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_width_eq_4) {
236 TEST_REQUIRES_ARM_NEON;
237 DWConv2DMicrokernelTester()
238 .input_width(4)
239 .input_height(3)
240 .kernel_height(3)
241 .kernel_width(3)
242 .subsampling(1)
243 .padding_left(1)
244 .padding_right(1)
245 .padding_top(1)
246 .padding_bottom(1)
247 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
248 }
249
250 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_width_div_4) {
251 TEST_REQUIRES_ARM_NEON;
252 for (size_t input_width = 8; input_width < 32; input_width += 4) {
253 DWConv2DMicrokernelTester()
254 .input_width(input_width)
255 .input_height(3)
256 .kernel_height(3)
257 .kernel_width(3)
258 .subsampling(1)
259 .padding_left(1)
260 .padding_right(1)
261 .padding_top(1)
262 .padding_bottom(1)
263 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
264 }
265 }
266
267 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_width_lt_4) {
268 TEST_REQUIRES_ARM_NEON;
269 for (size_t input_width = 1; input_width < 4; input_width++) {
270 DWConv2DMicrokernelTester()
271 .input_width(4)
272 .input_height(3)
273 .kernel_height(3)
274 .kernel_width(3)
275 .subsampling(1)
276 .padding_left(1)
277 .padding_right(1)
278 .padding_top(1)
279 .padding_bottom(1)
280 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
281 }
282 }
283
284 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_width_gt_4) {
285 TEST_REQUIRES_ARM_NEON;
286 for (size_t input_width = 5; input_width < 9; input_width++) {
287 DWConv2DMicrokernelTester()
288 .input_width(input_width)
289 .input_height(3)
290 .kernel_height(3)
291 .kernel_width(3)
292 .subsampling(1)
293 .padding_left(1)
294 .padding_right(1)
295 .padding_top(1)
296 .padding_bottom(1)
297 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
298 }
299 }
300
301 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_height_div_3) {
302 TEST_REQUIRES_ARM_NEON;
303 for (size_t input_height = 6; input_height < 24; input_height += 3) {
304 for (size_t input_width = 1; input_width < 21; input_width += 3) {
305 DWConv2DMicrokernelTester()
306 .input_width(input_width)
307 .input_height(input_height)
308 .kernel_height(3)
309 .kernel_width(3)
310 .subsampling(1)
311 .padding_left(1)
312 .padding_right(1)
313 .padding_top(1)
314 .padding_bottom(1)
315 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
316 }
317 }
318 }
319
320 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_height_lt_3) {
321 TEST_REQUIRES_ARM_NEON;
322 for (size_t input_height = 1; input_height < 3; input_height++) {
323 for (size_t input_width = 1; input_width < 21; input_width += 3) {
324 DWConv2DMicrokernelTester()
325 .input_width(input_width)
326 .input_height(input_height)
327 .kernel_height(3)
328 .kernel_width(3)
329 .subsampling(1)
330 .padding_left(1)
331 .padding_right(1)
332 .padding_top(1)
333 .padding_bottom(1)
334 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
335 }
336 }
337 }
338
339 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_3X4, output_height_gt_3) {
340 TEST_REQUIRES_ARM_NEON;
341 for (size_t input_height = 4; input_height < 7; input_height++) {
342 for (size_t input_width = 1; input_width < 21; input_width += 3) {
343 DWConv2DMicrokernelTester()
344 .input_width(input_width)
345 .input_height(input_height)
346 .kernel_height(3)
347 .kernel_width(3)
348 .subsampling(1)
349 .padding_left(1)
350 .padding_right(1)
351 .padding_top(1)
352 .padding_bottom(1)
353 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4);
354 }
355 }
356 }
357#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
358
359
360#if XNN_ARCH_ARM || XNN_ARCH_ARM64
361 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_width_eq_4) {
362 TEST_REQUIRES_ARM_NEON;
363 DWConv2DMicrokernelTester()
364 .input_width(4)
365 .input_height(4)
366 .kernel_height(3)
367 .kernel_width(3)
368 .subsampling(1)
369 .padding_left(1)
370 .padding_right(1)
371 .padding_top(1)
372 .padding_bottom(1)
373 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
374 }
375
376 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_width_div_4) {
377 TEST_REQUIRES_ARM_NEON;
378 for (size_t input_width = 8; input_width < 32; input_width += 4) {
379 DWConv2DMicrokernelTester()
380 .input_width(input_width)
381 .input_height(4)
382 .kernel_height(3)
383 .kernel_width(3)
384 .subsampling(1)
385 .padding_left(1)
386 .padding_right(1)
387 .padding_top(1)
388 .padding_bottom(1)
389 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
390 }
391 }
392
393 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_width_lt_4) {
394 TEST_REQUIRES_ARM_NEON;
395 for (size_t input_width = 1; input_width < 4; input_width++) {
396 DWConv2DMicrokernelTester()
397 .input_width(4)
398 .input_height(4)
399 .kernel_height(3)
400 .kernel_width(3)
401 .subsampling(1)
402 .padding_left(1)
403 .padding_right(1)
404 .padding_top(1)
405 .padding_bottom(1)
406 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
407 }
408 }
409
410 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_width_gt_4) {
411 TEST_REQUIRES_ARM_NEON;
412 for (size_t input_width = 5; input_width < 9; input_width++) {
413 DWConv2DMicrokernelTester()
414 .input_width(input_width)
415 .input_height(4)
416 .kernel_height(3)
417 .kernel_width(3)
418 .subsampling(1)
419 .padding_left(1)
420 .padding_right(1)
421 .padding_top(1)
422 .padding_bottom(1)
423 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
424 }
425 }
426
427 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_height_div_4) {
428 TEST_REQUIRES_ARM_NEON;
429 for (size_t input_height = 8; input_height < 32; input_height += 4) {
430 for (size_t input_width = 1; input_width < 21; input_width += 3) {
431 DWConv2DMicrokernelTester()
432 .input_width(input_width)
433 .input_height(input_height)
434 .kernel_height(3)
435 .kernel_width(3)
436 .subsampling(1)
437 .padding_left(1)
438 .padding_right(1)
439 .padding_top(1)
440 .padding_bottom(1)
441 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
442 }
443 }
444 }
445
446 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_height_lt_4) {
447 TEST_REQUIRES_ARM_NEON;
448 for (size_t input_height = 1; input_height < 4; input_height++) {
449 for (size_t input_width = 1; input_width < 21; input_width += 3) {
450 DWConv2DMicrokernelTester()
451 .input_width(input_width)
452 .input_height(input_height)
453 .kernel_height(3)
454 .kernel_width(3)
455 .subsampling(1)
456 .padding_left(1)
457 .padding_right(1)
458 .padding_top(1)
459 .padding_bottom(1)
460 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
461 }
462 }
463 }
464
465 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_4X4, output_height_gt_4) {
466 TEST_REQUIRES_ARM_NEON;
467 for (size_t input_height = 5; input_height < 9; input_height++) {
468 for (size_t input_width = 1; input_width < 21; input_width += 3) {
469 DWConv2DMicrokernelTester()
470 .input_width(input_width)
471 .input_height(input_height)
472 .kernel_height(3)
473 .kernel_width(3)
474 .subsampling(1)
475 .padding_left(1)
476 .padding_right(1)
477 .padding_top(1)
478 .padding_bottom(1)
479 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4);
480 }
481 }
482 }
483#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
484
485
486#if XNN_ARCH_ARM || XNN_ARCH_ARM64
487 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_width_eq_4) {
488 TEST_REQUIRES_ARM_NEON;
489 DWConv2DMicrokernelTester()
490 .input_width(4)
491 .input_height(5)
492 .kernel_height(3)
493 .kernel_width(3)
494 .subsampling(1)
495 .padding_left(1)
496 .padding_right(1)
497 .padding_top(1)
498 .padding_bottom(1)
499 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
500 }
501
502 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_width_div_4) {
503 TEST_REQUIRES_ARM_NEON;
504 for (size_t input_width = 8; input_width < 32; input_width += 4) {
505 DWConv2DMicrokernelTester()
506 .input_width(input_width)
507 .input_height(5)
508 .kernel_height(3)
509 .kernel_width(3)
510 .subsampling(1)
511 .padding_left(1)
512 .padding_right(1)
513 .padding_top(1)
514 .padding_bottom(1)
515 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
516 }
517 }
518
519 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_width_lt_4) {
520 TEST_REQUIRES_ARM_NEON;
521 for (size_t input_width = 1; input_width < 4; input_width++) {
522 DWConv2DMicrokernelTester()
523 .input_width(4)
524 .input_height(5)
525 .kernel_height(3)
526 .kernel_width(3)
527 .subsampling(1)
528 .padding_left(1)
529 .padding_right(1)
530 .padding_top(1)
531 .padding_bottom(1)
532 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
533 }
534 }
535
536 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_width_gt_4) {
537 TEST_REQUIRES_ARM_NEON;
538 for (size_t input_width = 5; input_width < 9; input_width++) {
539 DWConv2DMicrokernelTester()
540 .input_width(input_width)
541 .input_height(5)
542 .kernel_height(3)
543 .kernel_width(3)
544 .subsampling(1)
545 .padding_left(1)
546 .padding_right(1)
547 .padding_top(1)
548 .padding_bottom(1)
549 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
550 }
551 }
552
553 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_height_div_5) {
554 TEST_REQUIRES_ARM_NEON;
555 for (size_t input_height = 10; input_height < 40; input_height += 5) {
556 for (size_t input_width = 1; input_width < 21; input_width += 3) {
557 DWConv2DMicrokernelTester()
558 .input_width(input_width)
559 .input_height(input_height)
560 .kernel_height(3)
561 .kernel_width(3)
562 .subsampling(1)
563 .padding_left(1)
564 .padding_right(1)
565 .padding_top(1)
566 .padding_bottom(1)
567 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
568 }
569 }
570 }
571
572 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_height_lt_5) {
573 TEST_REQUIRES_ARM_NEON;
574 for (size_t input_height = 1; input_height < 5; input_height++) {
575 for (size_t input_width = 1; input_width < 21; input_width += 3) {
576 DWConv2DMicrokernelTester()
577 .input_width(input_width)
578 .input_height(input_height)
579 .kernel_height(3)
580 .kernel_width(3)
581 .subsampling(1)
582 .padding_left(1)
583 .padding_right(1)
584 .padding_top(1)
585 .padding_bottom(1)
586 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
587 }
588 }
589 }
590
591 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_5X4, output_height_gt_5) {
592 TEST_REQUIRES_ARM_NEON;
593 for (size_t input_height = 6; input_height < 11; input_height++) {
594 for (size_t input_width = 1; input_width < 21; input_width += 3) {
595 DWConv2DMicrokernelTester()
596 .input_width(input_width)
597 .input_height(input_height)
598 .kernel_height(3)
599 .kernel_width(3)
600 .subsampling(1)
601 .padding_left(1)
602 .padding_right(1)
603 .padding_top(1)
604 .padding_bottom(1)
605 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4);
606 }
607 }
608 }
609#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
610
611
612#if XNN_ARCH_ARM || XNN_ARCH_ARM64
613 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_width_eq_4) {
614 TEST_REQUIRES_ARM_NEON;
615 DWConv2DMicrokernelTester()
616 .input_width(4)
617 .input_height(6)
618 .kernel_height(3)
619 .kernel_width(3)
620 .subsampling(1)
621 .padding_left(1)
622 .padding_right(1)
623 .padding_top(1)
624 .padding_bottom(1)
625 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
626 }
627
628 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_width_div_4) {
629 TEST_REQUIRES_ARM_NEON;
630 for (size_t input_width = 8; input_width < 32; input_width += 4) {
631 DWConv2DMicrokernelTester()
632 .input_width(input_width)
633 .input_height(6)
634 .kernel_height(3)
635 .kernel_width(3)
636 .subsampling(1)
637 .padding_left(1)
638 .padding_right(1)
639 .padding_top(1)
640 .padding_bottom(1)
641 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
642 }
643 }
644
645 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_width_lt_4) {
646 TEST_REQUIRES_ARM_NEON;
647 for (size_t input_width = 1; input_width < 4; input_width++) {
648 DWConv2DMicrokernelTester()
649 .input_width(4)
650 .input_height(6)
651 .kernel_height(3)
652 .kernel_width(3)
653 .subsampling(1)
654 .padding_left(1)
655 .padding_right(1)
656 .padding_top(1)
657 .padding_bottom(1)
658 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
659 }
660 }
661
662 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_width_gt_4) {
663 TEST_REQUIRES_ARM_NEON;
664 for (size_t input_width = 5; input_width < 9; input_width++) {
665 DWConv2DMicrokernelTester()
666 .input_width(input_width)
667 .input_height(6)
668 .kernel_height(3)
669 .kernel_width(3)
670 .subsampling(1)
671 .padding_left(1)
672 .padding_right(1)
673 .padding_top(1)
674 .padding_bottom(1)
675 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
676 }
677 }
678
679 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_height_div_6) {
680 TEST_REQUIRES_ARM_NEON;
681 for (size_t input_height = 12; input_height < 48; input_height += 6) {
682 for (size_t input_width = 1; input_width < 21; input_width += 3) {
683 DWConv2DMicrokernelTester()
684 .input_width(input_width)
685 .input_height(input_height)
686 .kernel_height(3)
687 .kernel_width(3)
688 .subsampling(1)
689 .padding_left(1)
690 .padding_right(1)
691 .padding_top(1)
692 .padding_bottom(1)
693 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
694 }
695 }
696 }
697
698 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_height_lt_6) {
699 TEST_REQUIRES_ARM_NEON;
700 for (size_t input_height = 1; input_height < 6; input_height++) {
701 for (size_t input_width = 1; input_width < 21; input_width += 3) {
702 DWConv2DMicrokernelTester()
703 .input_width(input_width)
704 .input_height(input_height)
705 .kernel_height(3)
706 .kernel_width(3)
707 .subsampling(1)
708 .padding_left(1)
709 .padding_right(1)
710 .padding_top(1)
711 .padding_bottom(1)
712 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
713 }
714 }
715 }
716
717 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_6X4, output_height_gt_6) {
718 TEST_REQUIRES_ARM_NEON;
719 for (size_t input_height = 7; input_height < 13; input_height++) {
720 for (size_t input_width = 1; input_width < 21; input_width += 3) {
721 DWConv2DMicrokernelTester()
722 .input_width(input_width)
723 .input_height(input_height)
724 .kernel_height(3)
725 .kernel_width(3)
726 .subsampling(1)
727 .padding_left(1)
728 .padding_right(1)
729 .padding_top(1)
730 .padding_bottom(1)
731 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4);
732 }
733 }
734 }
735#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
736
737
738#if XNN_ARCH_ARM || XNN_ARCH_ARM64
739 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC2, output_width_eq_4) {
740 TEST_REQUIRES_ARM_NEON;
741 DWConv2DMicrokernelTester()
742 .input_width(4)
743 .input_height(1)
744 .kernel_height(3)
745 .kernel_width(3)
746 .subsampling(1)
747 .padding_left(1)
748 .padding_right(1)
749 .padding_top(1)
750 .padding_bottom(1)
751 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2);
752 }
753
754 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC2, output_width_div_4) {
755 TEST_REQUIRES_ARM_NEON;
756 for (size_t input_width = 8; input_width < 32; input_width += 4) {
757 DWConv2DMicrokernelTester()
758 .input_width(input_width)
759 .input_height(1)
760 .kernel_height(3)
761 .kernel_width(3)
762 .subsampling(1)
763 .padding_left(1)
764 .padding_right(1)
765 .padding_top(1)
766 .padding_bottom(1)
767 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2);
768 }
769 }
770
771 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC2, output_width_lt_4) {
772 TEST_REQUIRES_ARM_NEON;
773 for (size_t input_width = 1; input_width < 4; input_width++) {
774 DWConv2DMicrokernelTester()
775 .input_width(4)
776 .input_height(1)
777 .kernel_height(3)
778 .kernel_width(3)
779 .subsampling(1)
780 .padding_left(1)
781 .padding_right(1)
782 .padding_top(1)
783 .padding_bottom(1)
784 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2);
785 }
786 }
787
788 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC2, output_width_gt_4) {
789 TEST_REQUIRES_ARM_NEON;
790 for (size_t input_width = 5; input_width < 9; input_width++) {
791 DWConv2DMicrokernelTester()
792 .input_width(input_width)
793 .input_height(1)
794 .kernel_height(3)
795 .kernel_width(3)
796 .subsampling(1)
797 .padding_left(1)
798 .padding_right(1)
799 .padding_top(1)
800 .padding_bottom(1)
801 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2);
802 }
803 }
804
805 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC2, output_height_gt_1) {
806 TEST_REQUIRES_ARM_NEON;
807 for (size_t input_height = 2; input_height < 3; input_height++) {
808 for (size_t input_width = 1; input_width < 21; input_width += 3) {
809 DWConv2DMicrokernelTester()
810 .input_width(input_width)
811 .input_height(input_height)
812 .kernel_height(3)
813 .kernel_width(3)
814 .subsampling(1)
815 .padding_left(1)
816 .padding_right(1)
817 .padding_top(1)
818 .padding_bottom(1)
819 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2);
820 }
821 }
822 }
823#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
824
825
826#if XNN_ARCH_ARM || XNN_ARCH_ARM64
827 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC3, output_width_eq_4) {
828 TEST_REQUIRES_ARM_NEON;
829 DWConv2DMicrokernelTester()
830 .input_width(4)
831 .input_height(1)
832 .kernel_height(3)
833 .kernel_width(3)
834 .subsampling(1)
835 .padding_left(1)
836 .padding_right(1)
837 .padding_top(1)
838 .padding_bottom(1)
839 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3);
840 }
841
842 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC3, output_width_div_4) {
843 TEST_REQUIRES_ARM_NEON;
844 for (size_t input_width = 8; input_width < 32; input_width += 4) {
845 DWConv2DMicrokernelTester()
846 .input_width(input_width)
847 .input_height(1)
848 .kernel_height(3)
849 .kernel_width(3)
850 .subsampling(1)
851 .padding_left(1)
852 .padding_right(1)
853 .padding_top(1)
854 .padding_bottom(1)
855 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3);
856 }
857 }
858
859 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC3, output_width_lt_4) {
860 TEST_REQUIRES_ARM_NEON;
861 for (size_t input_width = 1; input_width < 4; input_width++) {
862 DWConv2DMicrokernelTester()
863 .input_width(4)
864 .input_height(1)
865 .kernel_height(3)
866 .kernel_width(3)
867 .subsampling(1)
868 .padding_left(1)
869 .padding_right(1)
870 .padding_top(1)
871 .padding_bottom(1)
872 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3);
873 }
874 }
875
876 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC3, output_width_gt_4) {
877 TEST_REQUIRES_ARM_NEON;
878 for (size_t input_width = 5; input_width < 9; input_width++) {
879 DWConv2DMicrokernelTester()
880 .input_width(input_width)
881 .input_height(1)
882 .kernel_height(3)
883 .kernel_width(3)
884 .subsampling(1)
885 .padding_left(1)
886 .padding_right(1)
887 .padding_top(1)
888 .padding_bottom(1)
889 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3);
890 }
891 }
892
893 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC3, output_height_gt_1) {
894 TEST_REQUIRES_ARM_NEON;
895 for (size_t input_height = 2; input_height < 3; input_height++) {
896 for (size_t input_width = 1; input_width < 21; input_width += 3) {
897 DWConv2DMicrokernelTester()
898 .input_width(input_width)
899 .input_height(input_height)
900 .kernel_height(3)
901 .kernel_width(3)
902 .subsampling(1)
903 .padding_left(1)
904 .padding_right(1)
905 .padding_top(1)
906 .padding_bottom(1)
907 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3);
908 }
909 }
910 }
911#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
912
913
914#if XNN_ARCH_ARM || XNN_ARCH_ARM64
915 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC4, output_width_eq_4) {
916 TEST_REQUIRES_ARM_NEON;
917 DWConv2DMicrokernelTester()
918 .input_width(4)
919 .input_height(1)
920 .kernel_height(3)
921 .kernel_width(3)
922 .subsampling(1)
923 .padding_left(1)
924 .padding_right(1)
925 .padding_top(1)
926 .padding_bottom(1)
927 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4);
928 }
929
930 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC4, output_width_div_4) {
931 TEST_REQUIRES_ARM_NEON;
932 for (size_t input_width = 8; input_width < 32; input_width += 4) {
933 DWConv2DMicrokernelTester()
934 .input_width(input_width)
935 .input_height(1)
936 .kernel_height(3)
937 .kernel_width(3)
938 .subsampling(1)
939 .padding_left(1)
940 .padding_right(1)
941 .padding_top(1)
942 .padding_bottom(1)
943 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4);
944 }
945 }
946
947 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC4, output_width_lt_4) {
948 TEST_REQUIRES_ARM_NEON;
949 for (size_t input_width = 1; input_width < 4; input_width++) {
950 DWConv2DMicrokernelTester()
951 .input_width(4)
952 .input_height(1)
953 .kernel_height(3)
954 .kernel_width(3)
955 .subsampling(1)
956 .padding_left(1)
957 .padding_right(1)
958 .padding_top(1)
959 .padding_bottom(1)
960 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4);
961 }
962 }
963
964 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC4, output_width_gt_4) {
965 TEST_REQUIRES_ARM_NEON;
966 for (size_t input_width = 5; input_width < 9; input_width++) {
967 DWConv2DMicrokernelTester()
968 .input_width(input_width)
969 .input_height(1)
970 .kernel_height(3)
971 .kernel_width(3)
972 .subsampling(1)
973 .padding_left(1)
974 .padding_right(1)
975 .padding_top(1)
976 .padding_bottom(1)
977 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4);
978 }
979 }
980
981 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_1X4_ACC4, output_height_gt_1) {
982 TEST_REQUIRES_ARM_NEON;
983 for (size_t input_height = 2; input_height < 3; input_height++) {
984 for (size_t input_width = 1; input_width < 21; input_width += 3) {
985 DWConv2DMicrokernelTester()
986 .input_width(input_width)
987 .input_height(input_height)
988 .kernel_height(3)
989 .kernel_width(3)
990 .subsampling(1)
991 .padding_left(1)
992 .padding_right(1)
993 .padding_top(1)
994 .padding_bottom(1)
995 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4);
996 }
997 }
998 }
999#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1000
1001
1002#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1003 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_width_eq_4) {
1004 TEST_REQUIRES_ARM_NEON;
1005 DWConv2DMicrokernelTester()
1006 .input_width(4)
1007 .input_height(2)
1008 .kernel_height(3)
1009 .kernel_width(3)
1010 .subsampling(1)
1011 .padding_left(1)
1012 .padding_right(1)
1013 .padding_top(1)
1014 .padding_bottom(1)
1015 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1016 }
1017
1018 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_width_div_4) {
1019 TEST_REQUIRES_ARM_NEON;
1020 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1021 DWConv2DMicrokernelTester()
1022 .input_width(input_width)
1023 .input_height(2)
1024 .kernel_height(3)
1025 .kernel_width(3)
1026 .subsampling(1)
1027 .padding_left(1)
1028 .padding_right(1)
1029 .padding_top(1)
1030 .padding_bottom(1)
1031 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1032 }
1033 }
1034
1035 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_width_lt_4) {
1036 TEST_REQUIRES_ARM_NEON;
1037 for (size_t input_width = 1; input_width < 4; input_width++) {
1038 DWConv2DMicrokernelTester()
1039 .input_width(4)
1040 .input_height(2)
1041 .kernel_height(3)
1042 .kernel_width(3)
1043 .subsampling(1)
1044 .padding_left(1)
1045 .padding_right(1)
1046 .padding_top(1)
1047 .padding_bottom(1)
1048 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1049 }
1050 }
1051
1052 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_width_gt_4) {
1053 TEST_REQUIRES_ARM_NEON;
1054 for (size_t input_width = 5; input_width < 9; input_width++) {
1055 DWConv2DMicrokernelTester()
1056 .input_width(input_width)
1057 .input_height(2)
1058 .kernel_height(3)
1059 .kernel_width(3)
1060 .subsampling(1)
1061 .padding_left(1)
1062 .padding_right(1)
1063 .padding_top(1)
1064 .padding_bottom(1)
1065 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1066 }
1067 }
1068
1069 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_height_div_2) {
1070 TEST_REQUIRES_ARM_NEON;
1071 for (size_t input_height = 4; input_height < 16; input_height += 2) {
1072 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1073 DWConv2DMicrokernelTester()
1074 .input_width(input_width)
1075 .input_height(input_height)
1076 .kernel_height(3)
1077 .kernel_width(3)
1078 .subsampling(1)
1079 .padding_left(1)
1080 .padding_right(1)
1081 .padding_top(1)
1082 .padding_bottom(1)
1083 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1084 }
1085 }
1086 }
1087
1088 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_height_lt_2) {
1089 TEST_REQUIRES_ARM_NEON;
1090 for (size_t input_height = 1; input_height < 2; input_height++) {
1091 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1092 DWConv2DMicrokernelTester()
1093 .input_width(input_width)
1094 .input_height(input_height)
1095 .kernel_height(3)
1096 .kernel_width(3)
1097 .subsampling(1)
1098 .padding_left(1)
1099 .padding_right(1)
1100 .padding_top(1)
1101 .padding_bottom(1)
1102 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1103 }
1104 }
1105 }
1106
1107 TEST(F32_DWCONV2D_CHW_3X3P1__NEON_2X4_ACC2, output_height_gt_2) {
1108 TEST_REQUIRES_ARM_NEON;
1109 for (size_t input_height = 3; input_height < 5; input_height++) {
1110 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1111 DWConv2DMicrokernelTester()
1112 .input_width(input_width)
1113 .input_height(input_height)
1114 .kernel_height(3)
1115 .kernel_width(3)
1116 .subsampling(1)
1117 .padding_left(1)
1118 .padding_right(1)
1119 .padding_top(1)
1120 .padding_bottom(1)
1121 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2);
1122 }
1123 }
1124 }
1125#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1126
1127
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001128#if XNN_ARCH_ARM64
Marat Dukhan1268a242020-10-24 00:36:32 -07001129 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4, output_width_eq_4) {
1130 TEST_REQUIRES_ARM_NEON_FMA;
1131 DWConv2DMicrokernelTester()
1132 .input_width(4)
1133 .input_height(1)
1134 .kernel_height(3)
1135 .kernel_width(3)
1136 .subsampling(1)
1137 .padding_left(1)
1138 .padding_right(1)
1139 .padding_top(1)
1140 .padding_bottom(1)
1141 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4);
1142 }
1143
1144 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4, output_width_div_4) {
1145 TEST_REQUIRES_ARM_NEON_FMA;
1146 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1147 DWConv2DMicrokernelTester()
1148 .input_width(input_width)
1149 .input_height(1)
1150 .kernel_height(3)
1151 .kernel_width(3)
1152 .subsampling(1)
1153 .padding_left(1)
1154 .padding_right(1)
1155 .padding_top(1)
1156 .padding_bottom(1)
1157 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4);
1158 }
1159 }
1160
1161 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4, output_width_lt_4) {
1162 TEST_REQUIRES_ARM_NEON_FMA;
1163 for (size_t input_width = 1; input_width < 4; input_width++) {
1164 DWConv2DMicrokernelTester()
1165 .input_width(4)
1166 .input_height(1)
1167 .kernel_height(3)
1168 .kernel_width(3)
1169 .subsampling(1)
1170 .padding_left(1)
1171 .padding_right(1)
1172 .padding_top(1)
1173 .padding_bottom(1)
1174 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4);
1175 }
1176 }
1177
1178 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4, output_width_gt_4) {
1179 TEST_REQUIRES_ARM_NEON_FMA;
1180 for (size_t input_width = 5; input_width < 9; input_width++) {
1181 DWConv2DMicrokernelTester()
1182 .input_width(input_width)
1183 .input_height(1)
1184 .kernel_height(3)
1185 .kernel_width(3)
1186 .subsampling(1)
1187 .padding_left(1)
1188 .padding_right(1)
1189 .padding_top(1)
1190 .padding_bottom(1)
1191 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4);
1192 }
1193 }
1194
1195 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4, output_height_gt_1) {
1196 TEST_REQUIRES_ARM_NEON_FMA;
1197 for (size_t input_height = 2; input_height < 3; input_height++) {
1198 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1199 DWConv2DMicrokernelTester()
1200 .input_width(input_width)
1201 .input_height(input_height)
1202 .kernel_height(3)
1203 .kernel_width(3)
1204 .subsampling(1)
1205 .padding_left(1)
1206 .padding_right(1)
1207 .padding_top(1)
1208 .padding_bottom(1)
1209 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4);
1210 }
1211 }
1212 }
1213#endif // XNN_ARCH_ARM64
1214
1215
1216#if XNN_ARCH_ARM64
1217 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_width_eq_4) {
1218 TEST_REQUIRES_ARM_NEON_FMA;
1219 DWConv2DMicrokernelTester()
1220 .input_width(4)
1221 .input_height(2)
1222 .kernel_height(3)
1223 .kernel_width(3)
1224 .subsampling(1)
1225 .padding_left(1)
1226 .padding_right(1)
1227 .padding_top(1)
1228 .padding_bottom(1)
1229 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1230 }
1231
1232 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_width_div_4) {
1233 TEST_REQUIRES_ARM_NEON_FMA;
1234 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1235 DWConv2DMicrokernelTester()
1236 .input_width(input_width)
1237 .input_height(2)
1238 .kernel_height(3)
1239 .kernel_width(3)
1240 .subsampling(1)
1241 .padding_left(1)
1242 .padding_right(1)
1243 .padding_top(1)
1244 .padding_bottom(1)
1245 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1246 }
1247 }
1248
1249 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_width_lt_4) {
1250 TEST_REQUIRES_ARM_NEON_FMA;
1251 for (size_t input_width = 1; input_width < 4; input_width++) {
1252 DWConv2DMicrokernelTester()
1253 .input_width(4)
1254 .input_height(2)
1255 .kernel_height(3)
1256 .kernel_width(3)
1257 .subsampling(1)
1258 .padding_left(1)
1259 .padding_right(1)
1260 .padding_top(1)
1261 .padding_bottom(1)
1262 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1263 }
1264 }
1265
1266 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_width_gt_4) {
1267 TEST_REQUIRES_ARM_NEON_FMA;
1268 for (size_t input_width = 5; input_width < 9; input_width++) {
1269 DWConv2DMicrokernelTester()
1270 .input_width(input_width)
1271 .input_height(2)
1272 .kernel_height(3)
1273 .kernel_width(3)
1274 .subsampling(1)
1275 .padding_left(1)
1276 .padding_right(1)
1277 .padding_top(1)
1278 .padding_bottom(1)
1279 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1280 }
1281 }
1282
1283 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_height_div_2) {
1284 TEST_REQUIRES_ARM_NEON_FMA;
1285 for (size_t input_height = 4; input_height < 16; input_height += 2) {
1286 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1287 DWConv2DMicrokernelTester()
1288 .input_width(input_width)
1289 .input_height(input_height)
1290 .kernel_height(3)
1291 .kernel_width(3)
1292 .subsampling(1)
1293 .padding_left(1)
1294 .padding_right(1)
1295 .padding_top(1)
1296 .padding_bottom(1)
1297 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1298 }
1299 }
1300 }
1301
1302 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_height_lt_2) {
1303 TEST_REQUIRES_ARM_NEON_FMA;
1304 for (size_t input_height = 1; input_height < 2; input_height++) {
1305 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1306 DWConv2DMicrokernelTester()
1307 .input_width(input_width)
1308 .input_height(input_height)
1309 .kernel_height(3)
1310 .kernel_width(3)
1311 .subsampling(1)
1312 .padding_left(1)
1313 .padding_right(1)
1314 .padding_top(1)
1315 .padding_bottom(1)
1316 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1317 }
1318 }
1319 }
1320
1321 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4, output_height_gt_2) {
1322 TEST_REQUIRES_ARM_NEON_FMA;
1323 for (size_t input_height = 3; input_height < 5; input_height++) {
1324 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1325 DWConv2DMicrokernelTester()
1326 .input_width(input_width)
1327 .input_height(input_height)
1328 .kernel_height(3)
1329 .kernel_width(3)
1330 .subsampling(1)
1331 .padding_left(1)
1332 .padding_right(1)
1333 .padding_top(1)
1334 .padding_bottom(1)
1335 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4);
1336 }
1337 }
1338 }
1339#endif // XNN_ARCH_ARM64
1340
1341
1342#if XNN_ARCH_ARM64
Marat Dukhanbf715f92020-10-23 20:17:00 -07001343 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_width_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001344 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhanbf715f92020-10-23 20:17:00 -07001345 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07001346 .input_width(4)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001347 .input_height(3)
1348 .kernel_height(3)
1349 .kernel_width(3)
1350 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001351 .padding_left(1)
1352 .padding_right(1)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07001353 .padding_top(1)
1354 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001355 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001356 }
1357
Marat Dukhanbf715f92020-10-23 20:17:00 -07001358 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_width_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001359 TEST_REQUIRES_ARM_NEON_FMA;
1360 for (size_t input_width = 8; input_width < 32; input_width += 4) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001361 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07001362 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001363 .input_height(3)
1364 .kernel_height(3)
1365 .kernel_width(3)
1366 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001367 .padding_left(1)
1368 .padding_right(1)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07001369 .padding_top(1)
1370 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001371 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001372 }
1373 }
1374
Marat Dukhanbf715f92020-10-23 20:17:00 -07001375 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_width_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001376 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001377 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001378 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001379 .input_width(4)
1380 .input_height(3)
1381 .kernel_height(3)
1382 .kernel_width(3)
1383 .subsampling(1)
1384 .padding_left(1)
1385 .padding_right(1)
1386 .padding_top(1)
1387 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001388 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001389 }
1390 }
1391
Marat Dukhanbf715f92020-10-23 20:17:00 -07001392 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001393 TEST_REQUIRES_ARM_NEON_FMA;
1394 for (size_t input_width = 5; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001395 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001396 .input_width(input_width)
1397 .input_height(3)
1398 .kernel_height(3)
1399 .kernel_width(3)
1400 .subsampling(1)
1401 .padding_left(1)
1402 .padding_right(1)
1403 .padding_top(1)
1404 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001405 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001406 }
1407 }
1408
Marat Dukhanbf715f92020-10-23 20:17:00 -07001409 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_height_div_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001410 TEST_REQUIRES_ARM_NEON_FMA;
1411 for (size_t input_height = 6; input_height < 24; input_height += 3) {
1412 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001413 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07001414 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001415 .input_height(input_height)
1416 .kernel_height(3)
1417 .kernel_width(3)
1418 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001419 .padding_left(1)
1420 .padding_right(1)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07001421 .padding_top(1)
1422 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001423 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001424 }
1425 }
1426 }
1427
Marat Dukhanbf715f92020-10-23 20:17:00 -07001428 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_height_lt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001429 TEST_REQUIRES_ARM_NEON_FMA;
1430 for (size_t input_height = 1; input_height < 3; input_height++) {
1431 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001432 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001433 .input_width(input_width)
1434 .input_height(input_height)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001435 .kernel_height(3)
1436 .kernel_width(3)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001437 .subsampling(1)
1438 .padding_left(1)
1439 .padding_right(1)
1440 .padding_top(1)
1441 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001442 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001443 }
1444 }
1445 }
1446
Marat Dukhanbf715f92020-10-23 20:17:00 -07001447 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_3X4, output_height_gt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001448 TEST_REQUIRES_ARM_NEON_FMA;
1449 for (size_t input_height = 4; input_height < 7; input_height++) {
1450 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07001451 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07001452 .input_width(input_width)
1453 .input_height(input_height)
1454 .kernel_height(3)
1455 .kernel_width(3)
1456 .subsampling(1)
1457 .padding_left(1)
1458 .padding_right(1)
1459 .padding_top(1)
1460 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07001461 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001462 }
1463 }
1464 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001465#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001466
1467
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001468#if XNN_ARCH_ARM64
Marat Dukhan1268a242020-10-24 00:36:32 -07001469 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_width_eq_4) {
1470 TEST_REQUIRES_ARM_NEON_FMA;
1471 DWConv2DMicrokernelTester()
1472 .input_width(4)
1473 .input_height(4)
1474 .kernel_height(3)
1475 .kernel_width(3)
1476 .subsampling(1)
1477 .padding_left(1)
1478 .padding_right(1)
1479 .padding_top(1)
1480 .padding_bottom(1)
1481 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1482 }
1483
1484 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_width_div_4) {
1485 TEST_REQUIRES_ARM_NEON_FMA;
1486 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1487 DWConv2DMicrokernelTester()
1488 .input_width(input_width)
1489 .input_height(4)
1490 .kernel_height(3)
1491 .kernel_width(3)
1492 .subsampling(1)
1493 .padding_left(1)
1494 .padding_right(1)
1495 .padding_top(1)
1496 .padding_bottom(1)
1497 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1498 }
1499 }
1500
1501 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_width_lt_4) {
1502 TEST_REQUIRES_ARM_NEON_FMA;
1503 for (size_t input_width = 1; input_width < 4; input_width++) {
1504 DWConv2DMicrokernelTester()
1505 .input_width(4)
1506 .input_height(4)
1507 .kernel_height(3)
1508 .kernel_width(3)
1509 .subsampling(1)
1510 .padding_left(1)
1511 .padding_right(1)
1512 .padding_top(1)
1513 .padding_bottom(1)
1514 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1515 }
1516 }
1517
1518 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_width_gt_4) {
1519 TEST_REQUIRES_ARM_NEON_FMA;
1520 for (size_t input_width = 5; input_width < 9; input_width++) {
1521 DWConv2DMicrokernelTester()
1522 .input_width(input_width)
1523 .input_height(4)
1524 .kernel_height(3)
1525 .kernel_width(3)
1526 .subsampling(1)
1527 .padding_left(1)
1528 .padding_right(1)
1529 .padding_top(1)
1530 .padding_bottom(1)
1531 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1532 }
1533 }
1534
1535 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_height_div_4) {
1536 TEST_REQUIRES_ARM_NEON_FMA;
1537 for (size_t input_height = 8; input_height < 32; input_height += 4) {
1538 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1539 DWConv2DMicrokernelTester()
1540 .input_width(input_width)
1541 .input_height(input_height)
1542 .kernel_height(3)
1543 .kernel_width(3)
1544 .subsampling(1)
1545 .padding_left(1)
1546 .padding_right(1)
1547 .padding_top(1)
1548 .padding_bottom(1)
1549 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1550 }
1551 }
1552 }
1553
1554 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_height_lt_4) {
1555 TEST_REQUIRES_ARM_NEON_FMA;
1556 for (size_t input_height = 1; input_height < 4; input_height++) {
1557 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1558 DWConv2DMicrokernelTester()
1559 .input_width(input_width)
1560 .input_height(input_height)
1561 .kernel_height(3)
1562 .kernel_width(3)
1563 .subsampling(1)
1564 .padding_left(1)
1565 .padding_right(1)
1566 .padding_top(1)
1567 .padding_bottom(1)
1568 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1569 }
1570 }
1571 }
1572
1573 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_4X4, output_height_gt_4) {
1574 TEST_REQUIRES_ARM_NEON_FMA;
1575 for (size_t input_height = 5; input_height < 9; input_height++) {
1576 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1577 DWConv2DMicrokernelTester()
1578 .input_width(input_width)
1579 .input_height(input_height)
1580 .kernel_height(3)
1581 .kernel_width(3)
1582 .subsampling(1)
1583 .padding_left(1)
1584 .padding_right(1)
1585 .padding_top(1)
1586 .padding_bottom(1)
1587 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4);
1588 }
1589 }
1590 }
1591#endif // XNN_ARCH_ARM64
1592
1593
1594#if XNN_ARCH_ARM64
1595 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_width_eq_4) {
1596 TEST_REQUIRES_ARM_NEON_FMA;
1597 DWConv2DMicrokernelTester()
1598 .input_width(4)
1599 .input_height(5)
1600 .kernel_height(3)
1601 .kernel_width(3)
1602 .subsampling(1)
1603 .padding_left(1)
1604 .padding_right(1)
1605 .padding_top(1)
1606 .padding_bottom(1)
1607 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1608 }
1609
1610 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_width_div_4) {
1611 TEST_REQUIRES_ARM_NEON_FMA;
1612 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1613 DWConv2DMicrokernelTester()
1614 .input_width(input_width)
1615 .input_height(5)
1616 .kernel_height(3)
1617 .kernel_width(3)
1618 .subsampling(1)
1619 .padding_left(1)
1620 .padding_right(1)
1621 .padding_top(1)
1622 .padding_bottom(1)
1623 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1624 }
1625 }
1626
1627 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_width_lt_4) {
1628 TEST_REQUIRES_ARM_NEON_FMA;
1629 for (size_t input_width = 1; input_width < 4; input_width++) {
1630 DWConv2DMicrokernelTester()
1631 .input_width(4)
1632 .input_height(5)
1633 .kernel_height(3)
1634 .kernel_width(3)
1635 .subsampling(1)
1636 .padding_left(1)
1637 .padding_right(1)
1638 .padding_top(1)
1639 .padding_bottom(1)
1640 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1641 }
1642 }
1643
1644 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_width_gt_4) {
1645 TEST_REQUIRES_ARM_NEON_FMA;
1646 for (size_t input_width = 5; input_width < 9; input_width++) {
1647 DWConv2DMicrokernelTester()
1648 .input_width(input_width)
1649 .input_height(5)
1650 .kernel_height(3)
1651 .kernel_width(3)
1652 .subsampling(1)
1653 .padding_left(1)
1654 .padding_right(1)
1655 .padding_top(1)
1656 .padding_bottom(1)
1657 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1658 }
1659 }
1660
1661 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_height_div_5) {
1662 TEST_REQUIRES_ARM_NEON_FMA;
1663 for (size_t input_height = 10; input_height < 40; input_height += 5) {
1664 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1665 DWConv2DMicrokernelTester()
1666 .input_width(input_width)
1667 .input_height(input_height)
1668 .kernel_height(3)
1669 .kernel_width(3)
1670 .subsampling(1)
1671 .padding_left(1)
1672 .padding_right(1)
1673 .padding_top(1)
1674 .padding_bottom(1)
1675 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1676 }
1677 }
1678 }
1679
1680 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_height_lt_5) {
1681 TEST_REQUIRES_ARM_NEON_FMA;
1682 for (size_t input_height = 1; input_height < 5; input_height++) {
1683 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1684 DWConv2DMicrokernelTester()
1685 .input_width(input_width)
1686 .input_height(input_height)
1687 .kernel_height(3)
1688 .kernel_width(3)
1689 .subsampling(1)
1690 .padding_left(1)
1691 .padding_right(1)
1692 .padding_top(1)
1693 .padding_bottom(1)
1694 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1695 }
1696 }
1697 }
1698
1699 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_5X4, output_height_gt_5) {
1700 TEST_REQUIRES_ARM_NEON_FMA;
1701 for (size_t input_height = 6; input_height < 11; input_height++) {
1702 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1703 DWConv2DMicrokernelTester()
1704 .input_width(input_width)
1705 .input_height(input_height)
1706 .kernel_height(3)
1707 .kernel_width(3)
1708 .subsampling(1)
1709 .padding_left(1)
1710 .padding_right(1)
1711 .padding_top(1)
1712 .padding_bottom(1)
1713 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4);
1714 }
1715 }
1716 }
1717#endif // XNN_ARCH_ARM64
1718
1719
1720#if XNN_ARCH_ARM64
1721 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_width_eq_4) {
1722 TEST_REQUIRES_ARM_NEON_FMA;
1723 DWConv2DMicrokernelTester()
1724 .input_width(4)
1725 .input_height(6)
1726 .kernel_height(3)
1727 .kernel_width(3)
1728 .subsampling(1)
1729 .padding_left(1)
1730 .padding_right(1)
1731 .padding_top(1)
1732 .padding_bottom(1)
1733 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1734 }
1735
1736 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_width_div_4) {
1737 TEST_REQUIRES_ARM_NEON_FMA;
1738 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1739 DWConv2DMicrokernelTester()
1740 .input_width(input_width)
1741 .input_height(6)
1742 .kernel_height(3)
1743 .kernel_width(3)
1744 .subsampling(1)
1745 .padding_left(1)
1746 .padding_right(1)
1747 .padding_top(1)
1748 .padding_bottom(1)
1749 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1750 }
1751 }
1752
1753 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_width_lt_4) {
1754 TEST_REQUIRES_ARM_NEON_FMA;
1755 for (size_t input_width = 1; input_width < 4; input_width++) {
1756 DWConv2DMicrokernelTester()
1757 .input_width(4)
1758 .input_height(6)
1759 .kernel_height(3)
1760 .kernel_width(3)
1761 .subsampling(1)
1762 .padding_left(1)
1763 .padding_right(1)
1764 .padding_top(1)
1765 .padding_bottom(1)
1766 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1767 }
1768 }
1769
1770 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_width_gt_4) {
1771 TEST_REQUIRES_ARM_NEON_FMA;
1772 for (size_t input_width = 5; input_width < 9; input_width++) {
1773 DWConv2DMicrokernelTester()
1774 .input_width(input_width)
1775 .input_height(6)
1776 .kernel_height(3)
1777 .kernel_width(3)
1778 .subsampling(1)
1779 .padding_left(1)
1780 .padding_right(1)
1781 .padding_top(1)
1782 .padding_bottom(1)
1783 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1784 }
1785 }
1786
1787 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_height_div_6) {
1788 TEST_REQUIRES_ARM_NEON_FMA;
1789 for (size_t input_height = 12; input_height < 48; input_height += 6) {
1790 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1791 DWConv2DMicrokernelTester()
1792 .input_width(input_width)
1793 .input_height(input_height)
1794 .kernel_height(3)
1795 .kernel_width(3)
1796 .subsampling(1)
1797 .padding_left(1)
1798 .padding_right(1)
1799 .padding_top(1)
1800 .padding_bottom(1)
1801 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1802 }
1803 }
1804 }
1805
1806 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_height_lt_6) {
1807 TEST_REQUIRES_ARM_NEON_FMA;
1808 for (size_t input_height = 1; input_height < 6; input_height++) {
1809 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1810 DWConv2DMicrokernelTester()
1811 .input_width(input_width)
1812 .input_height(input_height)
1813 .kernel_height(3)
1814 .kernel_width(3)
1815 .subsampling(1)
1816 .padding_left(1)
1817 .padding_right(1)
1818 .padding_top(1)
1819 .padding_bottom(1)
1820 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1821 }
1822 }
1823 }
1824
1825 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_6X4, output_height_gt_6) {
1826 TEST_REQUIRES_ARM_NEON_FMA;
1827 for (size_t input_height = 7; input_height < 13; input_height++) {
1828 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1829 DWConv2DMicrokernelTester()
1830 .input_width(input_width)
1831 .input_height(input_height)
1832 .kernel_height(3)
1833 .kernel_width(3)
1834 .subsampling(1)
1835 .padding_left(1)
1836 .padding_right(1)
1837 .padding_top(1)
1838 .padding_bottom(1)
1839 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4);
1840 }
1841 }
1842 }
1843#endif // XNN_ARCH_ARM64
1844
1845
1846#if XNN_ARCH_ARM64
1847 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC2, output_width_eq_4) {
1848 TEST_REQUIRES_ARM_NEON_FMA;
1849 DWConv2DMicrokernelTester()
1850 .input_width(4)
1851 .input_height(1)
1852 .kernel_height(3)
1853 .kernel_width(3)
1854 .subsampling(1)
1855 .padding_left(1)
1856 .padding_right(1)
1857 .padding_top(1)
1858 .padding_bottom(1)
1859 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2);
1860 }
1861
1862 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC2, output_width_div_4) {
1863 TEST_REQUIRES_ARM_NEON_FMA;
1864 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1865 DWConv2DMicrokernelTester()
1866 .input_width(input_width)
1867 .input_height(1)
1868 .kernel_height(3)
1869 .kernel_width(3)
1870 .subsampling(1)
1871 .padding_left(1)
1872 .padding_right(1)
1873 .padding_top(1)
1874 .padding_bottom(1)
1875 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2);
1876 }
1877 }
1878
1879 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC2, output_width_lt_4) {
1880 TEST_REQUIRES_ARM_NEON_FMA;
1881 for (size_t input_width = 1; input_width < 4; input_width++) {
1882 DWConv2DMicrokernelTester()
1883 .input_width(4)
1884 .input_height(1)
1885 .kernel_height(3)
1886 .kernel_width(3)
1887 .subsampling(1)
1888 .padding_left(1)
1889 .padding_right(1)
1890 .padding_top(1)
1891 .padding_bottom(1)
1892 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2);
1893 }
1894 }
1895
1896 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC2, output_width_gt_4) {
1897 TEST_REQUIRES_ARM_NEON_FMA;
1898 for (size_t input_width = 5; input_width < 9; input_width++) {
1899 DWConv2DMicrokernelTester()
1900 .input_width(input_width)
1901 .input_height(1)
1902 .kernel_height(3)
1903 .kernel_width(3)
1904 .subsampling(1)
1905 .padding_left(1)
1906 .padding_right(1)
1907 .padding_top(1)
1908 .padding_bottom(1)
1909 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2);
1910 }
1911 }
1912
1913 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC2, output_height_gt_1) {
1914 TEST_REQUIRES_ARM_NEON_FMA;
1915 for (size_t input_height = 2; input_height < 3; input_height++) {
1916 for (size_t input_width = 1; input_width < 21; input_width += 3) {
1917 DWConv2DMicrokernelTester()
1918 .input_width(input_width)
1919 .input_height(input_height)
1920 .kernel_height(3)
1921 .kernel_width(3)
1922 .subsampling(1)
1923 .padding_left(1)
1924 .padding_right(1)
1925 .padding_top(1)
1926 .padding_bottom(1)
1927 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2);
1928 }
1929 }
1930 }
1931#endif // XNN_ARCH_ARM64
1932
1933
1934#if XNN_ARCH_ARM64
1935 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC3, output_width_eq_4) {
1936 TEST_REQUIRES_ARM_NEON_FMA;
1937 DWConv2DMicrokernelTester()
1938 .input_width(4)
1939 .input_height(1)
1940 .kernel_height(3)
1941 .kernel_width(3)
1942 .subsampling(1)
1943 .padding_left(1)
1944 .padding_right(1)
1945 .padding_top(1)
1946 .padding_bottom(1)
1947 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3);
1948 }
1949
1950 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC3, output_width_div_4) {
1951 TEST_REQUIRES_ARM_NEON_FMA;
1952 for (size_t input_width = 8; input_width < 32; input_width += 4) {
1953 DWConv2DMicrokernelTester()
1954 .input_width(input_width)
1955 .input_height(1)
1956 .kernel_height(3)
1957 .kernel_width(3)
1958 .subsampling(1)
1959 .padding_left(1)
1960 .padding_right(1)
1961 .padding_top(1)
1962 .padding_bottom(1)
1963 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3);
1964 }
1965 }
1966
1967 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC3, output_width_lt_4) {
1968 TEST_REQUIRES_ARM_NEON_FMA;
1969 for (size_t input_width = 1; input_width < 4; input_width++) {
1970 DWConv2DMicrokernelTester()
1971 .input_width(4)
1972 .input_height(1)
1973 .kernel_height(3)
1974 .kernel_width(3)
1975 .subsampling(1)
1976 .padding_left(1)
1977 .padding_right(1)
1978 .padding_top(1)
1979 .padding_bottom(1)
1980 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3);
1981 }
1982 }
1983
1984 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC3, output_width_gt_4) {
1985 TEST_REQUIRES_ARM_NEON_FMA;
1986 for (size_t input_width = 5; input_width < 9; input_width++) {
1987 DWConv2DMicrokernelTester()
1988 .input_width(input_width)
1989 .input_height(1)
1990 .kernel_height(3)
1991 .kernel_width(3)
1992 .subsampling(1)
1993 .padding_left(1)
1994 .padding_right(1)
1995 .padding_top(1)
1996 .padding_bottom(1)
1997 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3);
1998 }
1999 }
2000
2001 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC3, output_height_gt_1) {
2002 TEST_REQUIRES_ARM_NEON_FMA;
2003 for (size_t input_height = 2; input_height < 3; input_height++) {
2004 for (size_t input_width = 1; input_width < 21; input_width += 3) {
2005 DWConv2DMicrokernelTester()
2006 .input_width(input_width)
2007 .input_height(input_height)
2008 .kernel_height(3)
2009 .kernel_width(3)
2010 .subsampling(1)
2011 .padding_left(1)
2012 .padding_right(1)
2013 .padding_top(1)
2014 .padding_bottom(1)
2015 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3);
2016 }
2017 }
2018 }
2019#endif // XNN_ARCH_ARM64
2020
2021
2022#if XNN_ARCH_ARM64
2023 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC4, output_width_eq_4) {
2024 TEST_REQUIRES_ARM_NEON_FMA;
2025 DWConv2DMicrokernelTester()
2026 .input_width(4)
2027 .input_height(1)
2028 .kernel_height(3)
2029 .kernel_width(3)
2030 .subsampling(1)
2031 .padding_left(1)
2032 .padding_right(1)
2033 .padding_top(1)
2034 .padding_bottom(1)
2035 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4);
2036 }
2037
2038 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC4, output_width_div_4) {
2039 TEST_REQUIRES_ARM_NEON_FMA;
2040 for (size_t input_width = 8; input_width < 32; input_width += 4) {
2041 DWConv2DMicrokernelTester()
2042 .input_width(input_width)
2043 .input_height(1)
2044 .kernel_height(3)
2045 .kernel_width(3)
2046 .subsampling(1)
2047 .padding_left(1)
2048 .padding_right(1)
2049 .padding_top(1)
2050 .padding_bottom(1)
2051 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4);
2052 }
2053 }
2054
2055 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC4, output_width_lt_4) {
2056 TEST_REQUIRES_ARM_NEON_FMA;
2057 for (size_t input_width = 1; input_width < 4; input_width++) {
2058 DWConv2DMicrokernelTester()
2059 .input_width(4)
2060 .input_height(1)
2061 .kernel_height(3)
2062 .kernel_width(3)
2063 .subsampling(1)
2064 .padding_left(1)
2065 .padding_right(1)
2066 .padding_top(1)
2067 .padding_bottom(1)
2068 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4);
2069 }
2070 }
2071
2072 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC4, output_width_gt_4) {
2073 TEST_REQUIRES_ARM_NEON_FMA;
2074 for (size_t input_width = 5; input_width < 9; input_width++) {
2075 DWConv2DMicrokernelTester()
2076 .input_width(input_width)
2077 .input_height(1)
2078 .kernel_height(3)
2079 .kernel_width(3)
2080 .subsampling(1)
2081 .padding_left(1)
2082 .padding_right(1)
2083 .padding_top(1)
2084 .padding_bottom(1)
2085 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4);
2086 }
2087 }
2088
2089 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_1X4_ACC4, output_height_gt_1) {
2090 TEST_REQUIRES_ARM_NEON_FMA;
2091 for (size_t input_height = 2; input_height < 3; input_height++) {
2092 for (size_t input_width = 1; input_width < 21; input_width += 3) {
2093 DWConv2DMicrokernelTester()
2094 .input_width(input_width)
2095 .input_height(input_height)
2096 .kernel_height(3)
2097 .kernel_width(3)
2098 .subsampling(1)
2099 .padding_left(1)
2100 .padding_right(1)
2101 .padding_top(1)
2102 .padding_bottom(1)
2103 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4);
2104 }
2105 }
2106 }
2107#endif // XNN_ARCH_ARM64
2108
2109
2110#if XNN_ARCH_ARM64
2111 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_width_eq_4) {
2112 TEST_REQUIRES_ARM_NEON_FMA;
2113 DWConv2DMicrokernelTester()
2114 .input_width(4)
2115 .input_height(2)
2116 .kernel_height(3)
2117 .kernel_width(3)
2118 .subsampling(1)
2119 .padding_left(1)
2120 .padding_right(1)
2121 .padding_top(1)
2122 .padding_bottom(1)
2123 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2124 }
2125
2126 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_width_div_4) {
2127 TEST_REQUIRES_ARM_NEON_FMA;
2128 for (size_t input_width = 8; input_width < 32; input_width += 4) {
2129 DWConv2DMicrokernelTester()
2130 .input_width(input_width)
2131 .input_height(2)
2132 .kernel_height(3)
2133 .kernel_width(3)
2134 .subsampling(1)
2135 .padding_left(1)
2136 .padding_right(1)
2137 .padding_top(1)
2138 .padding_bottom(1)
2139 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2140 }
2141 }
2142
2143 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_width_lt_4) {
2144 TEST_REQUIRES_ARM_NEON_FMA;
2145 for (size_t input_width = 1; input_width < 4; input_width++) {
2146 DWConv2DMicrokernelTester()
2147 .input_width(4)
2148 .input_height(2)
2149 .kernel_height(3)
2150 .kernel_width(3)
2151 .subsampling(1)
2152 .padding_left(1)
2153 .padding_right(1)
2154 .padding_top(1)
2155 .padding_bottom(1)
2156 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2157 }
2158 }
2159
2160 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_width_gt_4) {
2161 TEST_REQUIRES_ARM_NEON_FMA;
2162 for (size_t input_width = 5; input_width < 9; input_width++) {
2163 DWConv2DMicrokernelTester()
2164 .input_width(input_width)
2165 .input_height(2)
2166 .kernel_height(3)
2167 .kernel_width(3)
2168 .subsampling(1)
2169 .padding_left(1)
2170 .padding_right(1)
2171 .padding_top(1)
2172 .padding_bottom(1)
2173 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2174 }
2175 }
2176
2177 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_height_div_2) {
2178 TEST_REQUIRES_ARM_NEON_FMA;
2179 for (size_t input_height = 4; input_height < 16; input_height += 2) {
2180 for (size_t input_width = 1; input_width < 21; input_width += 3) {
2181 DWConv2DMicrokernelTester()
2182 .input_width(input_width)
2183 .input_height(input_height)
2184 .kernel_height(3)
2185 .kernel_width(3)
2186 .subsampling(1)
2187 .padding_left(1)
2188 .padding_right(1)
2189 .padding_top(1)
2190 .padding_bottom(1)
2191 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2192 }
2193 }
2194 }
2195
2196 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_height_lt_2) {
2197 TEST_REQUIRES_ARM_NEON_FMA;
2198 for (size_t input_height = 1; input_height < 2; input_height++) {
2199 for (size_t input_width = 1; input_width < 21; input_width += 3) {
2200 DWConv2DMicrokernelTester()
2201 .input_width(input_width)
2202 .input_height(input_height)
2203 .kernel_height(3)
2204 .kernel_width(3)
2205 .subsampling(1)
2206 .padding_left(1)
2207 .padding_right(1)
2208 .padding_top(1)
2209 .padding_bottom(1)
2210 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2211 }
2212 }
2213 }
2214
2215 TEST(F32_DWCONV2D_CHW_3X3P1__NEONFMA_2X4_ACC2, output_height_gt_2) {
2216 TEST_REQUIRES_ARM_NEON_FMA;
2217 for (size_t input_height = 3; input_height < 5; input_height++) {
2218 for (size_t input_width = 1; input_width < 21; input_width += 3) {
2219 DWConv2DMicrokernelTester()
2220 .input_width(input_width)
2221 .input_height(input_height)
2222 .kernel_height(3)
2223 .kernel_width(3)
2224 .subsampling(1)
2225 .padding_left(1)
2226 .padding_right(1)
2227 .padding_top(1)
2228 .padding_bottom(1)
2229 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2);
2230 }
2231 }
2232 }
2233#endif // XNN_ARCH_ARM64
2234
2235
Marat Dukhan82f0c322020-10-25 19:17:35 -07002236#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2237 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_width_eq_4) {
2238 TEST_REQUIRES_ARM_NEON;
2239 for (size_t input_width = 7; input_width < 9; input_width++) {
2240 DWConv2DMicrokernelTester()
2241 .input_width(input_width)
2242 .input_height(2)
2243 .kernel_height(3)
2244 .kernel_width(3)
2245 .subsampling(2)
2246 .padding_left(1)
2247 .padding_right(1)
2248 .padding_top(1)
2249 .padding_bottom(1)
2250 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2251 }
2252 }
2253
2254 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_width_div_4) {
2255 TEST_REQUIRES_ARM_NEON;
2256 for (size_t input_width = 16; input_width < 64; input_width += 8) {
2257 DWConv2DMicrokernelTester()
2258 .input_width(input_width)
2259 .input_height(2)
2260 .kernel_height(3)
2261 .kernel_width(3)
2262 .subsampling(2)
2263 .padding_left(1)
2264 .padding_right(1)
2265 .padding_top(1)
2266 .padding_bottom(1)
2267 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2268 }
2269 }
2270
2271 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_width_lt_4) {
2272 TEST_REQUIRES_ARM_NEON;
2273 for (size_t input_width = 1; input_width < 7; input_width++) {
2274 DWConv2DMicrokernelTester()
2275 .input_width(8)
2276 .input_height(2)
2277 .kernel_height(3)
2278 .kernel_width(3)
2279 .subsampling(2)
2280 .padding_left(1)
2281 .padding_right(1)
2282 .padding_top(1)
2283 .padding_bottom(1)
2284 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2285 }
2286 }
2287
2288 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_width_gt_4) {
2289 TEST_REQUIRES_ARM_NEON;
2290 for (size_t input_width = 9; input_width < 17; input_width++) {
2291 DWConv2DMicrokernelTester()
2292 .input_width(input_width)
2293 .input_height(2)
2294 .kernel_height(3)
2295 .kernel_width(3)
2296 .subsampling(2)
2297 .padding_left(1)
2298 .padding_right(1)
2299 .padding_top(1)
2300 .padding_bottom(1)
2301 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2302 }
2303 }
2304
2305 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_height_eq_1) {
2306 TEST_REQUIRES_ARM_NEON;
2307 for (size_t input_height = 1; input_height < 3; input_height++) {
2308 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2309 DWConv2DMicrokernelTester()
2310 .input_width(input_width)
2311 .input_height(input_height)
2312 .kernel_height(3)
2313 .kernel_width(3)
2314 .subsampling(2)
2315 .padding_left(1)
2316 .padding_right(1)
2317 .padding_top(1)
2318 .padding_bottom(1)
2319 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2320 }
2321 }
2322 }
2323
2324 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, output_height_gt_1) {
2325 TEST_REQUIRES_ARM_NEON;
2326 for (size_t input_height = 3; input_height < 5; input_height++) {
2327 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2328 DWConv2DMicrokernelTester()
2329 .input_width(input_width)
2330 .input_height(input_height)
2331 .kernel_height(3)
2332 .kernel_width(3)
2333 .subsampling(2)
2334 .padding_left(1)
2335 .padding_right(1)
2336 .padding_top(1)
2337 .padding_bottom(1)
2338 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2339 }
2340 }
2341 }
2342
2343 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4, padding_top_eq_1) {
2344 TEST_REQUIRES_ARM_NEON;
2345 for (size_t input_height = 2; input_height < 8; input_height++) {
2346 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2347 DWConv2DMicrokernelTester()
2348 .input_width(input_width)
2349 .input_height(input_height)
2350 .kernel_height(3)
2351 .kernel_width(3)
2352 .subsampling(2)
2353 .padding_left(1)
2354 .padding_right(1)
2355 .padding_top(0)
2356 .padding_bottom(1)
2357 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4);
2358 }
2359 }
2360 }
2361#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2362
2363
2364#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2365 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_width_eq_4) {
2366 TEST_REQUIRES_ARM_NEON;
2367 for (size_t input_width = 7; input_width < 9; input_width++) {
2368 DWConv2DMicrokernelTester()
2369 .input_width(input_width)
2370 .input_height(4)
2371 .kernel_height(3)
2372 .kernel_width(3)
2373 .subsampling(2)
2374 .padding_left(1)
2375 .padding_right(1)
2376 .padding_top(1)
2377 .padding_bottom(1)
2378 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2379 }
2380 }
2381
2382 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_width_div_4) {
2383 TEST_REQUIRES_ARM_NEON;
2384 for (size_t input_width = 16; input_width < 64; input_width += 8) {
2385 DWConv2DMicrokernelTester()
2386 .input_width(input_width)
2387 .input_height(4)
2388 .kernel_height(3)
2389 .kernel_width(3)
2390 .subsampling(2)
2391 .padding_left(1)
2392 .padding_right(1)
2393 .padding_top(1)
2394 .padding_bottom(1)
2395 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2396 }
2397 }
2398
2399 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_width_lt_4) {
2400 TEST_REQUIRES_ARM_NEON;
2401 for (size_t input_width = 1; input_width < 7; input_width++) {
2402 DWConv2DMicrokernelTester()
2403 .input_width(8)
2404 .input_height(4)
2405 .kernel_height(3)
2406 .kernel_width(3)
2407 .subsampling(2)
2408 .padding_left(1)
2409 .padding_right(1)
2410 .padding_top(1)
2411 .padding_bottom(1)
2412 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2413 }
2414 }
2415
2416 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_width_gt_4) {
2417 TEST_REQUIRES_ARM_NEON;
2418 for (size_t input_width = 9; input_width < 17; input_width++) {
2419 DWConv2DMicrokernelTester()
2420 .input_width(input_width)
2421 .input_height(4)
2422 .kernel_height(3)
2423 .kernel_width(3)
2424 .subsampling(2)
2425 .padding_left(1)
2426 .padding_right(1)
2427 .padding_top(1)
2428 .padding_bottom(1)
2429 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2430 }
2431 }
2432
2433 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_height_eq_2) {
2434 TEST_REQUIRES_ARM_NEON;
2435 for (size_t input_height = 3; input_height < 5; input_height++) {
2436 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2437 DWConv2DMicrokernelTester()
2438 .input_width(input_width)
2439 .input_height(input_height)
2440 .kernel_height(3)
2441 .kernel_width(3)
2442 .subsampling(2)
2443 .padding_left(1)
2444 .padding_right(1)
2445 .padding_top(1)
2446 .padding_bottom(1)
2447 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2448 }
2449 }
2450 }
2451
2452 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_height_div_2) {
2453 TEST_REQUIRES_ARM_NEON;
2454 for (size_t input_height = 8; input_height < 32; input_height += 4) {
2455 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2456 DWConv2DMicrokernelTester()
2457 .input_width(input_width)
2458 .input_height(input_height)
2459 .kernel_height(3)
2460 .kernel_width(3)
2461 .subsampling(2)
2462 .padding_left(1)
2463 .padding_right(1)
2464 .padding_top(1)
2465 .padding_bottom(1)
2466 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2467 }
2468 }
2469 }
2470
2471 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_height_lt_2) {
2472 TEST_REQUIRES_ARM_NEON;
2473 for (size_t input_height = 1; input_height < 3; input_height++) {
2474 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2475 DWConv2DMicrokernelTester()
2476 .input_width(input_width)
2477 .input_height(input_height)
2478 .kernel_height(3)
2479 .kernel_width(3)
2480 .subsampling(2)
2481 .padding_left(1)
2482 .padding_right(1)
2483 .padding_top(1)
2484 .padding_bottom(1)
2485 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2486 }
2487 }
2488 }
2489
2490 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, output_height_gt_2) {
2491 TEST_REQUIRES_ARM_NEON;
2492 for (size_t input_height = 5; input_height < 9; input_height++) {
2493 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2494 DWConv2DMicrokernelTester()
2495 .input_width(input_width)
2496 .input_height(input_height)
2497 .kernel_height(3)
2498 .kernel_width(3)
2499 .subsampling(2)
2500 .padding_left(1)
2501 .padding_right(1)
2502 .padding_top(1)
2503 .padding_bottom(1)
2504 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2505 }
2506 }
2507 }
2508
2509 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4, padding_top_eq_1) {
2510 TEST_REQUIRES_ARM_NEON;
2511 for (size_t input_height = 2; input_height < 14; input_height++) {
2512 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2513 DWConv2DMicrokernelTester()
2514 .input_width(input_width)
2515 .input_height(input_height)
2516 .kernel_height(3)
2517 .kernel_width(3)
2518 .subsampling(2)
2519 .padding_left(1)
2520 .padding_right(1)
2521 .padding_top(0)
2522 .padding_bottom(1)
2523 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4);
2524 }
2525 }
2526 }
2527#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2528
2529
2530#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2531 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_width_eq_4) {
2532 TEST_REQUIRES_ARM_NEON;
2533 for (size_t input_width = 7; input_width < 9; input_width++) {
2534 DWConv2DMicrokernelTester()
2535 .input_width(input_width)
2536 .input_height(6)
2537 .kernel_height(3)
2538 .kernel_width(3)
2539 .subsampling(2)
2540 .padding_left(1)
2541 .padding_right(1)
2542 .padding_top(1)
2543 .padding_bottom(1)
2544 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2545 }
2546 }
2547
2548 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_width_div_4) {
2549 TEST_REQUIRES_ARM_NEON;
2550 for (size_t input_width = 16; input_width < 64; input_width += 8) {
2551 DWConv2DMicrokernelTester()
2552 .input_width(input_width)
2553 .input_height(6)
2554 .kernel_height(3)
2555 .kernel_width(3)
2556 .subsampling(2)
2557 .padding_left(1)
2558 .padding_right(1)
2559 .padding_top(1)
2560 .padding_bottom(1)
2561 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2562 }
2563 }
2564
2565 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_width_lt_4) {
2566 TEST_REQUIRES_ARM_NEON;
2567 for (size_t input_width = 1; input_width < 7; input_width++) {
2568 DWConv2DMicrokernelTester()
2569 .input_width(8)
2570 .input_height(6)
2571 .kernel_height(3)
2572 .kernel_width(3)
2573 .subsampling(2)
2574 .padding_left(1)
2575 .padding_right(1)
2576 .padding_top(1)
2577 .padding_bottom(1)
2578 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2579 }
2580 }
2581
2582 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_width_gt_4) {
2583 TEST_REQUIRES_ARM_NEON;
2584 for (size_t input_width = 9; input_width < 17; input_width++) {
2585 DWConv2DMicrokernelTester()
2586 .input_width(input_width)
2587 .input_height(6)
2588 .kernel_height(3)
2589 .kernel_width(3)
2590 .subsampling(2)
2591 .padding_left(1)
2592 .padding_right(1)
2593 .padding_top(1)
2594 .padding_bottom(1)
2595 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2596 }
2597 }
2598
2599 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_height_eq_3) {
2600 TEST_REQUIRES_ARM_NEON;
2601 for (size_t input_height = 5; input_height < 7; input_height++) {
2602 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2603 DWConv2DMicrokernelTester()
2604 .input_width(input_width)
2605 .input_height(input_height)
2606 .kernel_height(3)
2607 .kernel_width(3)
2608 .subsampling(2)
2609 .padding_left(1)
2610 .padding_right(1)
2611 .padding_top(1)
2612 .padding_bottom(1)
2613 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2614 }
2615 }
2616 }
2617
2618 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_height_div_3) {
2619 TEST_REQUIRES_ARM_NEON;
2620 for (size_t input_height = 12; input_height < 48; input_height += 6) {
2621 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2622 DWConv2DMicrokernelTester()
2623 .input_width(input_width)
2624 .input_height(input_height)
2625 .kernel_height(3)
2626 .kernel_width(3)
2627 .subsampling(2)
2628 .padding_left(1)
2629 .padding_right(1)
2630 .padding_top(1)
2631 .padding_bottom(1)
2632 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2633 }
2634 }
2635 }
2636
2637 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_height_lt_3) {
2638 TEST_REQUIRES_ARM_NEON;
2639 for (size_t input_height = 1; input_height < 5; input_height++) {
2640 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2641 DWConv2DMicrokernelTester()
2642 .input_width(input_width)
2643 .input_height(input_height)
2644 .kernel_height(3)
2645 .kernel_width(3)
2646 .subsampling(2)
2647 .padding_left(1)
2648 .padding_right(1)
2649 .padding_top(1)
2650 .padding_bottom(1)
2651 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2652 }
2653 }
2654 }
2655
2656 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, output_height_gt_3) {
2657 TEST_REQUIRES_ARM_NEON;
2658 for (size_t input_height = 7; input_height < 13; input_height++) {
2659 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2660 DWConv2DMicrokernelTester()
2661 .input_width(input_width)
2662 .input_height(input_height)
2663 .kernel_height(3)
2664 .kernel_width(3)
2665 .subsampling(2)
2666 .padding_left(1)
2667 .padding_right(1)
2668 .padding_top(1)
2669 .padding_bottom(1)
2670 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2671 }
2672 }
2673 }
2674
2675 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_3X4, padding_top_eq_1) {
2676 TEST_REQUIRES_ARM_NEON;
2677 for (size_t input_height = 2; input_height < 20; input_height++) {
2678 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2679 DWConv2DMicrokernelTester()
2680 .input_width(input_width)
2681 .input_height(input_height)
2682 .kernel_height(3)
2683 .kernel_width(3)
2684 .subsampling(2)
2685 .padding_left(1)
2686 .padding_right(1)
2687 .padding_top(0)
2688 .padding_bottom(1)
2689 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4);
2690 }
2691 }
2692 }
2693#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2694
2695
2696#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2697 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_width_eq_4) {
2698 TEST_REQUIRES_ARM_NEON;
2699 for (size_t input_width = 7; input_width < 9; input_width++) {
2700 DWConv2DMicrokernelTester()
2701 .input_width(input_width)
2702 .input_height(8)
2703 .kernel_height(3)
2704 .kernel_width(3)
2705 .subsampling(2)
2706 .padding_left(1)
2707 .padding_right(1)
2708 .padding_top(1)
2709 .padding_bottom(1)
2710 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2711 }
2712 }
2713
2714 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_width_div_4) {
2715 TEST_REQUIRES_ARM_NEON;
2716 for (size_t input_width = 16; input_width < 64; input_width += 8) {
2717 DWConv2DMicrokernelTester()
2718 .input_width(input_width)
2719 .input_height(8)
2720 .kernel_height(3)
2721 .kernel_width(3)
2722 .subsampling(2)
2723 .padding_left(1)
2724 .padding_right(1)
2725 .padding_top(1)
2726 .padding_bottom(1)
2727 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2728 }
2729 }
2730
2731 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_width_lt_4) {
2732 TEST_REQUIRES_ARM_NEON;
2733 for (size_t input_width = 1; input_width < 7; input_width++) {
2734 DWConv2DMicrokernelTester()
2735 .input_width(8)
2736 .input_height(8)
2737 .kernel_height(3)
2738 .kernel_width(3)
2739 .subsampling(2)
2740 .padding_left(1)
2741 .padding_right(1)
2742 .padding_top(1)
2743 .padding_bottom(1)
2744 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2745 }
2746 }
2747
2748 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_width_gt_4) {
2749 TEST_REQUIRES_ARM_NEON;
2750 for (size_t input_width = 9; input_width < 17; input_width++) {
2751 DWConv2DMicrokernelTester()
2752 .input_width(input_width)
2753 .input_height(8)
2754 .kernel_height(3)
2755 .kernel_width(3)
2756 .subsampling(2)
2757 .padding_left(1)
2758 .padding_right(1)
2759 .padding_top(1)
2760 .padding_bottom(1)
2761 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2762 }
2763 }
2764
2765 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_height_eq_4) {
2766 TEST_REQUIRES_ARM_NEON;
2767 for (size_t input_height = 7; input_height < 9; input_height++) {
2768 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2769 DWConv2DMicrokernelTester()
2770 .input_width(input_width)
2771 .input_height(input_height)
2772 .kernel_height(3)
2773 .kernel_width(3)
2774 .subsampling(2)
2775 .padding_left(1)
2776 .padding_right(1)
2777 .padding_top(1)
2778 .padding_bottom(1)
2779 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2780 }
2781 }
2782 }
2783
2784 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_height_div_4) {
2785 TEST_REQUIRES_ARM_NEON;
2786 for (size_t input_height = 16; input_height < 64; input_height += 8) {
2787 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2788 DWConv2DMicrokernelTester()
2789 .input_width(input_width)
2790 .input_height(input_height)
2791 .kernel_height(3)
2792 .kernel_width(3)
2793 .subsampling(2)
2794 .padding_left(1)
2795 .padding_right(1)
2796 .padding_top(1)
2797 .padding_bottom(1)
2798 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2799 }
2800 }
2801 }
2802
2803 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_height_lt_4) {
2804 TEST_REQUIRES_ARM_NEON;
2805 for (size_t input_height = 1; input_height < 7; input_height++) {
2806 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2807 DWConv2DMicrokernelTester()
2808 .input_width(input_width)
2809 .input_height(input_height)
2810 .kernel_height(3)
2811 .kernel_width(3)
2812 .subsampling(2)
2813 .padding_left(1)
2814 .padding_right(1)
2815 .padding_top(1)
2816 .padding_bottom(1)
2817 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2818 }
2819 }
2820 }
2821
2822 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, output_height_gt_4) {
2823 TEST_REQUIRES_ARM_NEON;
2824 for (size_t input_height = 9; input_height < 17; input_height++) {
2825 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2826 DWConv2DMicrokernelTester()
2827 .input_width(input_width)
2828 .input_height(input_height)
2829 .kernel_height(3)
2830 .kernel_width(3)
2831 .subsampling(2)
2832 .padding_left(1)
2833 .padding_right(1)
2834 .padding_top(1)
2835 .padding_bottom(1)
2836 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2837 }
2838 }
2839 }
2840
2841 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_4X4, padding_top_eq_1) {
2842 TEST_REQUIRES_ARM_NEON;
2843 for (size_t input_height = 2; input_height < 26; input_height++) {
2844 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2845 DWConv2DMicrokernelTester()
2846 .input_width(input_width)
2847 .input_height(input_height)
2848 .kernel_height(3)
2849 .kernel_width(3)
2850 .subsampling(2)
2851 .padding_left(1)
2852 .padding_right(1)
2853 .padding_top(0)
2854 .padding_bottom(1)
2855 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4);
2856 }
2857 }
2858 }
2859#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2860
2861
2862#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2863 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_width_eq_4) {
2864 TEST_REQUIRES_ARM_NEON;
2865 for (size_t input_width = 7; input_width < 9; input_width++) {
2866 DWConv2DMicrokernelTester()
2867 .input_width(input_width)
2868 .input_height(2)
2869 .kernel_height(3)
2870 .kernel_width(3)
2871 .subsampling(2)
2872 .padding_left(1)
2873 .padding_right(1)
2874 .padding_top(1)
2875 .padding_bottom(1)
2876 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2877 }
2878 }
2879
2880 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_width_div_4) {
2881 TEST_REQUIRES_ARM_NEON;
2882 for (size_t input_width = 16; input_width < 64; input_width += 8) {
2883 DWConv2DMicrokernelTester()
2884 .input_width(input_width)
2885 .input_height(2)
2886 .kernel_height(3)
2887 .kernel_width(3)
2888 .subsampling(2)
2889 .padding_left(1)
2890 .padding_right(1)
2891 .padding_top(1)
2892 .padding_bottom(1)
2893 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2894 }
2895 }
2896
2897 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_width_lt_4) {
2898 TEST_REQUIRES_ARM_NEON;
2899 for (size_t input_width = 1; input_width < 7; input_width++) {
2900 DWConv2DMicrokernelTester()
2901 .input_width(8)
2902 .input_height(2)
2903 .kernel_height(3)
2904 .kernel_width(3)
2905 .subsampling(2)
2906 .padding_left(1)
2907 .padding_right(1)
2908 .padding_top(1)
2909 .padding_bottom(1)
2910 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2911 }
2912 }
2913
2914 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_width_gt_4) {
2915 TEST_REQUIRES_ARM_NEON;
2916 for (size_t input_width = 9; input_width < 17; input_width++) {
2917 DWConv2DMicrokernelTester()
2918 .input_width(input_width)
2919 .input_height(2)
2920 .kernel_height(3)
2921 .kernel_width(3)
2922 .subsampling(2)
2923 .padding_left(1)
2924 .padding_right(1)
2925 .padding_top(1)
2926 .padding_bottom(1)
2927 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2928 }
2929 }
2930
2931 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_height_eq_1) {
2932 TEST_REQUIRES_ARM_NEON;
2933 for (size_t input_height = 1; input_height < 3; input_height++) {
2934 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2935 DWConv2DMicrokernelTester()
2936 .input_width(input_width)
2937 .input_height(input_height)
2938 .kernel_height(3)
2939 .kernel_width(3)
2940 .subsampling(2)
2941 .padding_left(1)
2942 .padding_right(1)
2943 .padding_top(1)
2944 .padding_bottom(1)
2945 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2946 }
2947 }
2948 }
2949
2950 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, output_height_gt_1) {
2951 TEST_REQUIRES_ARM_NEON;
2952 for (size_t input_height = 3; input_height < 5; input_height++) {
2953 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2954 DWConv2DMicrokernelTester()
2955 .input_width(input_width)
2956 .input_height(input_height)
2957 .kernel_height(3)
2958 .kernel_width(3)
2959 .subsampling(2)
2960 .padding_left(1)
2961 .padding_right(1)
2962 .padding_top(1)
2963 .padding_bottom(1)
2964 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2965 }
2966 }
2967 }
2968
2969 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC2, padding_top_eq_1) {
2970 TEST_REQUIRES_ARM_NEON;
2971 for (size_t input_height = 2; input_height < 8; input_height++) {
2972 for (size_t input_width = 1; input_width < 41; input_width += 7) {
2973 DWConv2DMicrokernelTester()
2974 .input_width(input_width)
2975 .input_height(input_height)
2976 .kernel_height(3)
2977 .kernel_width(3)
2978 .subsampling(2)
2979 .padding_left(1)
2980 .padding_right(1)
2981 .padding_top(0)
2982 .padding_bottom(1)
2983 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2);
2984 }
2985 }
2986 }
2987#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2988
2989
2990#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2991 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_width_eq_4) {
2992 TEST_REQUIRES_ARM_NEON;
2993 for (size_t input_width = 7; input_width < 9; input_width++) {
2994 DWConv2DMicrokernelTester()
2995 .input_width(input_width)
2996 .input_height(2)
2997 .kernel_height(3)
2998 .kernel_width(3)
2999 .subsampling(2)
3000 .padding_left(1)
3001 .padding_right(1)
3002 .padding_top(1)
3003 .padding_bottom(1)
3004 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3005 }
3006 }
3007
3008 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_width_div_4) {
3009 TEST_REQUIRES_ARM_NEON;
3010 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3011 DWConv2DMicrokernelTester()
3012 .input_width(input_width)
3013 .input_height(2)
3014 .kernel_height(3)
3015 .kernel_width(3)
3016 .subsampling(2)
3017 .padding_left(1)
3018 .padding_right(1)
3019 .padding_top(1)
3020 .padding_bottom(1)
3021 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3022 }
3023 }
3024
3025 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_width_lt_4) {
3026 TEST_REQUIRES_ARM_NEON;
3027 for (size_t input_width = 1; input_width < 7; input_width++) {
3028 DWConv2DMicrokernelTester()
3029 .input_width(8)
3030 .input_height(2)
3031 .kernel_height(3)
3032 .kernel_width(3)
3033 .subsampling(2)
3034 .padding_left(1)
3035 .padding_right(1)
3036 .padding_top(1)
3037 .padding_bottom(1)
3038 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3039 }
3040 }
3041
3042 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_width_gt_4) {
3043 TEST_REQUIRES_ARM_NEON;
3044 for (size_t input_width = 9; input_width < 17; input_width++) {
3045 DWConv2DMicrokernelTester()
3046 .input_width(input_width)
3047 .input_height(2)
3048 .kernel_height(3)
3049 .kernel_width(3)
3050 .subsampling(2)
3051 .padding_left(1)
3052 .padding_right(1)
3053 .padding_top(1)
3054 .padding_bottom(1)
3055 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3056 }
3057 }
3058
3059 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_height_eq_1) {
3060 TEST_REQUIRES_ARM_NEON;
3061 for (size_t input_height = 1; input_height < 3; input_height++) {
3062 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3063 DWConv2DMicrokernelTester()
3064 .input_width(input_width)
3065 .input_height(input_height)
3066 .kernel_height(3)
3067 .kernel_width(3)
3068 .subsampling(2)
3069 .padding_left(1)
3070 .padding_right(1)
3071 .padding_top(1)
3072 .padding_bottom(1)
3073 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3074 }
3075 }
3076 }
3077
3078 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, output_height_gt_1) {
3079 TEST_REQUIRES_ARM_NEON;
3080 for (size_t input_height = 3; input_height < 5; input_height++) {
3081 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3082 DWConv2DMicrokernelTester()
3083 .input_width(input_width)
3084 .input_height(input_height)
3085 .kernel_height(3)
3086 .kernel_width(3)
3087 .subsampling(2)
3088 .padding_left(1)
3089 .padding_right(1)
3090 .padding_top(1)
3091 .padding_bottom(1)
3092 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3093 }
3094 }
3095 }
3096
3097 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC3, padding_top_eq_1) {
3098 TEST_REQUIRES_ARM_NEON;
3099 for (size_t input_height = 2; input_height < 8; input_height++) {
3100 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3101 DWConv2DMicrokernelTester()
3102 .input_width(input_width)
3103 .input_height(input_height)
3104 .kernel_height(3)
3105 .kernel_width(3)
3106 .subsampling(2)
3107 .padding_left(1)
3108 .padding_right(1)
3109 .padding_top(0)
3110 .padding_bottom(1)
3111 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3);
3112 }
3113 }
3114 }
3115#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3116
3117
3118#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3119 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_width_eq_4) {
3120 TEST_REQUIRES_ARM_NEON;
3121 for (size_t input_width = 7; input_width < 9; input_width++) {
3122 DWConv2DMicrokernelTester()
3123 .input_width(input_width)
3124 .input_height(2)
3125 .kernel_height(3)
3126 .kernel_width(3)
3127 .subsampling(2)
3128 .padding_left(1)
3129 .padding_right(1)
3130 .padding_top(1)
3131 .padding_bottom(1)
3132 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3133 }
3134 }
3135
3136 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_width_div_4) {
3137 TEST_REQUIRES_ARM_NEON;
3138 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3139 DWConv2DMicrokernelTester()
3140 .input_width(input_width)
3141 .input_height(2)
3142 .kernel_height(3)
3143 .kernel_width(3)
3144 .subsampling(2)
3145 .padding_left(1)
3146 .padding_right(1)
3147 .padding_top(1)
3148 .padding_bottom(1)
3149 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3150 }
3151 }
3152
3153 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_width_lt_4) {
3154 TEST_REQUIRES_ARM_NEON;
3155 for (size_t input_width = 1; input_width < 7; input_width++) {
3156 DWConv2DMicrokernelTester()
3157 .input_width(8)
3158 .input_height(2)
3159 .kernel_height(3)
3160 .kernel_width(3)
3161 .subsampling(2)
3162 .padding_left(1)
3163 .padding_right(1)
3164 .padding_top(1)
3165 .padding_bottom(1)
3166 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3167 }
3168 }
3169
3170 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_width_gt_4) {
3171 TEST_REQUIRES_ARM_NEON;
3172 for (size_t input_width = 9; input_width < 17; input_width++) {
3173 DWConv2DMicrokernelTester()
3174 .input_width(input_width)
3175 .input_height(2)
3176 .kernel_height(3)
3177 .kernel_width(3)
3178 .subsampling(2)
3179 .padding_left(1)
3180 .padding_right(1)
3181 .padding_top(1)
3182 .padding_bottom(1)
3183 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3184 }
3185 }
3186
3187 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_height_eq_1) {
3188 TEST_REQUIRES_ARM_NEON;
3189 for (size_t input_height = 1; input_height < 3; input_height++) {
3190 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3191 DWConv2DMicrokernelTester()
3192 .input_width(input_width)
3193 .input_height(input_height)
3194 .kernel_height(3)
3195 .kernel_width(3)
3196 .subsampling(2)
3197 .padding_left(1)
3198 .padding_right(1)
3199 .padding_top(1)
3200 .padding_bottom(1)
3201 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3202 }
3203 }
3204 }
3205
3206 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, output_height_gt_1) {
3207 TEST_REQUIRES_ARM_NEON;
3208 for (size_t input_height = 3; input_height < 5; input_height++) {
3209 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3210 DWConv2DMicrokernelTester()
3211 .input_width(input_width)
3212 .input_height(input_height)
3213 .kernel_height(3)
3214 .kernel_width(3)
3215 .subsampling(2)
3216 .padding_left(1)
3217 .padding_right(1)
3218 .padding_top(1)
3219 .padding_bottom(1)
3220 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3221 }
3222 }
3223 }
3224
3225 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_1X4_ACC4, padding_top_eq_1) {
3226 TEST_REQUIRES_ARM_NEON;
3227 for (size_t input_height = 2; input_height < 8; input_height++) {
3228 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3229 DWConv2DMicrokernelTester()
3230 .input_width(input_width)
3231 .input_height(input_height)
3232 .kernel_height(3)
3233 .kernel_width(3)
3234 .subsampling(2)
3235 .padding_left(1)
3236 .padding_right(1)
3237 .padding_top(0)
3238 .padding_bottom(1)
3239 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4);
3240 }
3241 }
3242 }
3243#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3244
3245
3246#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3247 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_width_eq_4) {
3248 TEST_REQUIRES_ARM_NEON;
3249 for (size_t input_width = 7; input_width < 9; input_width++) {
3250 DWConv2DMicrokernelTester()
3251 .input_width(input_width)
3252 .input_height(4)
3253 .kernel_height(3)
3254 .kernel_width(3)
3255 .subsampling(2)
3256 .padding_left(1)
3257 .padding_right(1)
3258 .padding_top(1)
3259 .padding_bottom(1)
3260 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3261 }
3262 }
3263
3264 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_width_div_4) {
3265 TEST_REQUIRES_ARM_NEON;
3266 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3267 DWConv2DMicrokernelTester()
3268 .input_width(input_width)
3269 .input_height(4)
3270 .kernel_height(3)
3271 .kernel_width(3)
3272 .subsampling(2)
3273 .padding_left(1)
3274 .padding_right(1)
3275 .padding_top(1)
3276 .padding_bottom(1)
3277 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3278 }
3279 }
3280
3281 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_width_lt_4) {
3282 TEST_REQUIRES_ARM_NEON;
3283 for (size_t input_width = 1; input_width < 7; input_width++) {
3284 DWConv2DMicrokernelTester()
3285 .input_width(8)
3286 .input_height(4)
3287 .kernel_height(3)
3288 .kernel_width(3)
3289 .subsampling(2)
3290 .padding_left(1)
3291 .padding_right(1)
3292 .padding_top(1)
3293 .padding_bottom(1)
3294 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3295 }
3296 }
3297
3298 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_width_gt_4) {
3299 TEST_REQUIRES_ARM_NEON;
3300 for (size_t input_width = 9; input_width < 17; input_width++) {
3301 DWConv2DMicrokernelTester()
3302 .input_width(input_width)
3303 .input_height(4)
3304 .kernel_height(3)
3305 .kernel_width(3)
3306 .subsampling(2)
3307 .padding_left(1)
3308 .padding_right(1)
3309 .padding_top(1)
3310 .padding_bottom(1)
3311 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3312 }
3313 }
3314
3315 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_height_eq_2) {
3316 TEST_REQUIRES_ARM_NEON;
3317 for (size_t input_height = 3; input_height < 5; input_height++) {
3318 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3319 DWConv2DMicrokernelTester()
3320 .input_width(input_width)
3321 .input_height(input_height)
3322 .kernel_height(3)
3323 .kernel_width(3)
3324 .subsampling(2)
3325 .padding_left(1)
3326 .padding_right(1)
3327 .padding_top(1)
3328 .padding_bottom(1)
3329 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3330 }
3331 }
3332 }
3333
3334 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_height_div_2) {
3335 TEST_REQUIRES_ARM_NEON;
3336 for (size_t input_height = 8; input_height < 32; input_height += 4) {
3337 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3338 DWConv2DMicrokernelTester()
3339 .input_width(input_width)
3340 .input_height(input_height)
3341 .kernel_height(3)
3342 .kernel_width(3)
3343 .subsampling(2)
3344 .padding_left(1)
3345 .padding_right(1)
3346 .padding_top(1)
3347 .padding_bottom(1)
3348 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3349 }
3350 }
3351 }
3352
3353 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_height_lt_2) {
3354 TEST_REQUIRES_ARM_NEON;
3355 for (size_t input_height = 1; input_height < 3; input_height++) {
3356 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3357 DWConv2DMicrokernelTester()
3358 .input_width(input_width)
3359 .input_height(input_height)
3360 .kernel_height(3)
3361 .kernel_width(3)
3362 .subsampling(2)
3363 .padding_left(1)
3364 .padding_right(1)
3365 .padding_top(1)
3366 .padding_bottom(1)
3367 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3368 }
3369 }
3370 }
3371
3372 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, output_height_gt_2) {
3373 TEST_REQUIRES_ARM_NEON;
3374 for (size_t input_height = 5; input_height < 9; input_height++) {
3375 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3376 DWConv2DMicrokernelTester()
3377 .input_width(input_width)
3378 .input_height(input_height)
3379 .kernel_height(3)
3380 .kernel_width(3)
3381 .subsampling(2)
3382 .padding_left(1)
3383 .padding_right(1)
3384 .padding_top(1)
3385 .padding_bottom(1)
3386 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3387 }
3388 }
3389 }
3390
3391 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEON_2X4_ACC2, padding_top_eq_1) {
3392 TEST_REQUIRES_ARM_NEON;
3393 for (size_t input_height = 2; input_height < 14; input_height++) {
3394 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3395 DWConv2DMicrokernelTester()
3396 .input_width(input_width)
3397 .input_height(input_height)
3398 .kernel_height(3)
3399 .kernel_width(3)
3400 .subsampling(2)
3401 .padding_left(1)
3402 .padding_right(1)
3403 .padding_top(0)
3404 .padding_bottom(1)
3405 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2);
3406 }
3407 }
3408 }
3409#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3410
3411
3412#if XNN_ARCH_ARM64
3413 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_width_eq_4) {
3414 TEST_REQUIRES_ARM_NEON_FMA;
3415 for (size_t input_width = 7; input_width < 9; input_width++) {
3416 DWConv2DMicrokernelTester()
3417 .input_width(input_width)
3418 .input_height(2)
3419 .kernel_height(3)
3420 .kernel_width(3)
3421 .subsampling(2)
3422 .padding_left(1)
3423 .padding_right(1)
3424 .padding_top(1)
3425 .padding_bottom(1)
3426 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3427 }
3428 }
3429
3430 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_width_div_4) {
3431 TEST_REQUIRES_ARM_NEON_FMA;
3432 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3433 DWConv2DMicrokernelTester()
3434 .input_width(input_width)
3435 .input_height(2)
3436 .kernel_height(3)
3437 .kernel_width(3)
3438 .subsampling(2)
3439 .padding_left(1)
3440 .padding_right(1)
3441 .padding_top(1)
3442 .padding_bottom(1)
3443 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3444 }
3445 }
3446
3447 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_width_lt_4) {
3448 TEST_REQUIRES_ARM_NEON_FMA;
3449 for (size_t input_width = 1; input_width < 7; input_width++) {
3450 DWConv2DMicrokernelTester()
3451 .input_width(8)
3452 .input_height(2)
3453 .kernel_height(3)
3454 .kernel_width(3)
3455 .subsampling(2)
3456 .padding_left(1)
3457 .padding_right(1)
3458 .padding_top(1)
3459 .padding_bottom(1)
3460 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3461 }
3462 }
3463
3464 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_width_gt_4) {
3465 TEST_REQUIRES_ARM_NEON_FMA;
3466 for (size_t input_width = 9; input_width < 17; input_width++) {
3467 DWConv2DMicrokernelTester()
3468 .input_width(input_width)
3469 .input_height(2)
3470 .kernel_height(3)
3471 .kernel_width(3)
3472 .subsampling(2)
3473 .padding_left(1)
3474 .padding_right(1)
3475 .padding_top(1)
3476 .padding_bottom(1)
3477 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3478 }
3479 }
3480
3481 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_height_eq_1) {
3482 TEST_REQUIRES_ARM_NEON_FMA;
3483 for (size_t input_height = 1; input_height < 3; input_height++) {
3484 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3485 DWConv2DMicrokernelTester()
3486 .input_width(input_width)
3487 .input_height(input_height)
3488 .kernel_height(3)
3489 .kernel_width(3)
3490 .subsampling(2)
3491 .padding_left(1)
3492 .padding_right(1)
3493 .padding_top(1)
3494 .padding_bottom(1)
3495 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3496 }
3497 }
3498 }
3499
3500 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, output_height_gt_1) {
3501 TEST_REQUIRES_ARM_NEON_FMA;
3502 for (size_t input_height = 3; input_height < 5; input_height++) {
3503 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3504 DWConv2DMicrokernelTester()
3505 .input_width(input_width)
3506 .input_height(input_height)
3507 .kernel_height(3)
3508 .kernel_width(3)
3509 .subsampling(2)
3510 .padding_left(1)
3511 .padding_right(1)
3512 .padding_top(1)
3513 .padding_bottom(1)
3514 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3515 }
3516 }
3517 }
3518
3519 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4, padding_top_eq_1) {
3520 TEST_REQUIRES_ARM_NEON_FMA;
3521 for (size_t input_height = 2; input_height < 8; input_height++) {
3522 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3523 DWConv2DMicrokernelTester()
3524 .input_width(input_width)
3525 .input_height(input_height)
3526 .kernel_height(3)
3527 .kernel_width(3)
3528 .subsampling(2)
3529 .padding_left(1)
3530 .padding_right(1)
3531 .padding_top(0)
3532 .padding_bottom(1)
3533 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4);
3534 }
3535 }
3536 }
3537#endif // XNN_ARCH_ARM64
3538
3539
3540#if XNN_ARCH_ARM64
3541 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_width_eq_4) {
3542 TEST_REQUIRES_ARM_NEON_FMA;
3543 for (size_t input_width = 7; input_width < 9; input_width++) {
3544 DWConv2DMicrokernelTester()
3545 .input_width(input_width)
3546 .input_height(4)
3547 .kernel_height(3)
3548 .kernel_width(3)
3549 .subsampling(2)
3550 .padding_left(1)
3551 .padding_right(1)
3552 .padding_top(1)
3553 .padding_bottom(1)
3554 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3555 }
3556 }
3557
3558 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_width_div_4) {
3559 TEST_REQUIRES_ARM_NEON_FMA;
3560 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3561 DWConv2DMicrokernelTester()
3562 .input_width(input_width)
3563 .input_height(4)
3564 .kernel_height(3)
3565 .kernel_width(3)
3566 .subsampling(2)
3567 .padding_left(1)
3568 .padding_right(1)
3569 .padding_top(1)
3570 .padding_bottom(1)
3571 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3572 }
3573 }
3574
3575 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_width_lt_4) {
3576 TEST_REQUIRES_ARM_NEON_FMA;
3577 for (size_t input_width = 1; input_width < 7; input_width++) {
3578 DWConv2DMicrokernelTester()
3579 .input_width(8)
3580 .input_height(4)
3581 .kernel_height(3)
3582 .kernel_width(3)
3583 .subsampling(2)
3584 .padding_left(1)
3585 .padding_right(1)
3586 .padding_top(1)
3587 .padding_bottom(1)
3588 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3589 }
3590 }
3591
3592 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_width_gt_4) {
3593 TEST_REQUIRES_ARM_NEON_FMA;
3594 for (size_t input_width = 9; input_width < 17; input_width++) {
3595 DWConv2DMicrokernelTester()
3596 .input_width(input_width)
3597 .input_height(4)
3598 .kernel_height(3)
3599 .kernel_width(3)
3600 .subsampling(2)
3601 .padding_left(1)
3602 .padding_right(1)
3603 .padding_top(1)
3604 .padding_bottom(1)
3605 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3606 }
3607 }
3608
3609 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_height_eq_2) {
3610 TEST_REQUIRES_ARM_NEON_FMA;
3611 for (size_t input_height = 3; input_height < 5; input_height++) {
3612 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3613 DWConv2DMicrokernelTester()
3614 .input_width(input_width)
3615 .input_height(input_height)
3616 .kernel_height(3)
3617 .kernel_width(3)
3618 .subsampling(2)
3619 .padding_left(1)
3620 .padding_right(1)
3621 .padding_top(1)
3622 .padding_bottom(1)
3623 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3624 }
3625 }
3626 }
3627
3628 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_height_div_2) {
3629 TEST_REQUIRES_ARM_NEON_FMA;
3630 for (size_t input_height = 8; input_height < 32; input_height += 4) {
3631 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3632 DWConv2DMicrokernelTester()
3633 .input_width(input_width)
3634 .input_height(input_height)
3635 .kernel_height(3)
3636 .kernel_width(3)
3637 .subsampling(2)
3638 .padding_left(1)
3639 .padding_right(1)
3640 .padding_top(1)
3641 .padding_bottom(1)
3642 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3643 }
3644 }
3645 }
3646
3647 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_height_lt_2) {
3648 TEST_REQUIRES_ARM_NEON_FMA;
3649 for (size_t input_height = 1; input_height < 3; input_height++) {
3650 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3651 DWConv2DMicrokernelTester()
3652 .input_width(input_width)
3653 .input_height(input_height)
3654 .kernel_height(3)
3655 .kernel_width(3)
3656 .subsampling(2)
3657 .padding_left(1)
3658 .padding_right(1)
3659 .padding_top(1)
3660 .padding_bottom(1)
3661 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3662 }
3663 }
3664 }
3665
3666 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, output_height_gt_2) {
3667 TEST_REQUIRES_ARM_NEON_FMA;
3668 for (size_t input_height = 5; input_height < 9; input_height++) {
3669 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3670 DWConv2DMicrokernelTester()
3671 .input_width(input_width)
3672 .input_height(input_height)
3673 .kernel_height(3)
3674 .kernel_width(3)
3675 .subsampling(2)
3676 .padding_left(1)
3677 .padding_right(1)
3678 .padding_top(1)
3679 .padding_bottom(1)
3680 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3681 }
3682 }
3683 }
3684
3685 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4, padding_top_eq_1) {
3686 TEST_REQUIRES_ARM_NEON_FMA;
3687 for (size_t input_height = 2; input_height < 14; input_height++) {
3688 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3689 DWConv2DMicrokernelTester()
3690 .input_width(input_width)
3691 .input_height(input_height)
3692 .kernel_height(3)
3693 .kernel_width(3)
3694 .subsampling(2)
3695 .padding_left(1)
3696 .padding_right(1)
3697 .padding_top(0)
3698 .padding_bottom(1)
3699 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4);
3700 }
3701 }
3702 }
3703#endif // XNN_ARCH_ARM64
3704
3705
3706#if XNN_ARCH_ARM64
3707 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_width_eq_4) {
3708 TEST_REQUIRES_ARM_NEON_FMA;
3709 for (size_t input_width = 7; input_width < 9; input_width++) {
3710 DWConv2DMicrokernelTester()
3711 .input_width(input_width)
3712 .input_height(6)
3713 .kernel_height(3)
3714 .kernel_width(3)
3715 .subsampling(2)
3716 .padding_left(1)
3717 .padding_right(1)
3718 .padding_top(1)
3719 .padding_bottom(1)
3720 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3721 }
3722 }
3723
3724 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_width_div_4) {
3725 TEST_REQUIRES_ARM_NEON_FMA;
3726 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3727 DWConv2DMicrokernelTester()
3728 .input_width(input_width)
3729 .input_height(6)
3730 .kernel_height(3)
3731 .kernel_width(3)
3732 .subsampling(2)
3733 .padding_left(1)
3734 .padding_right(1)
3735 .padding_top(1)
3736 .padding_bottom(1)
3737 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3738 }
3739 }
3740
3741 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_width_lt_4) {
3742 TEST_REQUIRES_ARM_NEON_FMA;
3743 for (size_t input_width = 1; input_width < 7; input_width++) {
3744 DWConv2DMicrokernelTester()
3745 .input_width(8)
3746 .input_height(6)
3747 .kernel_height(3)
3748 .kernel_width(3)
3749 .subsampling(2)
3750 .padding_left(1)
3751 .padding_right(1)
3752 .padding_top(1)
3753 .padding_bottom(1)
3754 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3755 }
3756 }
3757
3758 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_width_gt_4) {
3759 TEST_REQUIRES_ARM_NEON_FMA;
3760 for (size_t input_width = 9; input_width < 17; input_width++) {
3761 DWConv2DMicrokernelTester()
3762 .input_width(input_width)
3763 .input_height(6)
3764 .kernel_height(3)
3765 .kernel_width(3)
3766 .subsampling(2)
3767 .padding_left(1)
3768 .padding_right(1)
3769 .padding_top(1)
3770 .padding_bottom(1)
3771 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3772 }
3773 }
3774
3775 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_height_eq_3) {
3776 TEST_REQUIRES_ARM_NEON_FMA;
3777 for (size_t input_height = 5; input_height < 7; input_height++) {
3778 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3779 DWConv2DMicrokernelTester()
3780 .input_width(input_width)
3781 .input_height(input_height)
3782 .kernel_height(3)
3783 .kernel_width(3)
3784 .subsampling(2)
3785 .padding_left(1)
3786 .padding_right(1)
3787 .padding_top(1)
3788 .padding_bottom(1)
3789 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3790 }
3791 }
3792 }
3793
3794 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_height_div_3) {
3795 TEST_REQUIRES_ARM_NEON_FMA;
3796 for (size_t input_height = 12; input_height < 48; input_height += 6) {
3797 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3798 DWConv2DMicrokernelTester()
3799 .input_width(input_width)
3800 .input_height(input_height)
3801 .kernel_height(3)
3802 .kernel_width(3)
3803 .subsampling(2)
3804 .padding_left(1)
3805 .padding_right(1)
3806 .padding_top(1)
3807 .padding_bottom(1)
3808 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3809 }
3810 }
3811 }
3812
3813 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_height_lt_3) {
3814 TEST_REQUIRES_ARM_NEON_FMA;
3815 for (size_t input_height = 1; input_height < 5; input_height++) {
3816 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3817 DWConv2DMicrokernelTester()
3818 .input_width(input_width)
3819 .input_height(input_height)
3820 .kernel_height(3)
3821 .kernel_width(3)
3822 .subsampling(2)
3823 .padding_left(1)
3824 .padding_right(1)
3825 .padding_top(1)
3826 .padding_bottom(1)
3827 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3828 }
3829 }
3830 }
3831
3832 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, output_height_gt_3) {
3833 TEST_REQUIRES_ARM_NEON_FMA;
3834 for (size_t input_height = 7; input_height < 13; input_height++) {
3835 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3836 DWConv2DMicrokernelTester()
3837 .input_width(input_width)
3838 .input_height(input_height)
3839 .kernel_height(3)
3840 .kernel_width(3)
3841 .subsampling(2)
3842 .padding_left(1)
3843 .padding_right(1)
3844 .padding_top(1)
3845 .padding_bottom(1)
3846 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3847 }
3848 }
3849 }
3850
3851 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_3X4, padding_top_eq_1) {
3852 TEST_REQUIRES_ARM_NEON_FMA;
3853 for (size_t input_height = 2; input_height < 20; input_height++) {
3854 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3855 DWConv2DMicrokernelTester()
3856 .input_width(input_width)
3857 .input_height(input_height)
3858 .kernel_height(3)
3859 .kernel_width(3)
3860 .subsampling(2)
3861 .padding_left(1)
3862 .padding_right(1)
3863 .padding_top(0)
3864 .padding_bottom(1)
3865 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4);
3866 }
3867 }
3868 }
3869#endif // XNN_ARCH_ARM64
3870
3871
3872#if XNN_ARCH_ARM64
3873 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_width_eq_4) {
3874 TEST_REQUIRES_ARM_NEON_FMA;
3875 for (size_t input_width = 7; input_width < 9; input_width++) {
3876 DWConv2DMicrokernelTester()
3877 .input_width(input_width)
3878 .input_height(8)
3879 .kernel_height(3)
3880 .kernel_width(3)
3881 .subsampling(2)
3882 .padding_left(1)
3883 .padding_right(1)
3884 .padding_top(1)
3885 .padding_bottom(1)
3886 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3887 }
3888 }
3889
3890 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_width_div_4) {
3891 TEST_REQUIRES_ARM_NEON_FMA;
3892 for (size_t input_width = 16; input_width < 64; input_width += 8) {
3893 DWConv2DMicrokernelTester()
3894 .input_width(input_width)
3895 .input_height(8)
3896 .kernel_height(3)
3897 .kernel_width(3)
3898 .subsampling(2)
3899 .padding_left(1)
3900 .padding_right(1)
3901 .padding_top(1)
3902 .padding_bottom(1)
3903 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3904 }
3905 }
3906
3907 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_width_lt_4) {
3908 TEST_REQUIRES_ARM_NEON_FMA;
3909 for (size_t input_width = 1; input_width < 7; input_width++) {
3910 DWConv2DMicrokernelTester()
3911 .input_width(8)
3912 .input_height(8)
3913 .kernel_height(3)
3914 .kernel_width(3)
3915 .subsampling(2)
3916 .padding_left(1)
3917 .padding_right(1)
3918 .padding_top(1)
3919 .padding_bottom(1)
3920 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3921 }
3922 }
3923
3924 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_width_gt_4) {
3925 TEST_REQUIRES_ARM_NEON_FMA;
3926 for (size_t input_width = 9; input_width < 17; input_width++) {
3927 DWConv2DMicrokernelTester()
3928 .input_width(input_width)
3929 .input_height(8)
3930 .kernel_height(3)
3931 .kernel_width(3)
3932 .subsampling(2)
3933 .padding_left(1)
3934 .padding_right(1)
3935 .padding_top(1)
3936 .padding_bottom(1)
3937 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3938 }
3939 }
3940
3941 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_height_eq_4) {
3942 TEST_REQUIRES_ARM_NEON_FMA;
3943 for (size_t input_height = 7; input_height < 9; input_height++) {
3944 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3945 DWConv2DMicrokernelTester()
3946 .input_width(input_width)
3947 .input_height(input_height)
3948 .kernel_height(3)
3949 .kernel_width(3)
3950 .subsampling(2)
3951 .padding_left(1)
3952 .padding_right(1)
3953 .padding_top(1)
3954 .padding_bottom(1)
3955 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3956 }
3957 }
3958 }
3959
3960 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_height_div_4) {
3961 TEST_REQUIRES_ARM_NEON_FMA;
3962 for (size_t input_height = 16; input_height < 64; input_height += 8) {
3963 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3964 DWConv2DMicrokernelTester()
3965 .input_width(input_width)
3966 .input_height(input_height)
3967 .kernel_height(3)
3968 .kernel_width(3)
3969 .subsampling(2)
3970 .padding_left(1)
3971 .padding_right(1)
3972 .padding_top(1)
3973 .padding_bottom(1)
3974 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3975 }
3976 }
3977 }
3978
3979 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_height_lt_4) {
3980 TEST_REQUIRES_ARM_NEON_FMA;
3981 for (size_t input_height = 1; input_height < 7; input_height++) {
3982 for (size_t input_width = 1; input_width < 41; input_width += 7) {
3983 DWConv2DMicrokernelTester()
3984 .input_width(input_width)
3985 .input_height(input_height)
3986 .kernel_height(3)
3987 .kernel_width(3)
3988 .subsampling(2)
3989 .padding_left(1)
3990 .padding_right(1)
3991 .padding_top(1)
3992 .padding_bottom(1)
3993 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
3994 }
3995 }
3996 }
3997
3998 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, output_height_gt_4) {
3999 TEST_REQUIRES_ARM_NEON_FMA;
4000 for (size_t input_height = 9; input_height < 17; input_height++) {
4001 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4002 DWConv2DMicrokernelTester()
4003 .input_width(input_width)
4004 .input_height(input_height)
4005 .kernel_height(3)
4006 .kernel_width(3)
4007 .subsampling(2)
4008 .padding_left(1)
4009 .padding_right(1)
4010 .padding_top(1)
4011 .padding_bottom(1)
4012 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
4013 }
4014 }
4015 }
4016
4017 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_4X4, padding_top_eq_1) {
4018 TEST_REQUIRES_ARM_NEON_FMA;
4019 for (size_t input_height = 2; input_height < 26; input_height++) {
4020 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4021 DWConv2DMicrokernelTester()
4022 .input_width(input_width)
4023 .input_height(input_height)
4024 .kernel_height(3)
4025 .kernel_width(3)
4026 .subsampling(2)
4027 .padding_left(1)
4028 .padding_right(1)
4029 .padding_top(0)
4030 .padding_bottom(1)
4031 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4);
4032 }
4033 }
4034 }
4035#endif // XNN_ARCH_ARM64
4036
4037
4038#if XNN_ARCH_ARM64
4039 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_width_eq_4) {
4040 TEST_REQUIRES_ARM_NEON_FMA;
4041 for (size_t input_width = 7; input_width < 9; input_width++) {
4042 DWConv2DMicrokernelTester()
4043 .input_width(input_width)
4044 .input_height(2)
4045 .kernel_height(3)
4046 .kernel_width(3)
4047 .subsampling(2)
4048 .padding_left(1)
4049 .padding_right(1)
4050 .padding_top(1)
4051 .padding_bottom(1)
4052 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4053 }
4054 }
4055
4056 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_width_div_4) {
4057 TEST_REQUIRES_ARM_NEON_FMA;
4058 for (size_t input_width = 16; input_width < 64; input_width += 8) {
4059 DWConv2DMicrokernelTester()
4060 .input_width(input_width)
4061 .input_height(2)
4062 .kernel_height(3)
4063 .kernel_width(3)
4064 .subsampling(2)
4065 .padding_left(1)
4066 .padding_right(1)
4067 .padding_top(1)
4068 .padding_bottom(1)
4069 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4070 }
4071 }
4072
4073 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_width_lt_4) {
4074 TEST_REQUIRES_ARM_NEON_FMA;
4075 for (size_t input_width = 1; input_width < 7; input_width++) {
4076 DWConv2DMicrokernelTester()
4077 .input_width(8)
4078 .input_height(2)
4079 .kernel_height(3)
4080 .kernel_width(3)
4081 .subsampling(2)
4082 .padding_left(1)
4083 .padding_right(1)
4084 .padding_top(1)
4085 .padding_bottom(1)
4086 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4087 }
4088 }
4089
4090 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_width_gt_4) {
4091 TEST_REQUIRES_ARM_NEON_FMA;
4092 for (size_t input_width = 9; input_width < 17; input_width++) {
4093 DWConv2DMicrokernelTester()
4094 .input_width(input_width)
4095 .input_height(2)
4096 .kernel_height(3)
4097 .kernel_width(3)
4098 .subsampling(2)
4099 .padding_left(1)
4100 .padding_right(1)
4101 .padding_top(1)
4102 .padding_bottom(1)
4103 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4104 }
4105 }
4106
4107 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_height_eq_1) {
4108 TEST_REQUIRES_ARM_NEON_FMA;
4109 for (size_t input_height = 1; input_height < 3; input_height++) {
4110 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4111 DWConv2DMicrokernelTester()
4112 .input_width(input_width)
4113 .input_height(input_height)
4114 .kernel_height(3)
4115 .kernel_width(3)
4116 .subsampling(2)
4117 .padding_left(1)
4118 .padding_right(1)
4119 .padding_top(1)
4120 .padding_bottom(1)
4121 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4122 }
4123 }
4124 }
4125
4126 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, output_height_gt_1) {
4127 TEST_REQUIRES_ARM_NEON_FMA;
4128 for (size_t input_height = 3; input_height < 5; input_height++) {
4129 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4130 DWConv2DMicrokernelTester()
4131 .input_width(input_width)
4132 .input_height(input_height)
4133 .kernel_height(3)
4134 .kernel_width(3)
4135 .subsampling(2)
4136 .padding_left(1)
4137 .padding_right(1)
4138 .padding_top(1)
4139 .padding_bottom(1)
4140 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4141 }
4142 }
4143 }
4144
4145 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC2, padding_top_eq_1) {
4146 TEST_REQUIRES_ARM_NEON_FMA;
4147 for (size_t input_height = 2; input_height < 8; input_height++) {
4148 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4149 DWConv2DMicrokernelTester()
4150 .input_width(input_width)
4151 .input_height(input_height)
4152 .kernel_height(3)
4153 .kernel_width(3)
4154 .subsampling(2)
4155 .padding_left(1)
4156 .padding_right(1)
4157 .padding_top(0)
4158 .padding_bottom(1)
4159 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2);
4160 }
4161 }
4162 }
4163#endif // XNN_ARCH_ARM64
4164
4165
Marat Dukhan1268a242020-10-24 00:36:32 -07004166#if XNN_ARCH_ARM64
Marat Dukhanbf715f92020-10-23 20:17:00 -07004167 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_width_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004168 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004169 for (size_t input_width = 7; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004170 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004171 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004172 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004173 .kernel_height(3)
4174 .kernel_width(3)
4175 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004176 .padding_left(1)
4177 .padding_right(1)
4178 .padding_top(1)
4179 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004180 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004181 }
4182 }
4183
Marat Dukhanbf715f92020-10-23 20:17:00 -07004184 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_width_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004185 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004186 for (size_t input_width = 16; input_width < 64; input_width += 8) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004187 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004188 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004189 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004190 .kernel_height(3)
4191 .kernel_width(3)
4192 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004193 .padding_left(1)
4194 .padding_right(1)
4195 .padding_top(1)
4196 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004197 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004198 }
4199 }
4200
Marat Dukhanbf715f92020-10-23 20:17:00 -07004201 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_width_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004202 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004203 for (size_t input_width = 1; input_width < 7; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004204 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004205 .input_width(8)
4206 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004207 .kernel_height(3)
4208 .kernel_width(3)
4209 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004210 .padding_left(1)
4211 .padding_right(1)
4212 .padding_top(1)
4213 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004214 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004215 }
4216 }
4217
Marat Dukhanbf715f92020-10-23 20:17:00 -07004218 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_width_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004219 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004220 for (size_t input_width = 9; input_width < 17; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004221 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004222 .input_width(input_width)
4223 .input_height(2)
4224 .kernel_height(3)
4225 .kernel_width(3)
4226 .subsampling(2)
4227 .padding_left(1)
4228 .padding_right(1)
4229 .padding_top(1)
4230 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004231 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004232 }
4233 }
4234
Marat Dukhanbf715f92020-10-23 20:17:00 -07004235 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004236 TEST_REQUIRES_ARM_NEON_FMA;
4237 for (size_t input_height = 1; input_height < 3; input_height++) {
4238 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004239 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004240 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004241 .input_height(input_height)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004242 .kernel_height(3)
4243 .kernel_width(3)
4244 .subsampling(2)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004245 .padding_left(1)
4246 .padding_right(1)
4247 .padding_top(1)
4248 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004249 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004250 }
4251 }
4252 }
4253
Marat Dukhanbf715f92020-10-23 20:17:00 -07004254 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, output_height_gt_1) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004255 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004256 for (size_t input_height = 3; input_height < 5; input_height++) {
4257 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004258 DWConv2DMicrokernelTester()
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004259 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004260 .input_height(input_height)
4261 .kernel_height(3)
4262 .kernel_width(3)
4263 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004264 .padding_left(1)
4265 .padding_right(1)
4266 .padding_top(1)
4267 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004268 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004269 }
4270 }
4271 }
4272
Marat Dukhanbf715f92020-10-23 20:17:00 -07004273 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC3, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004274 TEST_REQUIRES_ARM_NEON_FMA;
4275 for (size_t input_height = 2; input_height < 8; input_height++) {
4276 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004277 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004278 .input_width(input_width)
4279 .input_height(input_height)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004280 .kernel_height(3)
4281 .kernel_width(3)
4282 .subsampling(2)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004283 .padding_left(1)
4284 .padding_right(1)
4285 .padding_top(0)
4286 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004287 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004288 }
4289 }
4290 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004291#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004292
4293
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004294#if XNN_ARCH_ARM64
Marat Dukhan82f0c322020-10-25 19:17:35 -07004295 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_width_eq_4) {
4296 TEST_REQUIRES_ARM_NEON_FMA;
4297 for (size_t input_width = 7; input_width < 9; input_width++) {
4298 DWConv2DMicrokernelTester()
4299 .input_width(input_width)
4300 .input_height(2)
4301 .kernel_height(3)
4302 .kernel_width(3)
4303 .subsampling(2)
4304 .padding_left(1)
4305 .padding_right(1)
4306 .padding_top(1)
4307 .padding_bottom(1)
4308 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4309 }
4310 }
4311
4312 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_width_div_4) {
4313 TEST_REQUIRES_ARM_NEON_FMA;
4314 for (size_t input_width = 16; input_width < 64; input_width += 8) {
4315 DWConv2DMicrokernelTester()
4316 .input_width(input_width)
4317 .input_height(2)
4318 .kernel_height(3)
4319 .kernel_width(3)
4320 .subsampling(2)
4321 .padding_left(1)
4322 .padding_right(1)
4323 .padding_top(1)
4324 .padding_bottom(1)
4325 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4326 }
4327 }
4328
4329 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_width_lt_4) {
4330 TEST_REQUIRES_ARM_NEON_FMA;
4331 for (size_t input_width = 1; input_width < 7; input_width++) {
4332 DWConv2DMicrokernelTester()
4333 .input_width(8)
4334 .input_height(2)
4335 .kernel_height(3)
4336 .kernel_width(3)
4337 .subsampling(2)
4338 .padding_left(1)
4339 .padding_right(1)
4340 .padding_top(1)
4341 .padding_bottom(1)
4342 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4343 }
4344 }
4345
4346 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_width_gt_4) {
4347 TEST_REQUIRES_ARM_NEON_FMA;
4348 for (size_t input_width = 9; input_width < 17; input_width++) {
4349 DWConv2DMicrokernelTester()
4350 .input_width(input_width)
4351 .input_height(2)
4352 .kernel_height(3)
4353 .kernel_width(3)
4354 .subsampling(2)
4355 .padding_left(1)
4356 .padding_right(1)
4357 .padding_top(1)
4358 .padding_bottom(1)
4359 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4360 }
4361 }
4362
4363 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_height_eq_1) {
4364 TEST_REQUIRES_ARM_NEON_FMA;
4365 for (size_t input_height = 1; input_height < 3; input_height++) {
4366 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4367 DWConv2DMicrokernelTester()
4368 .input_width(input_width)
4369 .input_height(input_height)
4370 .kernel_height(3)
4371 .kernel_width(3)
4372 .subsampling(2)
4373 .padding_left(1)
4374 .padding_right(1)
4375 .padding_top(1)
4376 .padding_bottom(1)
4377 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4378 }
4379 }
4380 }
4381
4382 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, output_height_gt_1) {
4383 TEST_REQUIRES_ARM_NEON_FMA;
4384 for (size_t input_height = 3; input_height < 5; input_height++) {
4385 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4386 DWConv2DMicrokernelTester()
4387 .input_width(input_width)
4388 .input_height(input_height)
4389 .kernel_height(3)
4390 .kernel_width(3)
4391 .subsampling(2)
4392 .padding_left(1)
4393 .padding_right(1)
4394 .padding_top(1)
4395 .padding_bottom(1)
4396 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4397 }
4398 }
4399 }
4400
4401 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_1X4_ACC4, padding_top_eq_1) {
4402 TEST_REQUIRES_ARM_NEON_FMA;
4403 for (size_t input_height = 2; input_height < 8; input_height++) {
4404 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4405 DWConv2DMicrokernelTester()
4406 .input_width(input_width)
4407 .input_height(input_height)
4408 .kernel_height(3)
4409 .kernel_width(3)
4410 .subsampling(2)
4411 .padding_left(1)
4412 .padding_right(1)
4413 .padding_top(0)
4414 .padding_bottom(1)
4415 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4);
4416 }
4417 }
4418 }
4419#endif // XNN_ARCH_ARM64
4420
4421
4422#if XNN_ARCH_ARM64
4423 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_width_eq_4) {
4424 TEST_REQUIRES_ARM_NEON_FMA;
4425 for (size_t input_width = 7; input_width < 9; input_width++) {
4426 DWConv2DMicrokernelTester()
4427 .input_width(input_width)
4428 .input_height(4)
4429 .kernel_height(3)
4430 .kernel_width(3)
4431 .subsampling(2)
4432 .padding_left(1)
4433 .padding_right(1)
4434 .padding_top(1)
4435 .padding_bottom(1)
4436 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4437 }
4438 }
4439
4440 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_width_div_4) {
4441 TEST_REQUIRES_ARM_NEON_FMA;
4442 for (size_t input_width = 16; input_width < 64; input_width += 8) {
4443 DWConv2DMicrokernelTester()
4444 .input_width(input_width)
4445 .input_height(4)
4446 .kernel_height(3)
4447 .kernel_width(3)
4448 .subsampling(2)
4449 .padding_left(1)
4450 .padding_right(1)
4451 .padding_top(1)
4452 .padding_bottom(1)
4453 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4454 }
4455 }
4456
4457 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_width_lt_4) {
4458 TEST_REQUIRES_ARM_NEON_FMA;
4459 for (size_t input_width = 1; input_width < 7; input_width++) {
4460 DWConv2DMicrokernelTester()
4461 .input_width(8)
4462 .input_height(4)
4463 .kernel_height(3)
4464 .kernel_width(3)
4465 .subsampling(2)
4466 .padding_left(1)
4467 .padding_right(1)
4468 .padding_top(1)
4469 .padding_bottom(1)
4470 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4471 }
4472 }
4473
4474 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_width_gt_4) {
4475 TEST_REQUIRES_ARM_NEON_FMA;
4476 for (size_t input_width = 9; input_width < 17; input_width++) {
4477 DWConv2DMicrokernelTester()
4478 .input_width(input_width)
4479 .input_height(4)
4480 .kernel_height(3)
4481 .kernel_width(3)
4482 .subsampling(2)
4483 .padding_left(1)
4484 .padding_right(1)
4485 .padding_top(1)
4486 .padding_bottom(1)
4487 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4488 }
4489 }
4490
4491 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_height_eq_2) {
4492 TEST_REQUIRES_ARM_NEON_FMA;
4493 for (size_t input_height = 3; input_height < 5; input_height++) {
4494 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4495 DWConv2DMicrokernelTester()
4496 .input_width(input_width)
4497 .input_height(input_height)
4498 .kernel_height(3)
4499 .kernel_width(3)
4500 .subsampling(2)
4501 .padding_left(1)
4502 .padding_right(1)
4503 .padding_top(1)
4504 .padding_bottom(1)
4505 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4506 }
4507 }
4508 }
4509
4510 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_height_div_2) {
4511 TEST_REQUIRES_ARM_NEON_FMA;
4512 for (size_t input_height = 8; input_height < 32; input_height += 4) {
4513 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4514 DWConv2DMicrokernelTester()
4515 .input_width(input_width)
4516 .input_height(input_height)
4517 .kernel_height(3)
4518 .kernel_width(3)
4519 .subsampling(2)
4520 .padding_left(1)
4521 .padding_right(1)
4522 .padding_top(1)
4523 .padding_bottom(1)
4524 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4525 }
4526 }
4527 }
4528
4529 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_height_lt_2) {
4530 TEST_REQUIRES_ARM_NEON_FMA;
4531 for (size_t input_height = 1; input_height < 3; input_height++) {
4532 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4533 DWConv2DMicrokernelTester()
4534 .input_width(input_width)
4535 .input_height(input_height)
4536 .kernel_height(3)
4537 .kernel_width(3)
4538 .subsampling(2)
4539 .padding_left(1)
4540 .padding_right(1)
4541 .padding_top(1)
4542 .padding_bottom(1)
4543 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4544 }
4545 }
4546 }
4547
4548 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, output_height_gt_2) {
4549 TEST_REQUIRES_ARM_NEON_FMA;
4550 for (size_t input_height = 5; input_height < 9; input_height++) {
4551 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4552 DWConv2DMicrokernelTester()
4553 .input_width(input_width)
4554 .input_height(input_height)
4555 .kernel_height(3)
4556 .kernel_width(3)
4557 .subsampling(2)
4558 .padding_left(1)
4559 .padding_right(1)
4560 .padding_top(1)
4561 .padding_bottom(1)
4562 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4563 }
4564 }
4565 }
4566
4567 TEST(F32_DWCONV2D_CHW_3X3S2P1__NEONFMA_2X4_ACC2, padding_top_eq_1) {
4568 TEST_REQUIRES_ARM_NEON_FMA;
4569 for (size_t input_height = 2; input_height < 14; input_height++) {
4570 for (size_t input_width = 1; input_width < 41; input_width += 7) {
4571 DWConv2DMicrokernelTester()
4572 .input_width(input_width)
4573 .input_height(input_height)
4574 .kernel_height(3)
4575 .kernel_width(3)
4576 .subsampling(2)
4577 .padding_left(1)
4578 .padding_right(1)
4579 .padding_top(0)
4580 .padding_bottom(1)
4581 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2);
4582 }
4583 }
4584 }
4585#endif // XNN_ARCH_ARM64
4586
4587
4588#if XNN_ARCH_ARM64
Marat Dukhanbf715f92020-10-23 20:17:00 -07004589 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_width_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004590 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhanbf715f92020-10-23 20:17:00 -07004591 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004592 .input_width(4)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004593 .input_height(3)
4594 .kernel_height(5)
4595 .kernel_width(5)
4596 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004597 .padding_left(2)
4598 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004599 .padding_top(2)
4600 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004601 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004602 }
4603
Marat Dukhanbf715f92020-10-23 20:17:00 -07004604 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_width_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004605 TEST_REQUIRES_ARM_NEON_FMA;
4606 for (size_t input_width = 8; input_width < 32; input_width += 4) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004607 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004608 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004609 .input_height(3)
4610 .kernel_height(5)
4611 .kernel_width(5)
4612 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004613 .padding_left(2)
4614 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004615 .padding_top(2)
4616 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004617 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004618 }
4619 }
4620
Marat Dukhanbf715f92020-10-23 20:17:00 -07004621 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_width_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004622 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004623 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004624 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004625 .input_width(4)
4626 .input_height(3)
4627 .kernel_height(5)
4628 .kernel_width(5)
4629 .subsampling(1)
4630 .padding_left(2)
4631 .padding_right(2)
4632 .padding_top(2)
4633 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004634 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004635 }
4636 }
4637
Marat Dukhanbf715f92020-10-23 20:17:00 -07004638 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004639 TEST_REQUIRES_ARM_NEON_FMA;
4640 for (size_t input_width = 5; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004641 DWConv2DMicrokernelTester()
Erich Elsen4ad51152019-11-19 13:11:53 -08004642 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004643 .input_height(3)
4644 .kernel_height(5)
4645 .kernel_width(5)
4646 .subsampling(1)
Erich Elsen4ad51152019-11-19 13:11:53 -08004647 .padding_left(2)
4648 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004649 .padding_top(2)
4650 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004651 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
Erich Elsen4ad51152019-11-19 13:11:53 -08004652 }
4653 }
4654
Marat Dukhanbf715f92020-10-23 20:17:00 -07004655 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_height_div_3) {
Erich Elsen4ad51152019-11-19 13:11:53 -08004656 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004657 for (size_t input_height = 6; input_height < 24; input_height += 3) {
4658 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004659 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004660 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004661 .input_height(input_height)
4662 .kernel_height(5)
4663 .kernel_width(5)
4664 .subsampling(1)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004665 .padding_left(2)
4666 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004667 .padding_top(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004668 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004669 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004670 }
4671 }
4672 }
4673
Marat Dukhanbf715f92020-10-23 20:17:00 -07004674 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_height_lt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004675 TEST_REQUIRES_ARM_NEON_FMA;
4676 for (size_t input_height = 1; input_height < 3; input_height++) {
4677 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004678 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004679 .input_width(input_width)
4680 .input_height(input_height)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004681 .kernel_height(5)
4682 .kernel_width(5)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004683 .subsampling(1)
4684 .padding_left(2)
4685 .padding_right(2)
4686 .padding_top(2)
4687 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004688 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004689 }
4690 }
4691 }
4692
Marat Dukhanbf715f92020-10-23 20:17:00 -07004693 TEST(F32_DWCONV2D_CHW_5X5P2__NEONFMA_3X4, output_height_gt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004694 TEST_REQUIRES_ARM_NEON_FMA;
4695 for (size_t input_height = 4; input_height < 7; input_height++) {
4696 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004697 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004698 .input_width(input_width)
4699 .input_height(input_height)
4700 .kernel_height(5)
4701 .kernel_width(5)
4702 .subsampling(1)
4703 .padding_left(2)
4704 .padding_right(2)
4705 .padding_top(2)
4706 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004707 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004708 }
4709 }
4710 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004711#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07004712
4713
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004714#if XNN_ARCH_ARM64
Marat Dukhanbf715f92020-10-23 20:17:00 -07004715 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_width_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004716 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004717 for (size_t input_width = 7; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004718 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004719 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004720 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004721 .kernel_height(5)
4722 .kernel_width(5)
4723 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004724 .padding_left(2)
4725 .padding_right(2)
4726 .padding_top(2)
4727 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004728 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004729 }
4730 }
4731
Marat Dukhanbf715f92020-10-23 20:17:00 -07004732 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_width_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004733 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004734 for (size_t input_width = 16; input_width < 64; input_width += 8) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004735 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004736 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004737 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004738 .kernel_height(5)
4739 .kernel_width(5)
4740 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004741 .padding_left(2)
4742 .padding_right(2)
4743 .padding_top(2)
4744 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004745 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004746 }
4747 }
4748
Marat Dukhanbf715f92020-10-23 20:17:00 -07004749 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_width_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004750 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004751 for (size_t input_width = 1; input_width < 7; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004752 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004753 .input_width(8)
4754 .input_height(2)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004755 .kernel_height(5)
4756 .kernel_width(5)
4757 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004758 .padding_left(2)
4759 .padding_right(2)
4760 .padding_top(2)
4761 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004762 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004763 }
4764 }
4765
Marat Dukhanbf715f92020-10-23 20:17:00 -07004766 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_width_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004767 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004768 for (size_t input_width = 9; input_width < 17; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004769 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004770 .input_width(input_width)
4771 .input_height(2)
4772 .kernel_height(5)
4773 .kernel_width(5)
4774 .subsampling(2)
4775 .padding_left(2)
4776 .padding_right(2)
4777 .padding_top(2)
4778 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004779 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004780 }
4781 }
4782
Marat Dukhanbf715f92020-10-23 20:17:00 -07004783 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004784 TEST_REQUIRES_ARM_NEON_FMA;
4785 for (size_t input_height = 1; input_height < 3; input_height++) {
4786 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004787 DWConv2DMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07004788 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004789 .input_height(input_height)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004790 .kernel_height(5)
4791 .kernel_width(5)
4792 .subsampling(2)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004793 .padding_left(2)
4794 .padding_right(2)
4795 .padding_top(2)
4796 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004797 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004798 }
4799 }
4800 }
4801
Marat Dukhanbf715f92020-10-23 20:17:00 -07004802 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, output_height_gt_1) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004803 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004804 for (size_t input_height = 3; input_height < 5; input_height++) {
4805 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004806 DWConv2DMicrokernelTester()
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004807 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004808 .input_height(input_height)
4809 .kernel_height(5)
4810 .kernel_width(5)
4811 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004812 .padding_left(2)
4813 .padding_right(2)
4814 .padding_top(2)
4815 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004816 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004817 }
4818 }
4819 }
4820
Marat Dukhanbf715f92020-10-23 20:17:00 -07004821 TEST(F32_DWCONV2D_CHW_5X5S2P2__NEONFMA_1X4_ACC2, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004822 TEST_REQUIRES_ARM_NEON_FMA;
4823 for (size_t input_height = 2; input_height < 8; input_height++) {
4824 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07004825 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004826 .input_width(input_width)
4827 .input_height(input_height)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004828 .kernel_height(5)
4829 .kernel_width(5)
4830 .subsampling(2)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004831 .padding_left(2)
4832 .padding_right(2)
4833 .padding_top(1)
4834 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07004835 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004836 }
4837 }
4838 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07004839#endif // XNN_ARCH_ARM64
Erich Elsen0cc2c532019-10-15 04:44:18 -07004840
Erich Elsen4e5db3d2020-05-07 08:57:47 -07004841
Marat Dukhandc6c77f2020-10-23 19:09:10 -07004842#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan470078a2020-10-23 22:36:52 -07004843 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4, output_width_eq_4) {
4844 TEST_REQUIRES_X86_SSE;
4845 DWConv2DMicrokernelTester()
4846 .input_width(4)
4847 .input_height(1)
4848 .kernel_height(3)
4849 .kernel_width(3)
4850 .subsampling(1)
4851 .padding_left(1)
4852 .padding_right(1)
4853 .padding_top(1)
4854 .padding_bottom(1)
4855 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4);
4856 }
4857
4858 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4, output_width_div_4) {
4859 TEST_REQUIRES_X86_SSE;
4860 for (size_t input_width = 8; input_width < 32; input_width += 4) {
4861 DWConv2DMicrokernelTester()
4862 .input_width(input_width)
4863 .input_height(1)
4864 .kernel_height(3)
4865 .kernel_width(3)
4866 .subsampling(1)
4867 .padding_left(1)
4868 .padding_right(1)
4869 .padding_top(1)
4870 .padding_bottom(1)
4871 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4);
4872 }
4873 }
4874
4875 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4, output_width_lt_4) {
4876 TEST_REQUIRES_X86_SSE;
4877 for (size_t input_width = 1; input_width < 4; input_width++) {
4878 DWConv2DMicrokernelTester()
4879 .input_width(4)
4880 .input_height(1)
4881 .kernel_height(3)
4882 .kernel_width(3)
4883 .subsampling(1)
4884 .padding_left(1)
4885 .padding_right(1)
4886 .padding_top(1)
4887 .padding_bottom(1)
4888 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4);
4889 }
4890 }
4891
4892 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4, output_width_gt_4) {
4893 TEST_REQUIRES_X86_SSE;
4894 for (size_t input_width = 5; input_width < 9; input_width++) {
4895 DWConv2DMicrokernelTester()
4896 .input_width(input_width)
4897 .input_height(1)
4898 .kernel_height(3)
4899 .kernel_width(3)
4900 .subsampling(1)
4901 .padding_left(1)
4902 .padding_right(1)
4903 .padding_top(1)
4904 .padding_bottom(1)
4905 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4);
4906 }
4907 }
4908
4909 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4, output_height_gt_1) {
4910 TEST_REQUIRES_X86_SSE;
4911 for (size_t input_height = 2; input_height < 3; input_height++) {
4912 for (size_t input_width = 1; input_width < 21; input_width += 3) {
4913 DWConv2DMicrokernelTester()
4914 .input_width(input_width)
4915 .input_height(input_height)
4916 .kernel_height(3)
4917 .kernel_width(3)
4918 .subsampling(1)
4919 .padding_left(1)
4920 .padding_right(1)
4921 .padding_top(1)
4922 .padding_bottom(1)
4923 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4);
4924 }
4925 }
4926 }
4927#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4928
4929
4930#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4931 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_width_eq_4) {
4932 TEST_REQUIRES_X86_SSE;
4933 DWConv2DMicrokernelTester()
4934 .input_width(4)
4935 .input_height(2)
4936 .kernel_height(3)
4937 .kernel_width(3)
4938 .subsampling(1)
4939 .padding_left(1)
4940 .padding_right(1)
4941 .padding_top(1)
4942 .padding_bottom(1)
4943 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
4944 }
4945
4946 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_width_div_4) {
4947 TEST_REQUIRES_X86_SSE;
4948 for (size_t input_width = 8; input_width < 32; input_width += 4) {
4949 DWConv2DMicrokernelTester()
4950 .input_width(input_width)
4951 .input_height(2)
4952 .kernel_height(3)
4953 .kernel_width(3)
4954 .subsampling(1)
4955 .padding_left(1)
4956 .padding_right(1)
4957 .padding_top(1)
4958 .padding_bottom(1)
4959 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
4960 }
4961 }
4962
4963 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_width_lt_4) {
4964 TEST_REQUIRES_X86_SSE;
4965 for (size_t input_width = 1; input_width < 4; input_width++) {
4966 DWConv2DMicrokernelTester()
4967 .input_width(4)
4968 .input_height(2)
4969 .kernel_height(3)
4970 .kernel_width(3)
4971 .subsampling(1)
4972 .padding_left(1)
4973 .padding_right(1)
4974 .padding_top(1)
4975 .padding_bottom(1)
4976 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
4977 }
4978 }
4979
4980 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_width_gt_4) {
4981 TEST_REQUIRES_X86_SSE;
4982 for (size_t input_width = 5; input_width < 9; input_width++) {
4983 DWConv2DMicrokernelTester()
4984 .input_width(input_width)
4985 .input_height(2)
4986 .kernel_height(3)
4987 .kernel_width(3)
4988 .subsampling(1)
4989 .padding_left(1)
4990 .padding_right(1)
4991 .padding_top(1)
4992 .padding_bottom(1)
4993 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
4994 }
4995 }
4996
4997 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_height_div_2) {
4998 TEST_REQUIRES_X86_SSE;
4999 for (size_t input_height = 4; input_height < 16; input_height += 2) {
5000 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5001 DWConv2DMicrokernelTester()
5002 .input_width(input_width)
5003 .input_height(input_height)
5004 .kernel_height(3)
5005 .kernel_width(3)
5006 .subsampling(1)
5007 .padding_left(1)
5008 .padding_right(1)
5009 .padding_top(1)
5010 .padding_bottom(1)
5011 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
5012 }
5013 }
5014 }
5015
5016 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_height_lt_2) {
5017 TEST_REQUIRES_X86_SSE;
5018 for (size_t input_height = 1; input_height < 2; input_height++) {
5019 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5020 DWConv2DMicrokernelTester()
5021 .input_width(input_width)
5022 .input_height(input_height)
5023 .kernel_height(3)
5024 .kernel_width(3)
5025 .subsampling(1)
5026 .padding_left(1)
5027 .padding_right(1)
5028 .padding_top(1)
5029 .padding_bottom(1)
5030 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
5031 }
5032 }
5033 }
5034
5035 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4, output_height_gt_2) {
5036 TEST_REQUIRES_X86_SSE;
5037 for (size_t input_height = 3; input_height < 5; input_height++) {
5038 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5039 DWConv2DMicrokernelTester()
5040 .input_width(input_width)
5041 .input_height(input_height)
5042 .kernel_height(3)
5043 .kernel_width(3)
5044 .subsampling(1)
5045 .padding_left(1)
5046 .padding_right(1)
5047 .padding_top(1)
5048 .padding_bottom(1)
5049 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4);
5050 }
5051 }
5052 }
5053#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5054
5055
5056#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5057 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_width_eq_4) {
5058 TEST_REQUIRES_X86_SSE;
5059 DWConv2DMicrokernelTester()
5060 .input_width(4)
5061 .input_height(3)
5062 .kernel_height(3)
5063 .kernel_width(3)
5064 .subsampling(1)
5065 .padding_left(1)
5066 .padding_right(1)
5067 .padding_top(1)
5068 .padding_bottom(1)
5069 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5070 }
5071
5072 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_width_div_4) {
5073 TEST_REQUIRES_X86_SSE;
5074 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5075 DWConv2DMicrokernelTester()
5076 .input_width(input_width)
5077 .input_height(3)
5078 .kernel_height(3)
5079 .kernel_width(3)
5080 .subsampling(1)
5081 .padding_left(1)
5082 .padding_right(1)
5083 .padding_top(1)
5084 .padding_bottom(1)
5085 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5086 }
5087 }
5088
5089 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_width_lt_4) {
5090 TEST_REQUIRES_X86_SSE;
5091 for (size_t input_width = 1; input_width < 4; input_width++) {
5092 DWConv2DMicrokernelTester()
5093 .input_width(4)
5094 .input_height(3)
5095 .kernel_height(3)
5096 .kernel_width(3)
5097 .subsampling(1)
5098 .padding_left(1)
5099 .padding_right(1)
5100 .padding_top(1)
5101 .padding_bottom(1)
5102 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5103 }
5104 }
5105
5106 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_width_gt_4) {
5107 TEST_REQUIRES_X86_SSE;
5108 for (size_t input_width = 5; input_width < 9; input_width++) {
5109 DWConv2DMicrokernelTester()
5110 .input_width(input_width)
5111 .input_height(3)
5112 .kernel_height(3)
5113 .kernel_width(3)
5114 .subsampling(1)
5115 .padding_left(1)
5116 .padding_right(1)
5117 .padding_top(1)
5118 .padding_bottom(1)
5119 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5120 }
5121 }
5122
5123 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_height_div_3) {
5124 TEST_REQUIRES_X86_SSE;
5125 for (size_t input_height = 6; input_height < 24; input_height += 3) {
5126 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5127 DWConv2DMicrokernelTester()
5128 .input_width(input_width)
5129 .input_height(input_height)
5130 .kernel_height(3)
5131 .kernel_width(3)
5132 .subsampling(1)
5133 .padding_left(1)
5134 .padding_right(1)
5135 .padding_top(1)
5136 .padding_bottom(1)
5137 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5138 }
5139 }
5140 }
5141
5142 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_height_lt_3) {
5143 TEST_REQUIRES_X86_SSE;
5144 for (size_t input_height = 1; input_height < 3; input_height++) {
5145 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5146 DWConv2DMicrokernelTester()
5147 .input_width(input_width)
5148 .input_height(input_height)
5149 .kernel_height(3)
5150 .kernel_width(3)
5151 .subsampling(1)
5152 .padding_left(1)
5153 .padding_right(1)
5154 .padding_top(1)
5155 .padding_bottom(1)
5156 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5157 }
5158 }
5159 }
5160
5161 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_3X4, output_height_gt_3) {
5162 TEST_REQUIRES_X86_SSE;
5163 for (size_t input_height = 4; input_height < 7; input_height++) {
5164 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5165 DWConv2DMicrokernelTester()
5166 .input_width(input_width)
5167 .input_height(input_height)
5168 .kernel_height(3)
5169 .kernel_width(3)
5170 .subsampling(1)
5171 .padding_left(1)
5172 .padding_right(1)
5173 .padding_top(1)
5174 .padding_bottom(1)
5175 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4);
5176 }
5177 }
5178 }
5179#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5180
5181
5182#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5183 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_width_eq_4) {
5184 TEST_REQUIRES_X86_SSE;
5185 DWConv2DMicrokernelTester()
5186 .input_width(4)
5187 .input_height(4)
5188 .kernel_height(3)
5189 .kernel_width(3)
5190 .subsampling(1)
5191 .padding_left(1)
5192 .padding_right(1)
5193 .padding_top(1)
5194 .padding_bottom(1)
5195 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5196 }
5197
5198 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_width_div_4) {
5199 TEST_REQUIRES_X86_SSE;
5200 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5201 DWConv2DMicrokernelTester()
5202 .input_width(input_width)
5203 .input_height(4)
5204 .kernel_height(3)
5205 .kernel_width(3)
5206 .subsampling(1)
5207 .padding_left(1)
5208 .padding_right(1)
5209 .padding_top(1)
5210 .padding_bottom(1)
5211 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5212 }
5213 }
5214
5215 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_width_lt_4) {
5216 TEST_REQUIRES_X86_SSE;
5217 for (size_t input_width = 1; input_width < 4; input_width++) {
5218 DWConv2DMicrokernelTester()
5219 .input_width(4)
5220 .input_height(4)
5221 .kernel_height(3)
5222 .kernel_width(3)
5223 .subsampling(1)
5224 .padding_left(1)
5225 .padding_right(1)
5226 .padding_top(1)
5227 .padding_bottom(1)
5228 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5229 }
5230 }
5231
5232 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_width_gt_4) {
5233 TEST_REQUIRES_X86_SSE;
5234 for (size_t input_width = 5; input_width < 9; input_width++) {
5235 DWConv2DMicrokernelTester()
5236 .input_width(input_width)
5237 .input_height(4)
5238 .kernel_height(3)
5239 .kernel_width(3)
5240 .subsampling(1)
5241 .padding_left(1)
5242 .padding_right(1)
5243 .padding_top(1)
5244 .padding_bottom(1)
5245 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5246 }
5247 }
5248
5249 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_height_div_4) {
5250 TEST_REQUIRES_X86_SSE;
5251 for (size_t input_height = 8; input_height < 32; input_height += 4) {
5252 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5253 DWConv2DMicrokernelTester()
5254 .input_width(input_width)
5255 .input_height(input_height)
5256 .kernel_height(3)
5257 .kernel_width(3)
5258 .subsampling(1)
5259 .padding_left(1)
5260 .padding_right(1)
5261 .padding_top(1)
5262 .padding_bottom(1)
5263 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5264 }
5265 }
5266 }
5267
5268 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_height_lt_4) {
5269 TEST_REQUIRES_X86_SSE;
5270 for (size_t input_height = 1; input_height < 4; input_height++) {
5271 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5272 DWConv2DMicrokernelTester()
5273 .input_width(input_width)
5274 .input_height(input_height)
5275 .kernel_height(3)
5276 .kernel_width(3)
5277 .subsampling(1)
5278 .padding_left(1)
5279 .padding_right(1)
5280 .padding_top(1)
5281 .padding_bottom(1)
5282 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5283 }
5284 }
5285 }
5286
5287 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_4X4, output_height_gt_4) {
5288 TEST_REQUIRES_X86_SSE;
5289 for (size_t input_height = 5; input_height < 9; input_height++) {
5290 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5291 DWConv2DMicrokernelTester()
5292 .input_width(input_width)
5293 .input_height(input_height)
5294 .kernel_height(3)
5295 .kernel_width(3)
5296 .subsampling(1)
5297 .padding_left(1)
5298 .padding_right(1)
5299 .padding_top(1)
5300 .padding_bottom(1)
5301 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4);
5302 }
5303 }
5304 }
5305#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5306
5307
5308#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5309 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_width_eq_4) {
5310 TEST_REQUIRES_X86_SSE;
5311 DWConv2DMicrokernelTester()
5312 .input_width(4)
5313 .input_height(5)
5314 .kernel_height(3)
5315 .kernel_width(3)
5316 .subsampling(1)
5317 .padding_left(1)
5318 .padding_right(1)
5319 .padding_top(1)
5320 .padding_bottom(1)
5321 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5322 }
5323
5324 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_width_div_4) {
5325 TEST_REQUIRES_X86_SSE;
5326 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5327 DWConv2DMicrokernelTester()
5328 .input_width(input_width)
5329 .input_height(5)
5330 .kernel_height(3)
5331 .kernel_width(3)
5332 .subsampling(1)
5333 .padding_left(1)
5334 .padding_right(1)
5335 .padding_top(1)
5336 .padding_bottom(1)
5337 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5338 }
5339 }
5340
5341 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_width_lt_4) {
5342 TEST_REQUIRES_X86_SSE;
5343 for (size_t input_width = 1; input_width < 4; input_width++) {
5344 DWConv2DMicrokernelTester()
5345 .input_width(4)
5346 .input_height(5)
5347 .kernel_height(3)
5348 .kernel_width(3)
5349 .subsampling(1)
5350 .padding_left(1)
5351 .padding_right(1)
5352 .padding_top(1)
5353 .padding_bottom(1)
5354 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5355 }
5356 }
5357
5358 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_width_gt_4) {
5359 TEST_REQUIRES_X86_SSE;
5360 for (size_t input_width = 5; input_width < 9; input_width++) {
5361 DWConv2DMicrokernelTester()
5362 .input_width(input_width)
5363 .input_height(5)
5364 .kernel_height(3)
5365 .kernel_width(3)
5366 .subsampling(1)
5367 .padding_left(1)
5368 .padding_right(1)
5369 .padding_top(1)
5370 .padding_bottom(1)
5371 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5372 }
5373 }
5374
5375 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_height_div_5) {
5376 TEST_REQUIRES_X86_SSE;
5377 for (size_t input_height = 10; input_height < 40; input_height += 5) {
5378 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5379 DWConv2DMicrokernelTester()
5380 .input_width(input_width)
5381 .input_height(input_height)
5382 .kernel_height(3)
5383 .kernel_width(3)
5384 .subsampling(1)
5385 .padding_left(1)
5386 .padding_right(1)
5387 .padding_top(1)
5388 .padding_bottom(1)
5389 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5390 }
5391 }
5392 }
5393
5394 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_height_lt_5) {
5395 TEST_REQUIRES_X86_SSE;
5396 for (size_t input_height = 1; input_height < 5; input_height++) {
5397 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5398 DWConv2DMicrokernelTester()
5399 .input_width(input_width)
5400 .input_height(input_height)
5401 .kernel_height(3)
5402 .kernel_width(3)
5403 .subsampling(1)
5404 .padding_left(1)
5405 .padding_right(1)
5406 .padding_top(1)
5407 .padding_bottom(1)
5408 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5409 }
5410 }
5411 }
5412
5413 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_5X4, output_height_gt_5) {
5414 TEST_REQUIRES_X86_SSE;
5415 for (size_t input_height = 6; input_height < 11; input_height++) {
5416 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5417 DWConv2DMicrokernelTester()
5418 .input_width(input_width)
5419 .input_height(input_height)
5420 .kernel_height(3)
5421 .kernel_width(3)
5422 .subsampling(1)
5423 .padding_left(1)
5424 .padding_right(1)
5425 .padding_top(1)
5426 .padding_bottom(1)
5427 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4);
5428 }
5429 }
5430 }
5431#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5432
5433
5434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5435 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_width_eq_4) {
5436 TEST_REQUIRES_X86_SSE;
5437 DWConv2DMicrokernelTester()
5438 .input_width(4)
5439 .input_height(6)
5440 .kernel_height(3)
5441 .kernel_width(3)
5442 .subsampling(1)
5443 .padding_left(1)
5444 .padding_right(1)
5445 .padding_top(1)
5446 .padding_bottom(1)
5447 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5448 }
5449
5450 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_width_div_4) {
5451 TEST_REQUIRES_X86_SSE;
5452 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5453 DWConv2DMicrokernelTester()
5454 .input_width(input_width)
5455 .input_height(6)
5456 .kernel_height(3)
5457 .kernel_width(3)
5458 .subsampling(1)
5459 .padding_left(1)
5460 .padding_right(1)
5461 .padding_top(1)
5462 .padding_bottom(1)
5463 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5464 }
5465 }
5466
5467 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_width_lt_4) {
5468 TEST_REQUIRES_X86_SSE;
5469 for (size_t input_width = 1; input_width < 4; input_width++) {
5470 DWConv2DMicrokernelTester()
5471 .input_width(4)
5472 .input_height(6)
5473 .kernel_height(3)
5474 .kernel_width(3)
5475 .subsampling(1)
5476 .padding_left(1)
5477 .padding_right(1)
5478 .padding_top(1)
5479 .padding_bottom(1)
5480 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5481 }
5482 }
5483
5484 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_width_gt_4) {
5485 TEST_REQUIRES_X86_SSE;
5486 for (size_t input_width = 5; input_width < 9; input_width++) {
5487 DWConv2DMicrokernelTester()
5488 .input_width(input_width)
5489 .input_height(6)
5490 .kernel_height(3)
5491 .kernel_width(3)
5492 .subsampling(1)
5493 .padding_left(1)
5494 .padding_right(1)
5495 .padding_top(1)
5496 .padding_bottom(1)
5497 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5498 }
5499 }
5500
5501 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_height_div_6) {
5502 TEST_REQUIRES_X86_SSE;
5503 for (size_t input_height = 12; input_height < 48; input_height += 6) {
5504 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5505 DWConv2DMicrokernelTester()
5506 .input_width(input_width)
5507 .input_height(input_height)
5508 .kernel_height(3)
5509 .kernel_width(3)
5510 .subsampling(1)
5511 .padding_left(1)
5512 .padding_right(1)
5513 .padding_top(1)
5514 .padding_bottom(1)
5515 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5516 }
5517 }
5518 }
5519
5520 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_height_lt_6) {
5521 TEST_REQUIRES_X86_SSE;
5522 for (size_t input_height = 1; input_height < 6; input_height++) {
5523 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5524 DWConv2DMicrokernelTester()
5525 .input_width(input_width)
5526 .input_height(input_height)
5527 .kernel_height(3)
5528 .kernel_width(3)
5529 .subsampling(1)
5530 .padding_left(1)
5531 .padding_right(1)
5532 .padding_top(1)
5533 .padding_bottom(1)
5534 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5535 }
5536 }
5537 }
5538
5539 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_6X4, output_height_gt_6) {
5540 TEST_REQUIRES_X86_SSE;
5541 for (size_t input_height = 7; input_height < 13; input_height++) {
5542 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5543 DWConv2DMicrokernelTester()
5544 .input_width(input_width)
5545 .input_height(input_height)
5546 .kernel_height(3)
5547 .kernel_width(3)
5548 .subsampling(1)
5549 .padding_left(1)
5550 .padding_right(1)
5551 .padding_top(1)
5552 .padding_bottom(1)
5553 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4);
5554 }
5555 }
5556 }
5557#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5558
5559
5560#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5561 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC2, output_width_eq_4) {
5562 TEST_REQUIRES_X86_SSE;
5563 DWConv2DMicrokernelTester()
5564 .input_width(4)
5565 .input_height(1)
5566 .kernel_height(3)
5567 .kernel_width(3)
5568 .subsampling(1)
5569 .padding_left(1)
5570 .padding_right(1)
5571 .padding_top(1)
5572 .padding_bottom(1)
5573 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2);
5574 }
5575
5576 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC2, output_width_div_4) {
5577 TEST_REQUIRES_X86_SSE;
5578 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5579 DWConv2DMicrokernelTester()
5580 .input_width(input_width)
5581 .input_height(1)
5582 .kernel_height(3)
5583 .kernel_width(3)
5584 .subsampling(1)
5585 .padding_left(1)
5586 .padding_right(1)
5587 .padding_top(1)
5588 .padding_bottom(1)
5589 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2);
5590 }
5591 }
5592
5593 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC2, output_width_lt_4) {
5594 TEST_REQUIRES_X86_SSE;
5595 for (size_t input_width = 1; input_width < 4; input_width++) {
5596 DWConv2DMicrokernelTester()
5597 .input_width(4)
5598 .input_height(1)
5599 .kernel_height(3)
5600 .kernel_width(3)
5601 .subsampling(1)
5602 .padding_left(1)
5603 .padding_right(1)
5604 .padding_top(1)
5605 .padding_bottom(1)
5606 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2);
5607 }
5608 }
5609
5610 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC2, output_width_gt_4) {
5611 TEST_REQUIRES_X86_SSE;
5612 for (size_t input_width = 5; input_width < 9; input_width++) {
5613 DWConv2DMicrokernelTester()
5614 .input_width(input_width)
5615 .input_height(1)
5616 .kernel_height(3)
5617 .kernel_width(3)
5618 .subsampling(1)
5619 .padding_left(1)
5620 .padding_right(1)
5621 .padding_top(1)
5622 .padding_bottom(1)
5623 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2);
5624 }
5625 }
5626
5627 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC2, output_height_gt_1) {
5628 TEST_REQUIRES_X86_SSE;
5629 for (size_t input_height = 2; input_height < 3; input_height++) {
5630 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5631 DWConv2DMicrokernelTester()
5632 .input_width(input_width)
5633 .input_height(input_height)
5634 .kernel_height(3)
5635 .kernel_width(3)
5636 .subsampling(1)
5637 .padding_left(1)
5638 .padding_right(1)
5639 .padding_top(1)
5640 .padding_bottom(1)
5641 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2);
5642 }
5643 }
5644 }
5645#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5646
5647
5648#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanbf715f92020-10-23 20:17:00 -07005649 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC3, output_width_eq_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005650 TEST_REQUIRES_X86_SSE;
Marat Dukhanbf715f92020-10-23 20:17:00 -07005651 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005652 .input_width(4)
5653 .input_height(1)
5654 .kernel_height(3)
5655 .kernel_width(3)
5656 .subsampling(1)
Erich Elsen0cc2c532019-10-15 04:44:18 -07005657 .padding_left(1)
5658 .padding_right(1)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07005659 .padding_top(1)
5660 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07005661 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005662 }
5663
Marat Dukhanbf715f92020-10-23 20:17:00 -07005664 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC3, output_width_div_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005665 TEST_REQUIRES_X86_SSE;
5666 for (size_t input_width = 8; input_width < 32; input_width += 4) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07005667 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005668 .input_width(input_width)
5669 .input_height(1)
5670 .kernel_height(3)
5671 .kernel_width(3)
5672 .subsampling(1)
5673 .padding_left(1)
5674 .padding_right(1)
5675 .padding_top(1)
5676 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07005677 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005678 }
5679 }
5680
Marat Dukhanbf715f92020-10-23 20:17:00 -07005681 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC3, output_width_lt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005682 TEST_REQUIRES_X86_SSE;
5683 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07005684 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005685 .input_width(4)
5686 .input_height(1)
5687 .kernel_height(3)
5688 .kernel_width(3)
5689 .subsampling(1)
5690 .padding_left(1)
5691 .padding_right(1)
5692 .padding_top(1)
5693 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07005694 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005695 }
5696 }
5697
Marat Dukhanbf715f92020-10-23 20:17:00 -07005698 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC3, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005699 TEST_REQUIRES_X86_SSE;
5700 for (size_t input_width = 5; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07005701 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005702 .input_width(input_width)
5703 .input_height(1)
5704 .kernel_height(3)
5705 .kernel_width(3)
5706 .subsampling(1)
5707 .padding_left(1)
5708 .padding_right(1)
5709 .padding_top(1)
5710 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07005711 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005712 }
5713 }
5714
Marat Dukhanbf715f92020-10-23 20:17:00 -07005715 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC3, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005716 TEST_REQUIRES_X86_SSE;
5717 for (size_t input_height = 2; input_height < 3; input_height++) {
5718 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07005719 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005720 .input_width(input_width)
5721 .input_height(input_height)
5722 .kernel_height(3)
5723 .kernel_width(3)
5724 .subsampling(1)
5725 .padding_left(1)
5726 .padding_right(1)
5727 .padding_top(1)
5728 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07005729 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07005730 }
5731 }
5732 }
5733#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5734
5735
5736#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan470078a2020-10-23 22:36:52 -07005737 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC4, output_width_eq_4) {
5738 TEST_REQUIRES_X86_SSE;
5739 DWConv2DMicrokernelTester()
5740 .input_width(4)
5741 .input_height(1)
5742 .kernel_height(3)
5743 .kernel_width(3)
5744 .subsampling(1)
5745 .padding_left(1)
5746 .padding_right(1)
5747 .padding_top(1)
5748 .padding_bottom(1)
5749 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4);
5750 }
5751
5752 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC4, output_width_div_4) {
5753 TEST_REQUIRES_X86_SSE;
5754 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5755 DWConv2DMicrokernelTester()
5756 .input_width(input_width)
5757 .input_height(1)
5758 .kernel_height(3)
5759 .kernel_width(3)
5760 .subsampling(1)
5761 .padding_left(1)
5762 .padding_right(1)
5763 .padding_top(1)
5764 .padding_bottom(1)
5765 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4);
5766 }
5767 }
5768
5769 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC4, output_width_lt_4) {
5770 TEST_REQUIRES_X86_SSE;
5771 for (size_t input_width = 1; input_width < 4; input_width++) {
5772 DWConv2DMicrokernelTester()
5773 .input_width(4)
5774 .input_height(1)
5775 .kernel_height(3)
5776 .kernel_width(3)
5777 .subsampling(1)
5778 .padding_left(1)
5779 .padding_right(1)
5780 .padding_top(1)
5781 .padding_bottom(1)
5782 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4);
5783 }
5784 }
5785
5786 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC4, output_width_gt_4) {
5787 TEST_REQUIRES_X86_SSE;
5788 for (size_t input_width = 5; input_width < 9; input_width++) {
5789 DWConv2DMicrokernelTester()
5790 .input_width(input_width)
5791 .input_height(1)
5792 .kernel_height(3)
5793 .kernel_width(3)
5794 .subsampling(1)
5795 .padding_left(1)
5796 .padding_right(1)
5797 .padding_top(1)
5798 .padding_bottom(1)
5799 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4);
5800 }
5801 }
5802
5803 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_1X4_ACC4, output_height_gt_1) {
5804 TEST_REQUIRES_X86_SSE;
5805 for (size_t input_height = 2; input_height < 3; input_height++) {
5806 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5807 DWConv2DMicrokernelTester()
5808 .input_width(input_width)
5809 .input_height(input_height)
5810 .kernel_height(3)
5811 .kernel_width(3)
5812 .subsampling(1)
5813 .padding_left(1)
5814 .padding_right(1)
5815 .padding_top(1)
5816 .padding_bottom(1)
5817 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4);
5818 }
5819 }
5820 }
5821#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5822
5823
5824#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5825 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_width_eq_4) {
5826 TEST_REQUIRES_X86_SSE;
5827 DWConv2DMicrokernelTester()
5828 .input_width(4)
5829 .input_height(2)
5830 .kernel_height(3)
5831 .kernel_width(3)
5832 .subsampling(1)
5833 .padding_left(1)
5834 .padding_right(1)
5835 .padding_top(1)
5836 .padding_bottom(1)
5837 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5838 }
5839
5840 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_width_div_4) {
5841 TEST_REQUIRES_X86_SSE;
5842 for (size_t input_width = 8; input_width < 32; input_width += 4) {
5843 DWConv2DMicrokernelTester()
5844 .input_width(input_width)
5845 .input_height(2)
5846 .kernel_height(3)
5847 .kernel_width(3)
5848 .subsampling(1)
5849 .padding_left(1)
5850 .padding_right(1)
5851 .padding_top(1)
5852 .padding_bottom(1)
5853 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5854 }
5855 }
5856
5857 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_width_lt_4) {
5858 TEST_REQUIRES_X86_SSE;
5859 for (size_t input_width = 1; input_width < 4; input_width++) {
5860 DWConv2DMicrokernelTester()
5861 .input_width(4)
5862 .input_height(2)
5863 .kernel_height(3)
5864 .kernel_width(3)
5865 .subsampling(1)
5866 .padding_left(1)
5867 .padding_right(1)
5868 .padding_top(1)
5869 .padding_bottom(1)
5870 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5871 }
5872 }
5873
5874 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_width_gt_4) {
5875 TEST_REQUIRES_X86_SSE;
5876 for (size_t input_width = 5; input_width < 9; input_width++) {
5877 DWConv2DMicrokernelTester()
5878 .input_width(input_width)
5879 .input_height(2)
5880 .kernel_height(3)
5881 .kernel_width(3)
5882 .subsampling(1)
5883 .padding_left(1)
5884 .padding_right(1)
5885 .padding_top(1)
5886 .padding_bottom(1)
5887 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5888 }
5889 }
5890
5891 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_height_div_2) {
5892 TEST_REQUIRES_X86_SSE;
5893 for (size_t input_height = 4; input_height < 16; input_height += 2) {
5894 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5895 DWConv2DMicrokernelTester()
5896 .input_width(input_width)
5897 .input_height(input_height)
5898 .kernel_height(3)
5899 .kernel_width(3)
5900 .subsampling(1)
5901 .padding_left(1)
5902 .padding_right(1)
5903 .padding_top(1)
5904 .padding_bottom(1)
5905 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5906 }
5907 }
5908 }
5909
5910 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_height_lt_2) {
5911 TEST_REQUIRES_X86_SSE;
5912 for (size_t input_height = 1; input_height < 2; input_height++) {
5913 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5914 DWConv2DMicrokernelTester()
5915 .input_width(input_width)
5916 .input_height(input_height)
5917 .kernel_height(3)
5918 .kernel_width(3)
5919 .subsampling(1)
5920 .padding_left(1)
5921 .padding_right(1)
5922 .padding_top(1)
5923 .padding_bottom(1)
5924 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5925 }
5926 }
5927 }
5928
5929 TEST(F32_DWCONV2D_CHW_3X3P1__SSE_2X4_ACC2, output_height_gt_2) {
5930 TEST_REQUIRES_X86_SSE;
5931 for (size_t input_height = 3; input_height < 5; input_height++) {
5932 for (size_t input_width = 1; input_width < 21; input_width += 3) {
5933 DWConv2DMicrokernelTester()
5934 .input_width(input_width)
5935 .input_height(input_height)
5936 .kernel_height(3)
5937 .kernel_width(3)
5938 .subsampling(1)
5939 .padding_left(1)
5940 .padding_right(1)
5941 .padding_top(1)
5942 .padding_bottom(1)
5943 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2);
5944 }
5945 }
5946 }
5947#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5948
5949
5950#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff97182020-10-25 19:14:03 -07005951 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_width_eq_4) {
5952 TEST_REQUIRES_X86_SSE;
5953 for (size_t input_width = 7; input_width < 9; input_width++) {
5954 DWConv2DMicrokernelTester()
5955 .input_width(input_width)
5956 .input_height(2)
5957 .kernel_height(3)
5958 .kernel_width(3)
5959 .subsampling(2)
5960 .padding_left(1)
5961 .padding_right(1)
5962 .padding_top(1)
5963 .padding_bottom(1)
5964 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
5965 }
5966 }
5967
5968 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_width_div_4) {
5969 TEST_REQUIRES_X86_SSE;
5970 for (size_t input_width = 16; input_width < 64; input_width += 8) {
5971 DWConv2DMicrokernelTester()
5972 .input_width(input_width)
5973 .input_height(2)
5974 .kernel_height(3)
5975 .kernel_width(3)
5976 .subsampling(2)
5977 .padding_left(1)
5978 .padding_right(1)
5979 .padding_top(1)
5980 .padding_bottom(1)
5981 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
5982 }
5983 }
5984
5985 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_width_lt_4) {
5986 TEST_REQUIRES_X86_SSE;
5987 for (size_t input_width = 1; input_width < 7; input_width++) {
5988 DWConv2DMicrokernelTester()
5989 .input_width(8)
5990 .input_height(2)
5991 .kernel_height(3)
5992 .kernel_width(3)
5993 .subsampling(2)
5994 .padding_left(1)
5995 .padding_right(1)
5996 .padding_top(1)
5997 .padding_bottom(1)
5998 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
5999 }
6000 }
6001
6002 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_width_gt_4) {
6003 TEST_REQUIRES_X86_SSE;
6004 for (size_t input_width = 9; input_width < 17; input_width++) {
6005 DWConv2DMicrokernelTester()
6006 .input_width(input_width)
6007 .input_height(2)
6008 .kernel_height(3)
6009 .kernel_width(3)
6010 .subsampling(2)
6011 .padding_left(1)
6012 .padding_right(1)
6013 .padding_top(1)
6014 .padding_bottom(1)
6015 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
6016 }
6017 }
6018
6019 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_height_eq_1) {
6020 TEST_REQUIRES_X86_SSE;
6021 for (size_t input_height = 1; input_height < 3; input_height++) {
6022 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6023 DWConv2DMicrokernelTester()
6024 .input_width(input_width)
6025 .input_height(input_height)
6026 .kernel_height(3)
6027 .kernel_width(3)
6028 .subsampling(2)
6029 .padding_left(1)
6030 .padding_right(1)
6031 .padding_top(1)
6032 .padding_bottom(1)
6033 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
6034 }
6035 }
6036 }
6037
6038 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, output_height_gt_1) {
6039 TEST_REQUIRES_X86_SSE;
6040 for (size_t input_height = 3; input_height < 5; input_height++) {
6041 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6042 DWConv2DMicrokernelTester()
6043 .input_width(input_width)
6044 .input_height(input_height)
6045 .kernel_height(3)
6046 .kernel_width(3)
6047 .subsampling(2)
6048 .padding_left(1)
6049 .padding_right(1)
6050 .padding_top(1)
6051 .padding_bottom(1)
6052 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
6053 }
6054 }
6055 }
6056
6057 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4, padding_top_eq_1) {
6058 TEST_REQUIRES_X86_SSE;
6059 for (size_t input_height = 2; input_height < 8; input_height++) {
6060 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6061 DWConv2DMicrokernelTester()
6062 .input_width(input_width)
6063 .input_height(input_height)
6064 .kernel_height(3)
6065 .kernel_width(3)
6066 .subsampling(2)
6067 .padding_left(1)
6068 .padding_right(1)
6069 .padding_top(0)
6070 .padding_bottom(1)
6071 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4);
6072 }
6073 }
6074 }
6075#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6076
6077
6078#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6079 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_width_eq_4) {
6080 TEST_REQUIRES_X86_SSE;
6081 for (size_t input_width = 7; input_width < 9; input_width++) {
6082 DWConv2DMicrokernelTester()
6083 .input_width(input_width)
6084 .input_height(4)
6085 .kernel_height(3)
6086 .kernel_width(3)
6087 .subsampling(2)
6088 .padding_left(1)
6089 .padding_right(1)
6090 .padding_top(1)
6091 .padding_bottom(1)
6092 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6093 }
6094 }
6095
6096 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_width_div_4) {
6097 TEST_REQUIRES_X86_SSE;
6098 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6099 DWConv2DMicrokernelTester()
6100 .input_width(input_width)
6101 .input_height(4)
6102 .kernel_height(3)
6103 .kernel_width(3)
6104 .subsampling(2)
6105 .padding_left(1)
6106 .padding_right(1)
6107 .padding_top(1)
6108 .padding_bottom(1)
6109 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6110 }
6111 }
6112
6113 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_width_lt_4) {
6114 TEST_REQUIRES_X86_SSE;
6115 for (size_t input_width = 1; input_width < 7; input_width++) {
6116 DWConv2DMicrokernelTester()
6117 .input_width(8)
6118 .input_height(4)
6119 .kernel_height(3)
6120 .kernel_width(3)
6121 .subsampling(2)
6122 .padding_left(1)
6123 .padding_right(1)
6124 .padding_top(1)
6125 .padding_bottom(1)
6126 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6127 }
6128 }
6129
6130 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_width_gt_4) {
6131 TEST_REQUIRES_X86_SSE;
6132 for (size_t input_width = 9; input_width < 17; input_width++) {
6133 DWConv2DMicrokernelTester()
6134 .input_width(input_width)
6135 .input_height(4)
6136 .kernel_height(3)
6137 .kernel_width(3)
6138 .subsampling(2)
6139 .padding_left(1)
6140 .padding_right(1)
6141 .padding_top(1)
6142 .padding_bottom(1)
6143 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6144 }
6145 }
6146
6147 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_height_eq_2) {
6148 TEST_REQUIRES_X86_SSE;
6149 for (size_t input_height = 3; input_height < 5; input_height++) {
6150 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6151 DWConv2DMicrokernelTester()
6152 .input_width(input_width)
6153 .input_height(input_height)
6154 .kernel_height(3)
6155 .kernel_width(3)
6156 .subsampling(2)
6157 .padding_left(1)
6158 .padding_right(1)
6159 .padding_top(1)
6160 .padding_bottom(1)
6161 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6162 }
6163 }
6164 }
6165
6166 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_height_div_2) {
6167 TEST_REQUIRES_X86_SSE;
6168 for (size_t input_height = 8; input_height < 32; input_height += 4) {
6169 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6170 DWConv2DMicrokernelTester()
6171 .input_width(input_width)
6172 .input_height(input_height)
6173 .kernel_height(3)
6174 .kernel_width(3)
6175 .subsampling(2)
6176 .padding_left(1)
6177 .padding_right(1)
6178 .padding_top(1)
6179 .padding_bottom(1)
6180 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6181 }
6182 }
6183 }
6184
6185 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_height_lt_2) {
6186 TEST_REQUIRES_X86_SSE;
6187 for (size_t input_height = 1; input_height < 3; input_height++) {
6188 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6189 DWConv2DMicrokernelTester()
6190 .input_width(input_width)
6191 .input_height(input_height)
6192 .kernel_height(3)
6193 .kernel_width(3)
6194 .subsampling(2)
6195 .padding_left(1)
6196 .padding_right(1)
6197 .padding_top(1)
6198 .padding_bottom(1)
6199 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6200 }
6201 }
6202 }
6203
6204 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, output_height_gt_2) {
6205 TEST_REQUIRES_X86_SSE;
6206 for (size_t input_height = 5; input_height < 9; input_height++) {
6207 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6208 DWConv2DMicrokernelTester()
6209 .input_width(input_width)
6210 .input_height(input_height)
6211 .kernel_height(3)
6212 .kernel_width(3)
6213 .subsampling(2)
6214 .padding_left(1)
6215 .padding_right(1)
6216 .padding_top(1)
6217 .padding_bottom(1)
6218 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6219 }
6220 }
6221 }
6222
6223 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4, padding_top_eq_1) {
6224 TEST_REQUIRES_X86_SSE;
6225 for (size_t input_height = 2; input_height < 14; input_height++) {
6226 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6227 DWConv2DMicrokernelTester()
6228 .input_width(input_width)
6229 .input_height(input_height)
6230 .kernel_height(3)
6231 .kernel_width(3)
6232 .subsampling(2)
6233 .padding_left(1)
6234 .padding_right(1)
6235 .padding_top(0)
6236 .padding_bottom(1)
6237 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4);
6238 }
6239 }
6240 }
6241#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6242
6243
6244#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6245 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_width_eq_4) {
6246 TEST_REQUIRES_X86_SSE;
6247 for (size_t input_width = 7; input_width < 9; input_width++) {
6248 DWConv2DMicrokernelTester()
6249 .input_width(input_width)
6250 .input_height(6)
6251 .kernel_height(3)
6252 .kernel_width(3)
6253 .subsampling(2)
6254 .padding_left(1)
6255 .padding_right(1)
6256 .padding_top(1)
6257 .padding_bottom(1)
6258 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6259 }
6260 }
6261
6262 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_width_div_4) {
6263 TEST_REQUIRES_X86_SSE;
6264 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6265 DWConv2DMicrokernelTester()
6266 .input_width(input_width)
6267 .input_height(6)
6268 .kernel_height(3)
6269 .kernel_width(3)
6270 .subsampling(2)
6271 .padding_left(1)
6272 .padding_right(1)
6273 .padding_top(1)
6274 .padding_bottom(1)
6275 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6276 }
6277 }
6278
6279 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_width_lt_4) {
6280 TEST_REQUIRES_X86_SSE;
6281 for (size_t input_width = 1; input_width < 7; input_width++) {
6282 DWConv2DMicrokernelTester()
6283 .input_width(8)
6284 .input_height(6)
6285 .kernel_height(3)
6286 .kernel_width(3)
6287 .subsampling(2)
6288 .padding_left(1)
6289 .padding_right(1)
6290 .padding_top(1)
6291 .padding_bottom(1)
6292 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6293 }
6294 }
6295
6296 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_width_gt_4) {
6297 TEST_REQUIRES_X86_SSE;
6298 for (size_t input_width = 9; input_width < 17; input_width++) {
6299 DWConv2DMicrokernelTester()
6300 .input_width(input_width)
6301 .input_height(6)
6302 .kernel_height(3)
6303 .kernel_width(3)
6304 .subsampling(2)
6305 .padding_left(1)
6306 .padding_right(1)
6307 .padding_top(1)
6308 .padding_bottom(1)
6309 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6310 }
6311 }
6312
6313 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_height_eq_3) {
6314 TEST_REQUIRES_X86_SSE;
6315 for (size_t input_height = 5; input_height < 7; input_height++) {
6316 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6317 DWConv2DMicrokernelTester()
6318 .input_width(input_width)
6319 .input_height(input_height)
6320 .kernel_height(3)
6321 .kernel_width(3)
6322 .subsampling(2)
6323 .padding_left(1)
6324 .padding_right(1)
6325 .padding_top(1)
6326 .padding_bottom(1)
6327 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6328 }
6329 }
6330 }
6331
6332 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_height_div_3) {
6333 TEST_REQUIRES_X86_SSE;
6334 for (size_t input_height = 12; input_height < 48; input_height += 6) {
6335 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6336 DWConv2DMicrokernelTester()
6337 .input_width(input_width)
6338 .input_height(input_height)
6339 .kernel_height(3)
6340 .kernel_width(3)
6341 .subsampling(2)
6342 .padding_left(1)
6343 .padding_right(1)
6344 .padding_top(1)
6345 .padding_bottom(1)
6346 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6347 }
6348 }
6349 }
6350
6351 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_height_lt_3) {
6352 TEST_REQUIRES_X86_SSE;
6353 for (size_t input_height = 1; input_height < 5; input_height++) {
6354 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6355 DWConv2DMicrokernelTester()
6356 .input_width(input_width)
6357 .input_height(input_height)
6358 .kernel_height(3)
6359 .kernel_width(3)
6360 .subsampling(2)
6361 .padding_left(1)
6362 .padding_right(1)
6363 .padding_top(1)
6364 .padding_bottom(1)
6365 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6366 }
6367 }
6368 }
6369
6370 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, output_height_gt_3) {
6371 TEST_REQUIRES_X86_SSE;
6372 for (size_t input_height = 7; input_height < 13; input_height++) {
6373 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6374 DWConv2DMicrokernelTester()
6375 .input_width(input_width)
6376 .input_height(input_height)
6377 .kernel_height(3)
6378 .kernel_width(3)
6379 .subsampling(2)
6380 .padding_left(1)
6381 .padding_right(1)
6382 .padding_top(1)
6383 .padding_bottom(1)
6384 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6385 }
6386 }
6387 }
6388
6389 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_3X4, padding_top_eq_1) {
6390 TEST_REQUIRES_X86_SSE;
6391 for (size_t input_height = 2; input_height < 20; input_height++) {
6392 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6393 DWConv2DMicrokernelTester()
6394 .input_width(input_width)
6395 .input_height(input_height)
6396 .kernel_height(3)
6397 .kernel_width(3)
6398 .subsampling(2)
6399 .padding_left(1)
6400 .padding_right(1)
6401 .padding_top(0)
6402 .padding_bottom(1)
6403 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4);
6404 }
6405 }
6406 }
6407#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6408
6409
6410#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6411 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_width_eq_4) {
6412 TEST_REQUIRES_X86_SSE;
6413 for (size_t input_width = 7; input_width < 9; input_width++) {
6414 DWConv2DMicrokernelTester()
6415 .input_width(input_width)
6416 .input_height(8)
6417 .kernel_height(3)
6418 .kernel_width(3)
6419 .subsampling(2)
6420 .padding_left(1)
6421 .padding_right(1)
6422 .padding_top(1)
6423 .padding_bottom(1)
6424 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6425 }
6426 }
6427
6428 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_width_div_4) {
6429 TEST_REQUIRES_X86_SSE;
6430 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6431 DWConv2DMicrokernelTester()
6432 .input_width(input_width)
6433 .input_height(8)
6434 .kernel_height(3)
6435 .kernel_width(3)
6436 .subsampling(2)
6437 .padding_left(1)
6438 .padding_right(1)
6439 .padding_top(1)
6440 .padding_bottom(1)
6441 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6442 }
6443 }
6444
6445 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_width_lt_4) {
6446 TEST_REQUIRES_X86_SSE;
6447 for (size_t input_width = 1; input_width < 7; input_width++) {
6448 DWConv2DMicrokernelTester()
6449 .input_width(8)
6450 .input_height(8)
6451 .kernel_height(3)
6452 .kernel_width(3)
6453 .subsampling(2)
6454 .padding_left(1)
6455 .padding_right(1)
6456 .padding_top(1)
6457 .padding_bottom(1)
6458 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6459 }
6460 }
6461
6462 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_width_gt_4) {
6463 TEST_REQUIRES_X86_SSE;
6464 for (size_t input_width = 9; input_width < 17; input_width++) {
6465 DWConv2DMicrokernelTester()
6466 .input_width(input_width)
6467 .input_height(8)
6468 .kernel_height(3)
6469 .kernel_width(3)
6470 .subsampling(2)
6471 .padding_left(1)
6472 .padding_right(1)
6473 .padding_top(1)
6474 .padding_bottom(1)
6475 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6476 }
6477 }
6478
6479 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_height_eq_4) {
6480 TEST_REQUIRES_X86_SSE;
6481 for (size_t input_height = 7; input_height < 9; input_height++) {
6482 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6483 DWConv2DMicrokernelTester()
6484 .input_width(input_width)
6485 .input_height(input_height)
6486 .kernel_height(3)
6487 .kernel_width(3)
6488 .subsampling(2)
6489 .padding_left(1)
6490 .padding_right(1)
6491 .padding_top(1)
6492 .padding_bottom(1)
6493 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6494 }
6495 }
6496 }
6497
6498 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_height_div_4) {
6499 TEST_REQUIRES_X86_SSE;
6500 for (size_t input_height = 16; input_height < 64; input_height += 8) {
6501 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6502 DWConv2DMicrokernelTester()
6503 .input_width(input_width)
6504 .input_height(input_height)
6505 .kernel_height(3)
6506 .kernel_width(3)
6507 .subsampling(2)
6508 .padding_left(1)
6509 .padding_right(1)
6510 .padding_top(1)
6511 .padding_bottom(1)
6512 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6513 }
6514 }
6515 }
6516
6517 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_height_lt_4) {
6518 TEST_REQUIRES_X86_SSE;
6519 for (size_t input_height = 1; input_height < 7; input_height++) {
6520 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6521 DWConv2DMicrokernelTester()
6522 .input_width(input_width)
6523 .input_height(input_height)
6524 .kernel_height(3)
6525 .kernel_width(3)
6526 .subsampling(2)
6527 .padding_left(1)
6528 .padding_right(1)
6529 .padding_top(1)
6530 .padding_bottom(1)
6531 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6532 }
6533 }
6534 }
6535
6536 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, output_height_gt_4) {
6537 TEST_REQUIRES_X86_SSE;
6538 for (size_t input_height = 9; input_height < 17; input_height++) {
6539 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6540 DWConv2DMicrokernelTester()
6541 .input_width(input_width)
6542 .input_height(input_height)
6543 .kernel_height(3)
6544 .kernel_width(3)
6545 .subsampling(2)
6546 .padding_left(1)
6547 .padding_right(1)
6548 .padding_top(1)
6549 .padding_bottom(1)
6550 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6551 }
6552 }
6553 }
6554
6555 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_4X4, padding_top_eq_1) {
6556 TEST_REQUIRES_X86_SSE;
6557 for (size_t input_height = 2; input_height < 26; input_height++) {
6558 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6559 DWConv2DMicrokernelTester()
6560 .input_width(input_width)
6561 .input_height(input_height)
6562 .kernel_height(3)
6563 .kernel_width(3)
6564 .subsampling(2)
6565 .padding_left(1)
6566 .padding_right(1)
6567 .padding_top(0)
6568 .padding_bottom(1)
6569 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4);
6570 }
6571 }
6572 }
6573#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6574
6575
6576#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6577 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_width_eq_4) {
6578 TEST_REQUIRES_X86_SSE;
6579 for (size_t input_width = 7; input_width < 9; input_width++) {
6580 DWConv2DMicrokernelTester()
6581 .input_width(input_width)
6582 .input_height(2)
6583 .kernel_height(3)
6584 .kernel_width(3)
6585 .subsampling(2)
6586 .padding_left(1)
6587 .padding_right(1)
6588 .padding_top(1)
6589 .padding_bottom(1)
6590 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6591 }
6592 }
6593
6594 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_width_div_4) {
6595 TEST_REQUIRES_X86_SSE;
6596 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6597 DWConv2DMicrokernelTester()
6598 .input_width(input_width)
6599 .input_height(2)
6600 .kernel_height(3)
6601 .kernel_width(3)
6602 .subsampling(2)
6603 .padding_left(1)
6604 .padding_right(1)
6605 .padding_top(1)
6606 .padding_bottom(1)
6607 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6608 }
6609 }
6610
6611 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_width_lt_4) {
6612 TEST_REQUIRES_X86_SSE;
6613 for (size_t input_width = 1; input_width < 7; input_width++) {
6614 DWConv2DMicrokernelTester()
6615 .input_width(8)
6616 .input_height(2)
6617 .kernel_height(3)
6618 .kernel_width(3)
6619 .subsampling(2)
6620 .padding_left(1)
6621 .padding_right(1)
6622 .padding_top(1)
6623 .padding_bottom(1)
6624 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6625 }
6626 }
6627
6628 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_width_gt_4) {
6629 TEST_REQUIRES_X86_SSE;
6630 for (size_t input_width = 9; input_width < 17; input_width++) {
6631 DWConv2DMicrokernelTester()
6632 .input_width(input_width)
6633 .input_height(2)
6634 .kernel_height(3)
6635 .kernel_width(3)
6636 .subsampling(2)
6637 .padding_left(1)
6638 .padding_right(1)
6639 .padding_top(1)
6640 .padding_bottom(1)
6641 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6642 }
6643 }
6644
6645 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_height_eq_1) {
6646 TEST_REQUIRES_X86_SSE;
6647 for (size_t input_height = 1; input_height < 3; input_height++) {
6648 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6649 DWConv2DMicrokernelTester()
6650 .input_width(input_width)
6651 .input_height(input_height)
6652 .kernel_height(3)
6653 .kernel_width(3)
6654 .subsampling(2)
6655 .padding_left(1)
6656 .padding_right(1)
6657 .padding_top(1)
6658 .padding_bottom(1)
6659 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6660 }
6661 }
6662 }
6663
6664 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, output_height_gt_1) {
6665 TEST_REQUIRES_X86_SSE;
6666 for (size_t input_height = 3; input_height < 5; input_height++) {
6667 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6668 DWConv2DMicrokernelTester()
6669 .input_width(input_width)
6670 .input_height(input_height)
6671 .kernel_height(3)
6672 .kernel_width(3)
6673 .subsampling(2)
6674 .padding_left(1)
6675 .padding_right(1)
6676 .padding_top(1)
6677 .padding_bottom(1)
6678 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6679 }
6680 }
6681 }
6682
6683 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC2, padding_top_eq_1) {
6684 TEST_REQUIRES_X86_SSE;
6685 for (size_t input_height = 2; input_height < 8; input_height++) {
6686 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6687 DWConv2DMicrokernelTester()
6688 .input_width(input_width)
6689 .input_height(input_height)
6690 .kernel_height(3)
6691 .kernel_width(3)
6692 .subsampling(2)
6693 .padding_left(1)
6694 .padding_right(1)
6695 .padding_top(0)
6696 .padding_bottom(1)
6697 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2);
6698 }
6699 }
6700 }
6701#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6702
6703
6704#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6705 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_width_eq_4) {
6706 TEST_REQUIRES_X86_SSE;
6707 for (size_t input_width = 7; input_width < 9; input_width++) {
6708 DWConv2DMicrokernelTester()
6709 .input_width(input_width)
6710 .input_height(2)
6711 .kernel_height(3)
6712 .kernel_width(3)
6713 .subsampling(2)
6714 .padding_left(1)
6715 .padding_right(1)
6716 .padding_top(1)
6717 .padding_bottom(1)
6718 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6719 }
6720 }
6721
6722 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_width_div_4) {
6723 TEST_REQUIRES_X86_SSE;
6724 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6725 DWConv2DMicrokernelTester()
6726 .input_width(input_width)
6727 .input_height(2)
6728 .kernel_height(3)
6729 .kernel_width(3)
6730 .subsampling(2)
6731 .padding_left(1)
6732 .padding_right(1)
6733 .padding_top(1)
6734 .padding_bottom(1)
6735 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6736 }
6737 }
6738
6739 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_width_lt_4) {
6740 TEST_REQUIRES_X86_SSE;
6741 for (size_t input_width = 1; input_width < 7; input_width++) {
6742 DWConv2DMicrokernelTester()
6743 .input_width(8)
6744 .input_height(2)
6745 .kernel_height(3)
6746 .kernel_width(3)
6747 .subsampling(2)
6748 .padding_left(1)
6749 .padding_right(1)
6750 .padding_top(1)
6751 .padding_bottom(1)
6752 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6753 }
6754 }
6755
6756 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_width_gt_4) {
6757 TEST_REQUIRES_X86_SSE;
6758 for (size_t input_width = 9; input_width < 17; input_width++) {
6759 DWConv2DMicrokernelTester()
6760 .input_width(input_width)
6761 .input_height(2)
6762 .kernel_height(3)
6763 .kernel_width(3)
6764 .subsampling(2)
6765 .padding_left(1)
6766 .padding_right(1)
6767 .padding_top(1)
6768 .padding_bottom(1)
6769 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6770 }
6771 }
6772
6773 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_height_eq_1) {
6774 TEST_REQUIRES_X86_SSE;
6775 for (size_t input_height = 1; input_height < 3; input_height++) {
6776 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6777 DWConv2DMicrokernelTester()
6778 .input_width(input_width)
6779 .input_height(input_height)
6780 .kernel_height(3)
6781 .kernel_width(3)
6782 .subsampling(2)
6783 .padding_left(1)
6784 .padding_right(1)
6785 .padding_top(1)
6786 .padding_bottom(1)
6787 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6788 }
6789 }
6790 }
6791
6792 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, output_height_gt_1) {
6793 TEST_REQUIRES_X86_SSE;
6794 for (size_t input_height = 3; input_height < 5; input_height++) {
6795 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6796 DWConv2DMicrokernelTester()
6797 .input_width(input_width)
6798 .input_height(input_height)
6799 .kernel_height(3)
6800 .kernel_width(3)
6801 .subsampling(2)
6802 .padding_left(1)
6803 .padding_right(1)
6804 .padding_top(1)
6805 .padding_bottom(1)
6806 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6807 }
6808 }
6809 }
6810
6811 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC3, padding_top_eq_1) {
6812 TEST_REQUIRES_X86_SSE;
6813 for (size_t input_height = 2; input_height < 8; input_height++) {
6814 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6815 DWConv2DMicrokernelTester()
6816 .input_width(input_width)
6817 .input_height(input_height)
6818 .kernel_height(3)
6819 .kernel_width(3)
6820 .subsampling(2)
6821 .padding_left(1)
6822 .padding_right(1)
6823 .padding_top(0)
6824 .padding_bottom(1)
6825 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3);
6826 }
6827 }
6828 }
6829#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6830
6831
6832#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6833 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_width_eq_4) {
6834 TEST_REQUIRES_X86_SSE;
6835 for (size_t input_width = 7; input_width < 9; input_width++) {
6836 DWConv2DMicrokernelTester()
6837 .input_width(input_width)
6838 .input_height(2)
6839 .kernel_height(3)
6840 .kernel_width(3)
6841 .subsampling(2)
6842 .padding_left(1)
6843 .padding_right(1)
6844 .padding_top(1)
6845 .padding_bottom(1)
6846 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6847 }
6848 }
6849
6850 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_width_div_4) {
6851 TEST_REQUIRES_X86_SSE;
6852 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6853 DWConv2DMicrokernelTester()
6854 .input_width(input_width)
6855 .input_height(2)
6856 .kernel_height(3)
6857 .kernel_width(3)
6858 .subsampling(2)
6859 .padding_left(1)
6860 .padding_right(1)
6861 .padding_top(1)
6862 .padding_bottom(1)
6863 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6864 }
6865 }
6866
6867 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_width_lt_4) {
6868 TEST_REQUIRES_X86_SSE;
6869 for (size_t input_width = 1; input_width < 7; input_width++) {
6870 DWConv2DMicrokernelTester()
6871 .input_width(8)
6872 .input_height(2)
6873 .kernel_height(3)
6874 .kernel_width(3)
6875 .subsampling(2)
6876 .padding_left(1)
6877 .padding_right(1)
6878 .padding_top(1)
6879 .padding_bottom(1)
6880 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6881 }
6882 }
6883
6884 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_width_gt_4) {
6885 TEST_REQUIRES_X86_SSE;
6886 for (size_t input_width = 9; input_width < 17; input_width++) {
6887 DWConv2DMicrokernelTester()
6888 .input_width(input_width)
6889 .input_height(2)
6890 .kernel_height(3)
6891 .kernel_width(3)
6892 .subsampling(2)
6893 .padding_left(1)
6894 .padding_right(1)
6895 .padding_top(1)
6896 .padding_bottom(1)
6897 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6898 }
6899 }
6900
6901 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_height_eq_1) {
6902 TEST_REQUIRES_X86_SSE;
6903 for (size_t input_height = 1; input_height < 3; input_height++) {
6904 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6905 DWConv2DMicrokernelTester()
6906 .input_width(input_width)
6907 .input_height(input_height)
6908 .kernel_height(3)
6909 .kernel_width(3)
6910 .subsampling(2)
6911 .padding_left(1)
6912 .padding_right(1)
6913 .padding_top(1)
6914 .padding_bottom(1)
6915 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6916 }
6917 }
6918 }
6919
6920 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, output_height_gt_1) {
6921 TEST_REQUIRES_X86_SSE;
6922 for (size_t input_height = 3; input_height < 5; input_height++) {
6923 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6924 DWConv2DMicrokernelTester()
6925 .input_width(input_width)
6926 .input_height(input_height)
6927 .kernel_height(3)
6928 .kernel_width(3)
6929 .subsampling(2)
6930 .padding_left(1)
6931 .padding_right(1)
6932 .padding_top(1)
6933 .padding_bottom(1)
6934 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6935 }
6936 }
6937 }
6938
6939 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_1X4_ACC4, padding_top_eq_1) {
6940 TEST_REQUIRES_X86_SSE;
6941 for (size_t input_height = 2; input_height < 8; input_height++) {
6942 for (size_t input_width = 1; input_width < 41; input_width += 7) {
6943 DWConv2DMicrokernelTester()
6944 .input_width(input_width)
6945 .input_height(input_height)
6946 .kernel_height(3)
6947 .kernel_width(3)
6948 .subsampling(2)
6949 .padding_left(1)
6950 .padding_right(1)
6951 .padding_top(0)
6952 .padding_bottom(1)
6953 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4);
6954 }
6955 }
6956 }
6957#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6958
6959
6960#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6961 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_width_eq_4) {
6962 TEST_REQUIRES_X86_SSE;
6963 for (size_t input_width = 7; input_width < 9; input_width++) {
6964 DWConv2DMicrokernelTester()
6965 .input_width(input_width)
6966 .input_height(4)
6967 .kernel_height(3)
6968 .kernel_width(3)
6969 .subsampling(2)
6970 .padding_left(1)
6971 .padding_right(1)
6972 .padding_top(1)
6973 .padding_bottom(1)
6974 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
6975 }
6976 }
6977
6978 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_width_div_4) {
6979 TEST_REQUIRES_X86_SSE;
6980 for (size_t input_width = 16; input_width < 64; input_width += 8) {
6981 DWConv2DMicrokernelTester()
6982 .input_width(input_width)
6983 .input_height(4)
6984 .kernel_height(3)
6985 .kernel_width(3)
6986 .subsampling(2)
6987 .padding_left(1)
6988 .padding_right(1)
6989 .padding_top(1)
6990 .padding_bottom(1)
6991 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
6992 }
6993 }
6994
6995 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_width_lt_4) {
6996 TEST_REQUIRES_X86_SSE;
6997 for (size_t input_width = 1; input_width < 7; input_width++) {
6998 DWConv2DMicrokernelTester()
6999 .input_width(8)
7000 .input_height(4)
7001 .kernel_height(3)
7002 .kernel_width(3)
7003 .subsampling(2)
7004 .padding_left(1)
7005 .padding_right(1)
7006 .padding_top(1)
7007 .padding_bottom(1)
7008 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7009 }
7010 }
7011
7012 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_width_gt_4) {
7013 TEST_REQUIRES_X86_SSE;
7014 for (size_t input_width = 9; input_width < 17; input_width++) {
7015 DWConv2DMicrokernelTester()
7016 .input_width(input_width)
7017 .input_height(4)
7018 .kernel_height(3)
7019 .kernel_width(3)
7020 .subsampling(2)
7021 .padding_left(1)
7022 .padding_right(1)
7023 .padding_top(1)
7024 .padding_bottom(1)
7025 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7026 }
7027 }
7028
7029 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_height_eq_2) {
7030 TEST_REQUIRES_X86_SSE;
7031 for (size_t input_height = 3; input_height < 5; input_height++) {
7032 for (size_t input_width = 1; input_width < 41; input_width += 7) {
7033 DWConv2DMicrokernelTester()
7034 .input_width(input_width)
7035 .input_height(input_height)
7036 .kernel_height(3)
7037 .kernel_width(3)
7038 .subsampling(2)
7039 .padding_left(1)
7040 .padding_right(1)
7041 .padding_top(1)
7042 .padding_bottom(1)
7043 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7044 }
7045 }
7046 }
7047
7048 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_height_div_2) {
7049 TEST_REQUIRES_X86_SSE;
7050 for (size_t input_height = 8; input_height < 32; input_height += 4) {
7051 for (size_t input_width = 1; input_width < 41; input_width += 7) {
7052 DWConv2DMicrokernelTester()
7053 .input_width(input_width)
7054 .input_height(input_height)
7055 .kernel_height(3)
7056 .kernel_width(3)
7057 .subsampling(2)
7058 .padding_left(1)
7059 .padding_right(1)
7060 .padding_top(1)
7061 .padding_bottom(1)
7062 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7063 }
7064 }
7065 }
7066
7067 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_height_lt_2) {
7068 TEST_REQUIRES_X86_SSE;
7069 for (size_t input_height = 1; input_height < 3; input_height++) {
7070 for (size_t input_width = 1; input_width < 41; input_width += 7) {
7071 DWConv2DMicrokernelTester()
7072 .input_width(input_width)
7073 .input_height(input_height)
7074 .kernel_height(3)
7075 .kernel_width(3)
7076 .subsampling(2)
7077 .padding_left(1)
7078 .padding_right(1)
7079 .padding_top(1)
7080 .padding_bottom(1)
7081 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7082 }
7083 }
7084 }
7085
7086 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, output_height_gt_2) {
7087 TEST_REQUIRES_X86_SSE;
7088 for (size_t input_height = 5; input_height < 9; input_height++) {
7089 for (size_t input_width = 1; input_width < 41; input_width += 7) {
7090 DWConv2DMicrokernelTester()
7091 .input_width(input_width)
7092 .input_height(input_height)
7093 .kernel_height(3)
7094 .kernel_width(3)
7095 .subsampling(2)
7096 .padding_left(1)
7097 .padding_right(1)
7098 .padding_top(1)
7099 .padding_bottom(1)
7100 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7101 }
7102 }
7103 }
7104
7105 TEST(F32_DWCONV2D_CHW_3X3S2P1__SSE_2X4_ACC2, padding_top_eq_1) {
7106 TEST_REQUIRES_X86_SSE;
7107 for (size_t input_height = 2; input_height < 14; input_height++) {
7108 for (size_t input_width = 1; input_width < 41; input_width += 7) {
7109 DWConv2DMicrokernelTester()
7110 .input_width(input_width)
7111 .input_height(input_height)
7112 .kernel_height(3)
7113 .kernel_width(3)
7114 .subsampling(2)
7115 .padding_left(1)
7116 .padding_right(1)
7117 .padding_top(0)
7118 .padding_bottom(1)
7119 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2);
7120 }
7121 }
7122 }
7123#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7124
7125
7126#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98f2eeb2020-10-23 23:13:41 -07007127 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4, output_width_eq_4) {
7128 TEST_REQUIRES_X86_SSSE3;
7129 DWConv2DMicrokernelTester()
7130 .input_width(4)
7131 .input_height(1)
7132 .kernel_height(3)
7133 .kernel_width(3)
7134 .subsampling(1)
7135 .padding_left(1)
7136 .padding_right(1)
7137 .padding_top(1)
7138 .padding_bottom(1)
7139 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4);
7140 }
7141
7142 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4, output_width_div_4) {
7143 TEST_REQUIRES_X86_SSSE3;
7144 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7145 DWConv2DMicrokernelTester()
7146 .input_width(input_width)
7147 .input_height(1)
7148 .kernel_height(3)
7149 .kernel_width(3)
7150 .subsampling(1)
7151 .padding_left(1)
7152 .padding_right(1)
7153 .padding_top(1)
7154 .padding_bottom(1)
7155 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4);
7156 }
7157 }
7158
7159 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4, output_width_lt_4) {
7160 TEST_REQUIRES_X86_SSSE3;
7161 for (size_t input_width = 1; input_width < 4; input_width++) {
7162 DWConv2DMicrokernelTester()
7163 .input_width(4)
7164 .input_height(1)
7165 .kernel_height(3)
7166 .kernel_width(3)
7167 .subsampling(1)
7168 .padding_left(1)
7169 .padding_right(1)
7170 .padding_top(1)
7171 .padding_bottom(1)
7172 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4);
7173 }
7174 }
7175
7176 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4, output_width_gt_4) {
7177 TEST_REQUIRES_X86_SSSE3;
7178 for (size_t input_width = 5; input_width < 9; input_width++) {
7179 DWConv2DMicrokernelTester()
7180 .input_width(input_width)
7181 .input_height(1)
7182 .kernel_height(3)
7183 .kernel_width(3)
7184 .subsampling(1)
7185 .padding_left(1)
7186 .padding_right(1)
7187 .padding_top(1)
7188 .padding_bottom(1)
7189 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4);
7190 }
7191 }
7192
7193 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4, output_height_gt_1) {
7194 TEST_REQUIRES_X86_SSSE3;
7195 for (size_t input_height = 2; input_height < 3; input_height++) {
7196 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7197 DWConv2DMicrokernelTester()
7198 .input_width(input_width)
7199 .input_height(input_height)
7200 .kernel_height(3)
7201 .kernel_width(3)
7202 .subsampling(1)
7203 .padding_left(1)
7204 .padding_right(1)
7205 .padding_top(1)
7206 .padding_bottom(1)
7207 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4);
7208 }
7209 }
7210 }
7211#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7212
7213
7214#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7215 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_width_eq_4) {
7216 TEST_REQUIRES_X86_SSSE3;
7217 DWConv2DMicrokernelTester()
7218 .input_width(4)
7219 .input_height(2)
7220 .kernel_height(3)
7221 .kernel_width(3)
7222 .subsampling(1)
7223 .padding_left(1)
7224 .padding_right(1)
7225 .padding_top(1)
7226 .padding_bottom(1)
7227 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7228 }
7229
7230 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_width_div_4) {
7231 TEST_REQUIRES_X86_SSSE3;
7232 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7233 DWConv2DMicrokernelTester()
7234 .input_width(input_width)
7235 .input_height(2)
7236 .kernel_height(3)
7237 .kernel_width(3)
7238 .subsampling(1)
7239 .padding_left(1)
7240 .padding_right(1)
7241 .padding_top(1)
7242 .padding_bottom(1)
7243 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7244 }
7245 }
7246
7247 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_width_lt_4) {
7248 TEST_REQUIRES_X86_SSSE3;
7249 for (size_t input_width = 1; input_width < 4; input_width++) {
7250 DWConv2DMicrokernelTester()
7251 .input_width(4)
7252 .input_height(2)
7253 .kernel_height(3)
7254 .kernel_width(3)
7255 .subsampling(1)
7256 .padding_left(1)
7257 .padding_right(1)
7258 .padding_top(1)
7259 .padding_bottom(1)
7260 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7261 }
7262 }
7263
7264 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_width_gt_4) {
7265 TEST_REQUIRES_X86_SSSE3;
7266 for (size_t input_width = 5; input_width < 9; input_width++) {
7267 DWConv2DMicrokernelTester()
7268 .input_width(input_width)
7269 .input_height(2)
7270 .kernel_height(3)
7271 .kernel_width(3)
7272 .subsampling(1)
7273 .padding_left(1)
7274 .padding_right(1)
7275 .padding_top(1)
7276 .padding_bottom(1)
7277 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7278 }
7279 }
7280
7281 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_height_div_2) {
7282 TEST_REQUIRES_X86_SSSE3;
7283 for (size_t input_height = 4; input_height < 16; input_height += 2) {
7284 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7285 DWConv2DMicrokernelTester()
7286 .input_width(input_width)
7287 .input_height(input_height)
7288 .kernel_height(3)
7289 .kernel_width(3)
7290 .subsampling(1)
7291 .padding_left(1)
7292 .padding_right(1)
7293 .padding_top(1)
7294 .padding_bottom(1)
7295 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7296 }
7297 }
7298 }
7299
7300 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_height_lt_2) {
7301 TEST_REQUIRES_X86_SSSE3;
7302 for (size_t input_height = 1; input_height < 2; input_height++) {
7303 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7304 DWConv2DMicrokernelTester()
7305 .input_width(input_width)
7306 .input_height(input_height)
7307 .kernel_height(3)
7308 .kernel_width(3)
7309 .subsampling(1)
7310 .padding_left(1)
7311 .padding_right(1)
7312 .padding_top(1)
7313 .padding_bottom(1)
7314 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7315 }
7316 }
7317 }
7318
7319 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4, output_height_gt_2) {
7320 TEST_REQUIRES_X86_SSSE3;
7321 for (size_t input_height = 3; input_height < 5; input_height++) {
7322 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7323 DWConv2DMicrokernelTester()
7324 .input_width(input_width)
7325 .input_height(input_height)
7326 .kernel_height(3)
7327 .kernel_width(3)
7328 .subsampling(1)
7329 .padding_left(1)
7330 .padding_right(1)
7331 .padding_top(1)
7332 .padding_bottom(1)
7333 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4);
7334 }
7335 }
7336 }
7337#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7338
7339
7340#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7341 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_width_eq_4) {
7342 TEST_REQUIRES_X86_SSSE3;
7343 DWConv2DMicrokernelTester()
7344 .input_width(4)
7345 .input_height(3)
7346 .kernel_height(3)
7347 .kernel_width(3)
7348 .subsampling(1)
7349 .padding_left(1)
7350 .padding_right(1)
7351 .padding_top(1)
7352 .padding_bottom(1)
7353 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7354 }
7355
7356 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_width_div_4) {
7357 TEST_REQUIRES_X86_SSSE3;
7358 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7359 DWConv2DMicrokernelTester()
7360 .input_width(input_width)
7361 .input_height(3)
7362 .kernel_height(3)
7363 .kernel_width(3)
7364 .subsampling(1)
7365 .padding_left(1)
7366 .padding_right(1)
7367 .padding_top(1)
7368 .padding_bottom(1)
7369 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7370 }
7371 }
7372
7373 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_width_lt_4) {
7374 TEST_REQUIRES_X86_SSSE3;
7375 for (size_t input_width = 1; input_width < 4; input_width++) {
7376 DWConv2DMicrokernelTester()
7377 .input_width(4)
7378 .input_height(3)
7379 .kernel_height(3)
7380 .kernel_width(3)
7381 .subsampling(1)
7382 .padding_left(1)
7383 .padding_right(1)
7384 .padding_top(1)
7385 .padding_bottom(1)
7386 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7387 }
7388 }
7389
7390 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_width_gt_4) {
7391 TEST_REQUIRES_X86_SSSE3;
7392 for (size_t input_width = 5; input_width < 9; input_width++) {
7393 DWConv2DMicrokernelTester()
7394 .input_width(input_width)
7395 .input_height(3)
7396 .kernel_height(3)
7397 .kernel_width(3)
7398 .subsampling(1)
7399 .padding_left(1)
7400 .padding_right(1)
7401 .padding_top(1)
7402 .padding_bottom(1)
7403 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7404 }
7405 }
7406
7407 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_height_div_3) {
7408 TEST_REQUIRES_X86_SSSE3;
7409 for (size_t input_height = 6; input_height < 24; input_height += 3) {
7410 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7411 DWConv2DMicrokernelTester()
7412 .input_width(input_width)
7413 .input_height(input_height)
7414 .kernel_height(3)
7415 .kernel_width(3)
7416 .subsampling(1)
7417 .padding_left(1)
7418 .padding_right(1)
7419 .padding_top(1)
7420 .padding_bottom(1)
7421 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7422 }
7423 }
7424 }
7425
7426 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_height_lt_3) {
7427 TEST_REQUIRES_X86_SSSE3;
7428 for (size_t input_height = 1; input_height < 3; input_height++) {
7429 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7430 DWConv2DMicrokernelTester()
7431 .input_width(input_width)
7432 .input_height(input_height)
7433 .kernel_height(3)
7434 .kernel_width(3)
7435 .subsampling(1)
7436 .padding_left(1)
7437 .padding_right(1)
7438 .padding_top(1)
7439 .padding_bottom(1)
7440 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7441 }
7442 }
7443 }
7444
7445 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_3X4, output_height_gt_3) {
7446 TEST_REQUIRES_X86_SSSE3;
7447 for (size_t input_height = 4; input_height < 7; input_height++) {
7448 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7449 DWConv2DMicrokernelTester()
7450 .input_width(input_width)
7451 .input_height(input_height)
7452 .kernel_height(3)
7453 .kernel_width(3)
7454 .subsampling(1)
7455 .padding_left(1)
7456 .padding_right(1)
7457 .padding_top(1)
7458 .padding_bottom(1)
7459 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4);
7460 }
7461 }
7462 }
7463#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7464
7465
7466#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7467 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_width_eq_4) {
7468 TEST_REQUIRES_X86_SSSE3;
7469 DWConv2DMicrokernelTester()
7470 .input_width(4)
7471 .input_height(4)
7472 .kernel_height(3)
7473 .kernel_width(3)
7474 .subsampling(1)
7475 .padding_left(1)
7476 .padding_right(1)
7477 .padding_top(1)
7478 .padding_bottom(1)
7479 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7480 }
7481
7482 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_width_div_4) {
7483 TEST_REQUIRES_X86_SSSE3;
7484 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7485 DWConv2DMicrokernelTester()
7486 .input_width(input_width)
7487 .input_height(4)
7488 .kernel_height(3)
7489 .kernel_width(3)
7490 .subsampling(1)
7491 .padding_left(1)
7492 .padding_right(1)
7493 .padding_top(1)
7494 .padding_bottom(1)
7495 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7496 }
7497 }
7498
7499 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_width_lt_4) {
7500 TEST_REQUIRES_X86_SSSE3;
7501 for (size_t input_width = 1; input_width < 4; input_width++) {
7502 DWConv2DMicrokernelTester()
7503 .input_width(4)
7504 .input_height(4)
7505 .kernel_height(3)
7506 .kernel_width(3)
7507 .subsampling(1)
7508 .padding_left(1)
7509 .padding_right(1)
7510 .padding_top(1)
7511 .padding_bottom(1)
7512 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7513 }
7514 }
7515
7516 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_width_gt_4) {
7517 TEST_REQUIRES_X86_SSSE3;
7518 for (size_t input_width = 5; input_width < 9; input_width++) {
7519 DWConv2DMicrokernelTester()
7520 .input_width(input_width)
7521 .input_height(4)
7522 .kernel_height(3)
7523 .kernel_width(3)
7524 .subsampling(1)
7525 .padding_left(1)
7526 .padding_right(1)
7527 .padding_top(1)
7528 .padding_bottom(1)
7529 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7530 }
7531 }
7532
7533 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_height_div_4) {
7534 TEST_REQUIRES_X86_SSSE3;
7535 for (size_t input_height = 8; input_height < 32; input_height += 4) {
7536 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7537 DWConv2DMicrokernelTester()
7538 .input_width(input_width)
7539 .input_height(input_height)
7540 .kernel_height(3)
7541 .kernel_width(3)
7542 .subsampling(1)
7543 .padding_left(1)
7544 .padding_right(1)
7545 .padding_top(1)
7546 .padding_bottom(1)
7547 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7548 }
7549 }
7550 }
7551
7552 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_height_lt_4) {
7553 TEST_REQUIRES_X86_SSSE3;
7554 for (size_t input_height = 1; input_height < 4; input_height++) {
7555 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7556 DWConv2DMicrokernelTester()
7557 .input_width(input_width)
7558 .input_height(input_height)
7559 .kernel_height(3)
7560 .kernel_width(3)
7561 .subsampling(1)
7562 .padding_left(1)
7563 .padding_right(1)
7564 .padding_top(1)
7565 .padding_bottom(1)
7566 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7567 }
7568 }
7569 }
7570
7571 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_4X4, output_height_gt_4) {
7572 TEST_REQUIRES_X86_SSSE3;
7573 for (size_t input_height = 5; input_height < 9; input_height++) {
7574 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7575 DWConv2DMicrokernelTester()
7576 .input_width(input_width)
7577 .input_height(input_height)
7578 .kernel_height(3)
7579 .kernel_width(3)
7580 .subsampling(1)
7581 .padding_left(1)
7582 .padding_right(1)
7583 .padding_top(1)
7584 .padding_bottom(1)
7585 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4);
7586 }
7587 }
7588 }
7589#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7590
7591
7592#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7593 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_width_eq_4) {
7594 TEST_REQUIRES_X86_SSSE3;
7595 DWConv2DMicrokernelTester()
7596 .input_width(4)
7597 .input_height(5)
7598 .kernel_height(3)
7599 .kernel_width(3)
7600 .subsampling(1)
7601 .padding_left(1)
7602 .padding_right(1)
7603 .padding_top(1)
7604 .padding_bottom(1)
7605 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7606 }
7607
7608 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_width_div_4) {
7609 TEST_REQUIRES_X86_SSSE3;
7610 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7611 DWConv2DMicrokernelTester()
7612 .input_width(input_width)
7613 .input_height(5)
7614 .kernel_height(3)
7615 .kernel_width(3)
7616 .subsampling(1)
7617 .padding_left(1)
7618 .padding_right(1)
7619 .padding_top(1)
7620 .padding_bottom(1)
7621 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7622 }
7623 }
7624
7625 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_width_lt_4) {
7626 TEST_REQUIRES_X86_SSSE3;
7627 for (size_t input_width = 1; input_width < 4; input_width++) {
7628 DWConv2DMicrokernelTester()
7629 .input_width(4)
7630 .input_height(5)
7631 .kernel_height(3)
7632 .kernel_width(3)
7633 .subsampling(1)
7634 .padding_left(1)
7635 .padding_right(1)
7636 .padding_top(1)
7637 .padding_bottom(1)
7638 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7639 }
7640 }
7641
7642 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_width_gt_4) {
7643 TEST_REQUIRES_X86_SSSE3;
7644 for (size_t input_width = 5; input_width < 9; input_width++) {
7645 DWConv2DMicrokernelTester()
7646 .input_width(input_width)
7647 .input_height(5)
7648 .kernel_height(3)
7649 .kernel_width(3)
7650 .subsampling(1)
7651 .padding_left(1)
7652 .padding_right(1)
7653 .padding_top(1)
7654 .padding_bottom(1)
7655 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7656 }
7657 }
7658
7659 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_height_div_5) {
7660 TEST_REQUIRES_X86_SSSE3;
7661 for (size_t input_height = 10; input_height < 40; input_height += 5) {
7662 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7663 DWConv2DMicrokernelTester()
7664 .input_width(input_width)
7665 .input_height(input_height)
7666 .kernel_height(3)
7667 .kernel_width(3)
7668 .subsampling(1)
7669 .padding_left(1)
7670 .padding_right(1)
7671 .padding_top(1)
7672 .padding_bottom(1)
7673 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7674 }
7675 }
7676 }
7677
7678 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_height_lt_5) {
7679 TEST_REQUIRES_X86_SSSE3;
7680 for (size_t input_height = 1; input_height < 5; input_height++) {
7681 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7682 DWConv2DMicrokernelTester()
7683 .input_width(input_width)
7684 .input_height(input_height)
7685 .kernel_height(3)
7686 .kernel_width(3)
7687 .subsampling(1)
7688 .padding_left(1)
7689 .padding_right(1)
7690 .padding_top(1)
7691 .padding_bottom(1)
7692 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7693 }
7694 }
7695 }
7696
7697 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_5X4, output_height_gt_5) {
7698 TEST_REQUIRES_X86_SSSE3;
7699 for (size_t input_height = 6; input_height < 11; input_height++) {
7700 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7701 DWConv2DMicrokernelTester()
7702 .input_width(input_width)
7703 .input_height(input_height)
7704 .kernel_height(3)
7705 .kernel_width(3)
7706 .subsampling(1)
7707 .padding_left(1)
7708 .padding_right(1)
7709 .padding_top(1)
7710 .padding_bottom(1)
7711 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4);
7712 }
7713 }
7714 }
7715#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7716
7717
7718#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7719 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_width_eq_4) {
7720 TEST_REQUIRES_X86_SSSE3;
7721 DWConv2DMicrokernelTester()
7722 .input_width(4)
7723 .input_height(6)
7724 .kernel_height(3)
7725 .kernel_width(3)
7726 .subsampling(1)
7727 .padding_left(1)
7728 .padding_right(1)
7729 .padding_top(1)
7730 .padding_bottom(1)
7731 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7732 }
7733
7734 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_width_div_4) {
7735 TEST_REQUIRES_X86_SSSE3;
7736 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7737 DWConv2DMicrokernelTester()
7738 .input_width(input_width)
7739 .input_height(6)
7740 .kernel_height(3)
7741 .kernel_width(3)
7742 .subsampling(1)
7743 .padding_left(1)
7744 .padding_right(1)
7745 .padding_top(1)
7746 .padding_bottom(1)
7747 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7748 }
7749 }
7750
7751 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_width_lt_4) {
7752 TEST_REQUIRES_X86_SSSE3;
7753 for (size_t input_width = 1; input_width < 4; input_width++) {
7754 DWConv2DMicrokernelTester()
7755 .input_width(4)
7756 .input_height(6)
7757 .kernel_height(3)
7758 .kernel_width(3)
7759 .subsampling(1)
7760 .padding_left(1)
7761 .padding_right(1)
7762 .padding_top(1)
7763 .padding_bottom(1)
7764 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7765 }
7766 }
7767
7768 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_width_gt_4) {
7769 TEST_REQUIRES_X86_SSSE3;
7770 for (size_t input_width = 5; input_width < 9; input_width++) {
7771 DWConv2DMicrokernelTester()
7772 .input_width(input_width)
7773 .input_height(6)
7774 .kernel_height(3)
7775 .kernel_width(3)
7776 .subsampling(1)
7777 .padding_left(1)
7778 .padding_right(1)
7779 .padding_top(1)
7780 .padding_bottom(1)
7781 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7782 }
7783 }
7784
7785 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_height_div_6) {
7786 TEST_REQUIRES_X86_SSSE3;
7787 for (size_t input_height = 12; input_height < 48; input_height += 6) {
7788 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7789 DWConv2DMicrokernelTester()
7790 .input_width(input_width)
7791 .input_height(input_height)
7792 .kernel_height(3)
7793 .kernel_width(3)
7794 .subsampling(1)
7795 .padding_left(1)
7796 .padding_right(1)
7797 .padding_top(1)
7798 .padding_bottom(1)
7799 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7800 }
7801 }
7802 }
7803
7804 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_height_lt_6) {
7805 TEST_REQUIRES_X86_SSSE3;
7806 for (size_t input_height = 1; input_height < 6; input_height++) {
7807 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7808 DWConv2DMicrokernelTester()
7809 .input_width(input_width)
7810 .input_height(input_height)
7811 .kernel_height(3)
7812 .kernel_width(3)
7813 .subsampling(1)
7814 .padding_left(1)
7815 .padding_right(1)
7816 .padding_top(1)
7817 .padding_bottom(1)
7818 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7819 }
7820 }
7821 }
7822
7823 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_6X4, output_height_gt_6) {
7824 TEST_REQUIRES_X86_SSSE3;
7825 for (size_t input_height = 7; input_height < 13; input_height++) {
7826 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7827 DWConv2DMicrokernelTester()
7828 .input_width(input_width)
7829 .input_height(input_height)
7830 .kernel_height(3)
7831 .kernel_width(3)
7832 .subsampling(1)
7833 .padding_left(1)
7834 .padding_right(1)
7835 .padding_top(1)
7836 .padding_bottom(1)
7837 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4);
7838 }
7839 }
7840 }
7841#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7842
7843
7844#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7845 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC2, output_width_eq_4) {
7846 TEST_REQUIRES_X86_SSSE3;
7847 DWConv2DMicrokernelTester()
7848 .input_width(4)
7849 .input_height(1)
7850 .kernel_height(3)
7851 .kernel_width(3)
7852 .subsampling(1)
7853 .padding_left(1)
7854 .padding_right(1)
7855 .padding_top(1)
7856 .padding_bottom(1)
7857 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2);
7858 }
7859
7860 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC2, output_width_div_4) {
7861 TEST_REQUIRES_X86_SSSE3;
7862 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7863 DWConv2DMicrokernelTester()
7864 .input_width(input_width)
7865 .input_height(1)
7866 .kernel_height(3)
7867 .kernel_width(3)
7868 .subsampling(1)
7869 .padding_left(1)
7870 .padding_right(1)
7871 .padding_top(1)
7872 .padding_bottom(1)
7873 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2);
7874 }
7875 }
7876
7877 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC2, output_width_lt_4) {
7878 TEST_REQUIRES_X86_SSSE3;
7879 for (size_t input_width = 1; input_width < 4; input_width++) {
7880 DWConv2DMicrokernelTester()
7881 .input_width(4)
7882 .input_height(1)
7883 .kernel_height(3)
7884 .kernel_width(3)
7885 .subsampling(1)
7886 .padding_left(1)
7887 .padding_right(1)
7888 .padding_top(1)
7889 .padding_bottom(1)
7890 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2);
7891 }
7892 }
7893
7894 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC2, output_width_gt_4) {
7895 TEST_REQUIRES_X86_SSSE3;
7896 for (size_t input_width = 5; input_width < 9; input_width++) {
7897 DWConv2DMicrokernelTester()
7898 .input_width(input_width)
7899 .input_height(1)
7900 .kernel_height(3)
7901 .kernel_width(3)
7902 .subsampling(1)
7903 .padding_left(1)
7904 .padding_right(1)
7905 .padding_top(1)
7906 .padding_bottom(1)
7907 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2);
7908 }
7909 }
7910
7911 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC2, output_height_gt_1) {
7912 TEST_REQUIRES_X86_SSSE3;
7913 for (size_t input_height = 2; input_height < 3; input_height++) {
7914 for (size_t input_width = 1; input_width < 21; input_width += 3) {
7915 DWConv2DMicrokernelTester()
7916 .input_width(input_width)
7917 .input_height(input_height)
7918 .kernel_height(3)
7919 .kernel_width(3)
7920 .subsampling(1)
7921 .padding_left(1)
7922 .padding_right(1)
7923 .padding_top(1)
7924 .padding_bottom(1)
7925 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2);
7926 }
7927 }
7928 }
7929#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7930
7931
7932#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7933 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC3, output_width_eq_4) {
7934 TEST_REQUIRES_X86_SSSE3;
7935 DWConv2DMicrokernelTester()
7936 .input_width(4)
7937 .input_height(1)
7938 .kernel_height(3)
7939 .kernel_width(3)
7940 .subsampling(1)
7941 .padding_left(1)
7942 .padding_right(1)
7943 .padding_top(1)
7944 .padding_bottom(1)
7945 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3);
7946 }
7947
7948 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC3, output_width_div_4) {
7949 TEST_REQUIRES_X86_SSSE3;
7950 for (size_t input_width = 8; input_width < 32; input_width += 4) {
7951 DWConv2DMicrokernelTester()
7952 .input_width(input_width)
7953 .input_height(1)
7954 .kernel_height(3)
7955 .kernel_width(3)
7956 .subsampling(1)
7957 .padding_left(1)
7958 .padding_right(1)
7959 .padding_top(1)
7960 .padding_bottom(1)
7961 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3);
7962 }
7963 }
7964
7965 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC3, output_width_lt_4) {
7966 TEST_REQUIRES_X86_SSSE3;
7967 for (size_t input_width = 1; input_width < 4; input_width++) {
7968 DWConv2DMicrokernelTester()
7969 .input_width(4)
7970 .input_height(1)
7971 .kernel_height(3)
7972 .kernel_width(3)
7973 .subsampling(1)
7974 .padding_left(1)
7975 .padding_right(1)
7976 .padding_top(1)
7977 .padding_bottom(1)
7978 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3);
7979 }
7980 }
7981
7982 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC3, output_width_gt_4) {
7983 TEST_REQUIRES_X86_SSSE3;
7984 for (size_t input_width = 5; input_width < 9; input_width++) {
7985 DWConv2DMicrokernelTester()
7986 .input_width(input_width)
7987 .input_height(1)
7988 .kernel_height(3)
7989 .kernel_width(3)
7990 .subsampling(1)
7991 .padding_left(1)
7992 .padding_right(1)
7993 .padding_top(1)
7994 .padding_bottom(1)
7995 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3);
7996 }
7997 }
7998
7999 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC3, output_height_gt_1) {
8000 TEST_REQUIRES_X86_SSSE3;
8001 for (size_t input_height = 2; input_height < 3; input_height++) {
8002 for (size_t input_width = 1; input_width < 21; input_width += 3) {
8003 DWConv2DMicrokernelTester()
8004 .input_width(input_width)
8005 .input_height(input_height)
8006 .kernel_height(3)
8007 .kernel_width(3)
8008 .subsampling(1)
8009 .padding_left(1)
8010 .padding_right(1)
8011 .padding_top(1)
8012 .padding_bottom(1)
8013 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3);
8014 }
8015 }
8016 }
8017#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8018
8019
8020#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8021 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC4, output_width_eq_4) {
8022 TEST_REQUIRES_X86_SSSE3;
8023 DWConv2DMicrokernelTester()
8024 .input_width(4)
8025 .input_height(1)
8026 .kernel_height(3)
8027 .kernel_width(3)
8028 .subsampling(1)
8029 .padding_left(1)
8030 .padding_right(1)
8031 .padding_top(1)
8032 .padding_bottom(1)
8033 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4);
8034 }
8035
8036 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC4, output_width_div_4) {
8037 TEST_REQUIRES_X86_SSSE3;
8038 for (size_t input_width = 8; input_width < 32; input_width += 4) {
8039 DWConv2DMicrokernelTester()
8040 .input_width(input_width)
8041 .input_height(1)
8042 .kernel_height(3)
8043 .kernel_width(3)
8044 .subsampling(1)
8045 .padding_left(1)
8046 .padding_right(1)
8047 .padding_top(1)
8048 .padding_bottom(1)
8049 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4);
8050 }
8051 }
8052
8053 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC4, output_width_lt_4) {
8054 TEST_REQUIRES_X86_SSSE3;
8055 for (size_t input_width = 1; input_width < 4; input_width++) {
8056 DWConv2DMicrokernelTester()
8057 .input_width(4)
8058 .input_height(1)
8059 .kernel_height(3)
8060 .kernel_width(3)
8061 .subsampling(1)
8062 .padding_left(1)
8063 .padding_right(1)
8064 .padding_top(1)
8065 .padding_bottom(1)
8066 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4);
8067 }
8068 }
8069
8070 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC4, output_width_gt_4) {
8071 TEST_REQUIRES_X86_SSSE3;
8072 for (size_t input_width = 5; input_width < 9; input_width++) {
8073 DWConv2DMicrokernelTester()
8074 .input_width(input_width)
8075 .input_height(1)
8076 .kernel_height(3)
8077 .kernel_width(3)
8078 .subsampling(1)
8079 .padding_left(1)
8080 .padding_right(1)
8081 .padding_top(1)
8082 .padding_bottom(1)
8083 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4);
8084 }
8085 }
8086
8087 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_1X4_ACC4, output_height_gt_1) {
8088 TEST_REQUIRES_X86_SSSE3;
8089 for (size_t input_height = 2; input_height < 3; input_height++) {
8090 for (size_t input_width = 1; input_width < 21; input_width += 3) {
8091 DWConv2DMicrokernelTester()
8092 .input_width(input_width)
8093 .input_height(input_height)
8094 .kernel_height(3)
8095 .kernel_width(3)
8096 .subsampling(1)
8097 .padding_left(1)
8098 .padding_right(1)
8099 .padding_top(1)
8100 .padding_bottom(1)
8101 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4);
8102 }
8103 }
8104 }
8105#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8106
8107
8108#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8109 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_width_eq_4) {
8110 TEST_REQUIRES_X86_SSSE3;
8111 DWConv2DMicrokernelTester()
8112 .input_width(4)
8113 .input_height(2)
8114 .kernel_height(3)
8115 .kernel_width(3)
8116 .subsampling(1)
8117 .padding_left(1)
8118 .padding_right(1)
8119 .padding_top(1)
8120 .padding_bottom(1)
8121 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8122 }
8123
8124 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_width_div_4) {
8125 TEST_REQUIRES_X86_SSSE3;
8126 for (size_t input_width = 8; input_width < 32; input_width += 4) {
8127 DWConv2DMicrokernelTester()
8128 .input_width(input_width)
8129 .input_height(2)
8130 .kernel_height(3)
8131 .kernel_width(3)
8132 .subsampling(1)
8133 .padding_left(1)
8134 .padding_right(1)
8135 .padding_top(1)
8136 .padding_bottom(1)
8137 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8138 }
8139 }
8140
8141 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_width_lt_4) {
8142 TEST_REQUIRES_X86_SSSE3;
8143 for (size_t input_width = 1; input_width < 4; input_width++) {
8144 DWConv2DMicrokernelTester()
8145 .input_width(4)
8146 .input_height(2)
8147 .kernel_height(3)
8148 .kernel_width(3)
8149 .subsampling(1)
8150 .padding_left(1)
8151 .padding_right(1)
8152 .padding_top(1)
8153 .padding_bottom(1)
8154 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8155 }
8156 }
8157
8158 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_width_gt_4) {
8159 TEST_REQUIRES_X86_SSSE3;
8160 for (size_t input_width = 5; input_width < 9; input_width++) {
8161 DWConv2DMicrokernelTester()
8162 .input_width(input_width)
8163 .input_height(2)
8164 .kernel_height(3)
8165 .kernel_width(3)
8166 .subsampling(1)
8167 .padding_left(1)
8168 .padding_right(1)
8169 .padding_top(1)
8170 .padding_bottom(1)
8171 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8172 }
8173 }
8174
8175 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_height_div_2) {
8176 TEST_REQUIRES_X86_SSSE3;
8177 for (size_t input_height = 4; input_height < 16; input_height += 2) {
8178 for (size_t input_width = 1; input_width < 21; input_width += 3) {
8179 DWConv2DMicrokernelTester()
8180 .input_width(input_width)
8181 .input_height(input_height)
8182 .kernel_height(3)
8183 .kernel_width(3)
8184 .subsampling(1)
8185 .padding_left(1)
8186 .padding_right(1)
8187 .padding_top(1)
8188 .padding_bottom(1)
8189 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8190 }
8191 }
8192 }
8193
8194 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_height_lt_2) {
8195 TEST_REQUIRES_X86_SSSE3;
8196 for (size_t input_height = 1; input_height < 2; input_height++) {
8197 for (size_t input_width = 1; input_width < 21; input_width += 3) {
8198 DWConv2DMicrokernelTester()
8199 .input_width(input_width)
8200 .input_height(input_height)
8201 .kernel_height(3)
8202 .kernel_width(3)
8203 .subsampling(1)
8204 .padding_left(1)
8205 .padding_right(1)
8206 .padding_top(1)
8207 .padding_bottom(1)
8208 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8209 }
8210 }
8211 }
8212
8213 TEST(F32_DWCONV2D_CHW_3X3P1__SSSE3_2X4_ACC2, output_height_gt_2) {
8214 TEST_REQUIRES_X86_SSSE3;
8215 for (size_t input_height = 3; input_height < 5; input_height++) {
8216 for (size_t input_width = 1; input_width < 21; input_width += 3) {
8217 DWConv2DMicrokernelTester()
8218 .input_width(input_width)
8219 .input_height(input_height)
8220 .kernel_height(3)
8221 .kernel_width(3)
8222 .subsampling(1)
8223 .padding_left(1)
8224 .padding_right(1)
8225 .padding_top(1)
8226 .padding_bottom(1)
8227 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2);
8228 }
8229 }
8230 }
8231#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8232
8233
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008234#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -07008235 TEST(F32_DWCONV2D_CHW_3X3P1__PSIMD_1X4_ACC3, output_width_eq_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008236 TEST_REQUIRES_PSIMD;
Marat Dukhanbf715f92020-10-23 20:17:00 -07008237 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008238 .input_width(4)
8239 .input_height(1)
Erich Elsen0cc2c532019-10-15 04:44:18 -07008240 .kernel_height(3)
8241 .kernel_width(3)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008242 .subsampling(1)
8243 .padding_left(1)
8244 .padding_right(1)
8245 .padding_top(1)
8246 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008247 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008248 }
8249
Marat Dukhanbf715f92020-10-23 20:17:00 -07008250 TEST(F32_DWCONV2D_CHW_3X3P1__PSIMD_1X4_ACC3, output_width_div_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008251 TEST_REQUIRES_PSIMD;
8252 for (size_t input_width = 8; input_width < 32; input_width += 4) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008253 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008254 .input_width(input_width)
8255 .input_height(1)
8256 .kernel_height(3)
8257 .kernel_width(3)
8258 .subsampling(1)
8259 .padding_left(1)
8260 .padding_right(1)
8261 .padding_top(1)
8262 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008263 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008264 }
8265 }
8266
Marat Dukhanbf715f92020-10-23 20:17:00 -07008267 TEST(F32_DWCONV2D_CHW_3X3P1__PSIMD_1X4_ACC3, output_width_lt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008268 TEST_REQUIRES_PSIMD;
8269 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008270 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008271 .input_width(4)
8272 .input_height(1)
8273 .kernel_height(3)
8274 .kernel_width(3)
8275 .subsampling(1)
8276 .padding_left(1)
8277 .padding_right(1)
8278 .padding_top(1)
8279 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008280 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008281 }
8282 }
8283
Marat Dukhanbf715f92020-10-23 20:17:00 -07008284 TEST(F32_DWCONV2D_CHW_3X3P1__PSIMD_1X4_ACC3, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008285 TEST_REQUIRES_PSIMD;
8286 for (size_t input_width = 5; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008287 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008288 .input_width(input_width)
8289 .input_height(1)
8290 .kernel_height(3)
8291 .kernel_width(3)
8292 .subsampling(1)
8293 .padding_left(1)
8294 .padding_right(1)
8295 .padding_top(1)
8296 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008297 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008298 }
8299 }
8300
Marat Dukhanbf715f92020-10-23 20:17:00 -07008301 TEST(F32_DWCONV2D_CHW_3X3P1__PSIMD_1X4_ACC3, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008302 TEST_REQUIRES_PSIMD;
8303 for (size_t input_height = 2; input_height < 3; input_height++) {
8304 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008305 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008306 .input_width(input_width)
8307 .input_height(input_height)
8308 .kernel_height(3)
8309 .kernel_width(3)
8310 .subsampling(1)
8311 .padding_left(1)
8312 .padding_right(1)
8313 .padding_top(1)
8314 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008315 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008316 }
8317 }
8318 }
8319#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
8320
8321
8322#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -07008323 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_width_eq_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008324 TEST_REQUIRES_PSIMD;
8325 for (size_t input_width = 7; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008326 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008327 .input_width(input_width)
8328 .input_height(2)
8329 .kernel_height(3)
8330 .kernel_width(3)
8331 .subsampling(2)
8332 .padding_left(1)
8333 .padding_right(1)
8334 .padding_top(1)
8335 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008336 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008337 }
8338 }
8339
Marat Dukhanbf715f92020-10-23 20:17:00 -07008340 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_width_div_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008341 TEST_REQUIRES_PSIMD;
8342 for (size_t input_width = 16; input_width < 64; input_width += 8) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008343 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008344 .input_width(input_width)
8345 .input_height(2)
8346 .kernel_height(3)
8347 .kernel_width(3)
8348 .subsampling(2)
8349 .padding_left(1)
8350 .padding_right(1)
8351 .padding_top(1)
8352 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008353 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008354 }
8355 }
8356
Marat Dukhanbf715f92020-10-23 20:17:00 -07008357 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_width_lt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008358 TEST_REQUIRES_PSIMD;
8359 for (size_t input_width = 1; input_width < 7; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008360 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008361 .input_width(8)
8362 .input_height(2)
8363 .kernel_height(3)
8364 .kernel_width(3)
8365 .subsampling(2)
8366 .padding_left(1)
8367 .padding_right(1)
8368 .padding_top(1)
8369 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008370 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008371 }
8372 }
8373
Marat Dukhanbf715f92020-10-23 20:17:00 -07008374 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008375 TEST_REQUIRES_PSIMD;
8376 for (size_t input_width = 9; input_width < 17; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008377 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008378 .input_width(input_width)
8379 .input_height(2)
8380 .kernel_height(3)
8381 .kernel_width(3)
8382 .subsampling(2)
8383 .padding_left(1)
8384 .padding_right(1)
8385 .padding_top(1)
8386 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008387 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008388 }
8389 }
8390
Marat Dukhanbf715f92020-10-23 20:17:00 -07008391 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008392 TEST_REQUIRES_PSIMD;
8393 for (size_t input_height = 1; input_height < 3; input_height++) {
8394 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008395 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008396 .input_width(input_width)
8397 .input_height(input_height)
8398 .kernel_height(3)
8399 .kernel_width(3)
8400 .subsampling(2)
8401 .padding_left(1)
8402 .padding_right(1)
8403 .padding_top(1)
8404 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008405 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008406 }
8407 }
8408 }
8409
Marat Dukhanbf715f92020-10-23 20:17:00 -07008410 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008411 TEST_REQUIRES_PSIMD;
8412 for (size_t input_height = 3; input_height < 5; input_height++) {
8413 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008414 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008415 .input_width(input_width)
8416 .input_height(input_height)
8417 .kernel_height(3)
8418 .kernel_width(3)
8419 .subsampling(2)
8420 .padding_left(1)
8421 .padding_right(1)
8422 .padding_top(1)
8423 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008424 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008425 }
8426 }
8427 }
8428
Marat Dukhanbf715f92020-10-23 20:17:00 -07008429 TEST(F32_DWCONV2D_CHW_3X3S2P1__PSIMD_1X4_ACC3, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008430 TEST_REQUIRES_PSIMD;
8431 for (size_t input_height = 2; input_height < 8; input_height++) {
8432 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008433 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008434 .input_width(input_width)
8435 .input_height(input_height)
8436 .kernel_height(3)
8437 .kernel_width(3)
8438 .subsampling(2)
8439 .padding_left(1)
8440 .padding_right(1)
8441 .padding_top(0)
8442 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008443 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008444 }
8445 }
8446 }
8447#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
8448
8449
8450#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -07008451 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_width_eq_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008452 TEST_REQUIRES_PSIMD;
Marat Dukhanbf715f92020-10-23 20:17:00 -07008453 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008454 .input_width(4)
8455 .input_height(3)
8456 .kernel_height(5)
8457 .kernel_width(5)
8458 .subsampling(1)
8459 .padding_left(2)
8460 .padding_right(2)
8461 .padding_top(2)
8462 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008463 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008464 }
8465
Marat Dukhanbf715f92020-10-23 20:17:00 -07008466 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_width_div_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008467 TEST_REQUIRES_PSIMD;
8468 for (size_t input_width = 8; input_width < 32; input_width += 4) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008469 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008470 .input_width(input_width)
8471 .input_height(3)
8472 .kernel_height(5)
8473 .kernel_width(5)
8474 .subsampling(1)
8475 .padding_left(2)
8476 .padding_right(2)
8477 .padding_top(2)
8478 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008479 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008480 }
8481 }
8482
Marat Dukhanbf715f92020-10-23 20:17:00 -07008483 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_width_lt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008484 TEST_REQUIRES_PSIMD;
8485 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008486 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008487 .input_width(4)
8488 .input_height(3)
8489 .kernel_height(5)
8490 .kernel_width(5)
8491 .subsampling(1)
8492 .padding_left(2)
8493 .padding_right(2)
8494 .padding_top(2)
8495 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008496 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008497 }
8498 }
8499
Marat Dukhanbf715f92020-10-23 20:17:00 -07008500 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008501 TEST_REQUIRES_PSIMD;
8502 for (size_t input_width = 5; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008503 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008504 .input_width(input_width)
8505 .input_height(3)
8506 .kernel_height(5)
8507 .kernel_width(5)
8508 .subsampling(1)
8509 .padding_left(2)
8510 .padding_right(2)
8511 .padding_top(2)
8512 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008513 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008514 }
8515 }
8516
Marat Dukhanbf715f92020-10-23 20:17:00 -07008517 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_height_div_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008518 TEST_REQUIRES_PSIMD;
8519 for (size_t input_height = 6; input_height < 24; input_height += 3) {
8520 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008521 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008522 .input_width(input_width)
8523 .input_height(input_height)
8524 .kernel_height(5)
8525 .kernel_width(5)
8526 .subsampling(1)
8527 .padding_left(2)
8528 .padding_right(2)
8529 .padding_top(2)
8530 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008531 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008532 }
8533 }
8534 }
8535
Marat Dukhanbf715f92020-10-23 20:17:00 -07008536 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_height_lt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008537 TEST_REQUIRES_PSIMD;
8538 for (size_t input_height = 1; input_height < 3; input_height++) {
8539 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008540 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008541 .input_width(input_width)
8542 .input_height(input_height)
8543 .kernel_height(5)
8544 .kernel_width(5)
8545 .subsampling(1)
8546 .padding_left(2)
8547 .padding_right(2)
8548 .padding_top(2)
8549 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008550 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008551 }
8552 }
8553 }
8554
Marat Dukhanbf715f92020-10-23 20:17:00 -07008555 TEST(F32_DWCONV2D_CHW_5X5P2__PSIMD_3X4, output_height_gt_3) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008556 TEST_REQUIRES_PSIMD;
8557 for (size_t input_height = 4; input_height < 7; input_height++) {
8558 for (size_t input_width = 1; input_width < 21; input_width += 3) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008559 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008560 .input_width(input_width)
8561 .input_height(input_height)
8562 .kernel_height(5)
8563 .kernel_width(5)
8564 .subsampling(1)
8565 .padding_left(2)
8566 .padding_right(2)
8567 .padding_top(2)
8568 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008569 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008570 }
8571 }
8572 }
8573#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
8574
8575
8576#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -07008577 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_width_eq_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008578 TEST_REQUIRES_PSIMD;
8579 for (size_t input_width = 7; input_width < 9; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008580 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008581 .input_width(input_width)
8582 .input_height(2)
8583 .kernel_height(5)
8584 .kernel_width(5)
8585 .subsampling(2)
8586 .padding_left(2)
8587 .padding_right(2)
8588 .padding_top(2)
8589 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008590 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008591 }
8592 }
8593
Marat Dukhanbf715f92020-10-23 20:17:00 -07008594 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_width_div_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008595 TEST_REQUIRES_PSIMD;
8596 for (size_t input_width = 16; input_width < 64; input_width += 8) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008597 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008598 .input_width(input_width)
8599 .input_height(2)
8600 .kernel_height(5)
8601 .kernel_width(5)
8602 .subsampling(2)
8603 .padding_left(2)
8604 .padding_right(2)
8605 .padding_top(2)
8606 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008607 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008608 }
8609 }
8610
Marat Dukhanbf715f92020-10-23 20:17:00 -07008611 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_width_lt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008612 TEST_REQUIRES_PSIMD;
8613 for (size_t input_width = 1; input_width < 7; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008614 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008615 .input_width(8)
8616 .input_height(2)
8617 .kernel_height(5)
8618 .kernel_width(5)
8619 .subsampling(2)
8620 .padding_left(2)
8621 .padding_right(2)
8622 .padding_top(2)
8623 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008624 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008625 }
8626 }
8627
Marat Dukhanbf715f92020-10-23 20:17:00 -07008628 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_width_gt_4) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008629 TEST_REQUIRES_PSIMD;
8630 for (size_t input_width = 9; input_width < 17; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008631 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008632 .input_width(input_width)
8633 .input_height(2)
8634 .kernel_height(5)
8635 .kernel_width(5)
8636 .subsampling(2)
8637 .padding_left(2)
8638 .padding_right(2)
8639 .padding_top(2)
8640 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008641 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008642 }
8643 }
8644
Marat Dukhanbf715f92020-10-23 20:17:00 -07008645 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008646 TEST_REQUIRES_PSIMD;
8647 for (size_t input_height = 1; input_height < 3; input_height++) {
8648 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008649 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008650 .input_width(input_width)
8651 .input_height(input_height)
8652 .kernel_height(5)
8653 .kernel_width(5)
8654 .subsampling(2)
8655 .padding_left(2)
8656 .padding_right(2)
8657 .padding_top(2)
8658 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008659 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008660 }
8661 }
8662 }
8663
Marat Dukhanbf715f92020-10-23 20:17:00 -07008664 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008665 TEST_REQUIRES_PSIMD;
8666 for (size_t input_height = 3; input_height < 5; input_height++) {
8667 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008668 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008669 .input_width(input_width)
8670 .input_height(input_height)
8671 .kernel_height(5)
8672 .kernel_width(5)
8673 .subsampling(2)
8674 .padding_left(2)
8675 .padding_right(2)
8676 .padding_top(2)
8677 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008678 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008679 }
8680 }
8681 }
8682
Marat Dukhanbf715f92020-10-23 20:17:00 -07008683 TEST(F32_DWCONV2D_CHW_5X5S2P2__PSIMD_1X4_ACC2, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008684 TEST_REQUIRES_PSIMD;
8685 for (size_t input_height = 2; input_height < 8; input_height++) {
8686 for (size_t input_width = 1; input_width < 41; input_width += 7) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07008687 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008688 .input_width(input_width)
8689 .input_height(input_height)
8690 .kernel_height(5)
8691 .kernel_width(5)
8692 .subsampling(2)
8693 .padding_left(2)
8694 .padding_right(2)
8695 .padding_top(1)
8696 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -07008697 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07008698 }
8699 }
8700 }
8701#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
8702
8703
Marat Dukhan91249d22020-10-24 12:02:51 -07008704TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1, output_width_eq_1) {
8705 DWConv2DMicrokernelTester()
8706 .input_width(1)
8707 .input_height(1)
8708 .kernel_height(3)
8709 .kernel_width(3)
8710 .subsampling(1)
8711 .padding_left(1)
8712 .padding_right(1)
8713 .padding_top(1)
8714 .padding_bottom(1)
8715 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
8716}
8717
8718TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1, output_width_gt_1) {
8719 for (size_t input_width = 2; input_width < 6; input_width++) {
8720 DWConv2DMicrokernelTester()
8721 .input_width(input_width)
8722 .input_height(1)
8723 .kernel_height(3)
8724 .kernel_width(3)
8725 .subsampling(1)
8726 .padding_left(1)
8727 .padding_right(1)
8728 .padding_top(1)
8729 .padding_bottom(1)
8730 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
8731 }
8732}
8733
8734TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1, output_height_gt_1) {
8735 for (size_t input_height = 2; input_height < 6; input_height++) {
8736 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8737 DWConv2DMicrokernelTester()
8738 .input_width(input_width)
8739 .input_height(input_height)
8740 .kernel_height(3)
8741 .kernel_width(3)
8742 .subsampling(1)
8743 .padding_left(1)
8744 .padding_right(1)
8745 .padding_top(1)
8746 .padding_bottom(1)
8747 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
8748 }
8749 }
8750}
8751
8752
8753TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1, output_width_eq_1) {
8754 DWConv2DMicrokernelTester()
8755 .input_width(1)
8756 .input_height(2)
8757 .kernel_height(3)
8758 .kernel_width(3)
8759 .subsampling(1)
8760 .padding_left(1)
8761 .padding_right(1)
8762 .padding_top(1)
8763 .padding_bottom(1)
8764 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
8765}
8766
8767TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1, output_width_gt_1) {
8768 for (size_t input_width = 2; input_width < 6; input_width++) {
8769 DWConv2DMicrokernelTester()
8770 .input_width(input_width)
8771 .input_height(2)
8772 .kernel_height(3)
8773 .kernel_width(3)
8774 .subsampling(1)
8775 .padding_left(1)
8776 .padding_right(1)
8777 .padding_top(1)
8778 .padding_bottom(1)
8779 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
8780 }
8781}
8782
8783TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1, output_height_div_2) {
8784 for (size_t input_height = 4; input_height < 16; input_height += 2) {
8785 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8786 DWConv2DMicrokernelTester()
8787 .input_width(input_width)
8788 .input_height(input_height)
8789 .kernel_height(3)
8790 .kernel_width(3)
8791 .subsampling(1)
8792 .padding_left(1)
8793 .padding_right(1)
8794 .padding_top(1)
8795 .padding_bottom(1)
8796 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
8797 }
8798 }
8799}
8800
8801TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1, output_height_lt_2) {
8802 for (size_t input_height = 1; input_height < 2; input_height++) {
8803 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8804 DWConv2DMicrokernelTester()
8805 .input_width(input_width)
8806 .input_height(input_height)
8807 .kernel_height(3)
8808 .kernel_width(3)
8809 .subsampling(1)
8810 .padding_left(1)
8811 .padding_right(1)
8812 .padding_top(1)
8813 .padding_bottom(1)
8814 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
8815 }
8816 }
8817}
8818
8819TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1, output_height_gt_2) {
8820 for (size_t input_height = 3; input_height < 11; input_height++) {
8821 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8822 DWConv2DMicrokernelTester()
8823 .input_width(input_width)
8824 .input_height(input_height)
8825 .kernel_height(3)
8826 .kernel_width(3)
8827 .subsampling(1)
8828 .padding_left(1)
8829 .padding_right(1)
8830 .padding_top(1)
8831 .padding_bottom(1)
8832 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
8833 }
8834 }
8835}
8836
8837
8838TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_3X1, output_width_eq_1) {
8839 DWConv2DMicrokernelTester()
8840 .input_width(1)
8841 .input_height(3)
8842 .kernel_height(3)
8843 .kernel_width(3)
8844 .subsampling(1)
8845 .padding_left(1)
8846 .padding_right(1)
8847 .padding_top(1)
8848 .padding_bottom(1)
8849 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
8850}
8851
8852TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_3X1, output_width_gt_1) {
8853 for (size_t input_width = 2; input_width < 6; input_width++) {
8854 DWConv2DMicrokernelTester()
8855 .input_width(input_width)
8856 .input_height(3)
8857 .kernel_height(3)
8858 .kernel_width(3)
8859 .subsampling(1)
8860 .padding_left(1)
8861 .padding_right(1)
8862 .padding_top(1)
8863 .padding_bottom(1)
8864 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
8865 }
8866}
8867
8868TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_3X1, output_height_div_3) {
8869 for (size_t input_height = 6; input_height < 24; input_height += 3) {
8870 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8871 DWConv2DMicrokernelTester()
8872 .input_width(input_width)
8873 .input_height(input_height)
8874 .kernel_height(3)
8875 .kernel_width(3)
8876 .subsampling(1)
8877 .padding_left(1)
8878 .padding_right(1)
8879 .padding_top(1)
8880 .padding_bottom(1)
8881 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
8882 }
8883 }
8884}
8885
8886TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_3X1, output_height_lt_3) {
8887 for (size_t input_height = 1; input_height < 3; input_height++) {
8888 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8889 DWConv2DMicrokernelTester()
8890 .input_width(input_width)
8891 .input_height(input_height)
8892 .kernel_height(3)
8893 .kernel_width(3)
8894 .subsampling(1)
8895 .padding_left(1)
8896 .padding_right(1)
8897 .padding_top(1)
8898 .padding_bottom(1)
8899 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
8900 }
8901 }
8902}
8903
8904TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_3X1, output_height_gt_3) {
8905 for (size_t input_height = 4; input_height < 16; input_height++) {
8906 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8907 DWConv2DMicrokernelTester()
8908 .input_width(input_width)
8909 .input_height(input_height)
8910 .kernel_height(3)
8911 .kernel_width(3)
8912 .subsampling(1)
8913 .padding_left(1)
8914 .padding_right(1)
8915 .padding_top(1)
8916 .padding_bottom(1)
8917 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
8918 }
8919 }
8920}
8921
8922
8923TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_4X1, output_width_eq_1) {
8924 DWConv2DMicrokernelTester()
8925 .input_width(1)
8926 .input_height(4)
8927 .kernel_height(3)
8928 .kernel_width(3)
8929 .subsampling(1)
8930 .padding_left(1)
8931 .padding_right(1)
8932 .padding_top(1)
8933 .padding_bottom(1)
8934 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
8935}
8936
8937TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_4X1, output_width_gt_1) {
8938 for (size_t input_width = 2; input_width < 6; input_width++) {
8939 DWConv2DMicrokernelTester()
8940 .input_width(input_width)
8941 .input_height(4)
8942 .kernel_height(3)
8943 .kernel_width(3)
8944 .subsampling(1)
8945 .padding_left(1)
8946 .padding_right(1)
8947 .padding_top(1)
8948 .padding_bottom(1)
8949 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
8950 }
8951}
8952
8953TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_4X1, output_height_div_4) {
8954 for (size_t input_height = 8; input_height < 32; input_height += 4) {
8955 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8956 DWConv2DMicrokernelTester()
8957 .input_width(input_width)
8958 .input_height(input_height)
8959 .kernel_height(3)
8960 .kernel_width(3)
8961 .subsampling(1)
8962 .padding_left(1)
8963 .padding_right(1)
8964 .padding_top(1)
8965 .padding_bottom(1)
8966 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
8967 }
8968 }
8969}
8970
8971TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_4X1, output_height_lt_4) {
8972 for (size_t input_height = 1; input_height < 4; input_height++) {
8973 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8974 DWConv2DMicrokernelTester()
8975 .input_width(input_width)
8976 .input_height(input_height)
8977 .kernel_height(3)
8978 .kernel_width(3)
8979 .subsampling(1)
8980 .padding_left(1)
8981 .padding_right(1)
8982 .padding_top(1)
8983 .padding_bottom(1)
8984 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
8985 }
8986 }
8987}
8988
8989TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_4X1, output_height_gt_4) {
8990 for (size_t input_height = 5; input_height < 21; input_height++) {
8991 for (size_t input_width = 1; input_width < 6; input_width += 1) {
8992 DWConv2DMicrokernelTester()
8993 .input_width(input_width)
8994 .input_height(input_height)
8995 .kernel_height(3)
8996 .kernel_width(3)
8997 .subsampling(1)
8998 .padding_left(1)
8999 .padding_right(1)
9000 .padding_top(1)
9001 .padding_bottom(1)
9002 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9003 }
9004 }
9005}
9006
9007
9008TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_5X1, output_width_eq_1) {
9009 DWConv2DMicrokernelTester()
9010 .input_width(1)
9011 .input_height(5)
9012 .kernel_height(3)
9013 .kernel_width(3)
9014 .subsampling(1)
9015 .padding_left(1)
9016 .padding_right(1)
9017 .padding_top(1)
9018 .padding_bottom(1)
9019 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, DWConv2DMicrokernelTester::Variant::Scalar);
9020}
9021
9022TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_5X1, output_width_gt_1) {
9023 for (size_t input_width = 2; input_width < 6; input_width++) {
9024 DWConv2DMicrokernelTester()
9025 .input_width(input_width)
9026 .input_height(5)
9027 .kernel_height(3)
9028 .kernel_width(3)
9029 .subsampling(1)
9030 .padding_left(1)
9031 .padding_right(1)
9032 .padding_top(1)
9033 .padding_bottom(1)
9034 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, DWConv2DMicrokernelTester::Variant::Scalar);
9035 }
9036}
9037
9038TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_5X1, output_height_div_5) {
9039 for (size_t input_height = 10; input_height < 40; input_height += 5) {
9040 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9041 DWConv2DMicrokernelTester()
9042 .input_width(input_width)
9043 .input_height(input_height)
9044 .kernel_height(3)
9045 .kernel_width(3)
9046 .subsampling(1)
9047 .padding_left(1)
9048 .padding_right(1)
9049 .padding_top(1)
9050 .padding_bottom(1)
9051 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, DWConv2DMicrokernelTester::Variant::Scalar);
9052 }
9053 }
9054}
9055
9056TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_5X1, output_height_lt_5) {
9057 for (size_t input_height = 1; input_height < 5; input_height++) {
9058 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9059 DWConv2DMicrokernelTester()
9060 .input_width(input_width)
9061 .input_height(input_height)
9062 .kernel_height(3)
9063 .kernel_width(3)
9064 .subsampling(1)
9065 .padding_left(1)
9066 .padding_right(1)
9067 .padding_top(1)
9068 .padding_bottom(1)
9069 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, DWConv2DMicrokernelTester::Variant::Scalar);
9070 }
9071 }
9072}
9073
9074TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_5X1, output_height_gt_5) {
9075 for (size_t input_height = 6; input_height < 26; input_height++) {
9076 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9077 DWConv2DMicrokernelTester()
9078 .input_width(input_width)
9079 .input_height(input_height)
9080 .kernel_height(3)
9081 .kernel_width(3)
9082 .subsampling(1)
9083 .padding_left(1)
9084 .padding_right(1)
9085 .padding_top(1)
9086 .padding_bottom(1)
9087 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, DWConv2DMicrokernelTester::Variant::Scalar);
9088 }
9089 }
9090}
9091
9092
9093TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_6X1, output_width_eq_1) {
9094 DWConv2DMicrokernelTester()
9095 .input_width(1)
9096 .input_height(6)
9097 .kernel_height(3)
9098 .kernel_width(3)
9099 .subsampling(1)
9100 .padding_left(1)
9101 .padding_right(1)
9102 .padding_top(1)
9103 .padding_bottom(1)
9104 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, DWConv2DMicrokernelTester::Variant::Scalar);
9105}
9106
9107TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_6X1, output_width_gt_1) {
9108 for (size_t input_width = 2; input_width < 6; input_width++) {
9109 DWConv2DMicrokernelTester()
9110 .input_width(input_width)
9111 .input_height(6)
9112 .kernel_height(3)
9113 .kernel_width(3)
9114 .subsampling(1)
9115 .padding_left(1)
9116 .padding_right(1)
9117 .padding_top(1)
9118 .padding_bottom(1)
9119 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, DWConv2DMicrokernelTester::Variant::Scalar);
9120 }
9121}
9122
9123TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_6X1, output_height_div_6) {
9124 for (size_t input_height = 12; input_height < 48; input_height += 6) {
9125 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9126 DWConv2DMicrokernelTester()
9127 .input_width(input_width)
9128 .input_height(input_height)
9129 .kernel_height(3)
9130 .kernel_width(3)
9131 .subsampling(1)
9132 .padding_left(1)
9133 .padding_right(1)
9134 .padding_top(1)
9135 .padding_bottom(1)
9136 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, DWConv2DMicrokernelTester::Variant::Scalar);
9137 }
9138 }
9139}
9140
9141TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_6X1, output_height_lt_6) {
9142 for (size_t input_height = 1; input_height < 6; input_height++) {
9143 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9144 DWConv2DMicrokernelTester()
9145 .input_width(input_width)
9146 .input_height(input_height)
9147 .kernel_height(3)
9148 .kernel_width(3)
9149 .subsampling(1)
9150 .padding_left(1)
9151 .padding_right(1)
9152 .padding_top(1)
9153 .padding_bottom(1)
9154 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, DWConv2DMicrokernelTester::Variant::Scalar);
9155 }
9156 }
9157}
9158
9159TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_6X1, output_height_gt_6) {
9160 for (size_t input_height = 7; input_height < 31; input_height++) {
9161 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9162 DWConv2DMicrokernelTester()
9163 .input_width(input_width)
9164 .input_height(input_height)
9165 .kernel_height(3)
9166 .kernel_width(3)
9167 .subsampling(1)
9168 .padding_left(1)
9169 .padding_right(1)
9170 .padding_top(1)
9171 .padding_bottom(1)
9172 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, DWConv2DMicrokernelTester::Variant::Scalar);
9173 }
9174 }
9175}
9176
9177
9178TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC2, output_width_eq_1) {
9179 DWConv2DMicrokernelTester()
9180 .input_width(1)
9181 .input_height(1)
9182 .kernel_height(3)
9183 .kernel_width(3)
9184 .subsampling(1)
9185 .padding_left(1)
9186 .padding_right(1)
9187 .padding_top(1)
9188 .padding_bottom(1)
9189 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9190}
9191
9192TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC2, output_width_gt_1) {
9193 for (size_t input_width = 2; input_width < 6; input_width++) {
9194 DWConv2DMicrokernelTester()
9195 .input_width(input_width)
9196 .input_height(1)
9197 .kernel_height(3)
9198 .kernel_width(3)
9199 .subsampling(1)
9200 .padding_left(1)
9201 .padding_right(1)
9202 .padding_top(1)
9203 .padding_bottom(1)
9204 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9205 }
9206}
9207
9208TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC2, output_height_gt_1) {
9209 for (size_t input_height = 2; input_height < 6; input_height++) {
9210 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9211 DWConv2DMicrokernelTester()
9212 .input_width(input_width)
9213 .input_height(input_height)
9214 .kernel_height(3)
9215 .kernel_width(3)
9216 .subsampling(1)
9217 .padding_left(1)
9218 .padding_right(1)
9219 .padding_top(1)
9220 .padding_bottom(1)
9221 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9222 }
9223 }
9224}
9225
9226
Marat Dukhanbf715f92020-10-23 20:17:00 -07009227TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC3, output_width_eq_1) {
9228 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009229 .input_width(1)
9230 .input_height(1)
9231 .kernel_height(3)
9232 .kernel_width(3)
9233 .subsampling(1)
9234 .padding_left(1)
9235 .padding_right(1)
9236 .padding_top(1)
9237 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009238 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009239}
9240
Marat Dukhanbf715f92020-10-23 20:17:00 -07009241TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC3, output_width_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009242 for (size_t input_width = 2; input_width < 6; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07009243 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009244 .input_width(input_width)
9245 .input_height(1)
9246 .kernel_height(3)
9247 .kernel_width(3)
9248 .subsampling(1)
9249 .padding_left(1)
9250 .padding_right(1)
9251 .padding_top(1)
9252 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009253 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen0cc2c532019-10-15 04:44:18 -07009254 }
9255}
9256
Marat Dukhanbf715f92020-10-23 20:17:00 -07009257TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC3, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009258 for (size_t input_height = 2; input_height < 6; input_height++) {
9259 for (size_t input_width = 1; input_width < 6; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07009260 DWConv2DMicrokernelTester()
Erich Elsen0cc2c532019-10-15 04:44:18 -07009261 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009262 .input_height(input_height)
9263 .kernel_height(3)
9264 .kernel_width(3)
9265 .subsampling(1)
Erich Elsen0cc2c532019-10-15 04:44:18 -07009266 .padding_left(1)
9267 .padding_right(1)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009268 .padding_top(1)
9269 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009270 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen0cc2c532019-10-15 04:44:18 -07009271 }
9272 }
9273}
9274
Erich Elsenac4de802019-10-16 04:35:30 -07009275
Marat Dukhan91249d22020-10-24 12:02:51 -07009276TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC4, output_width_eq_1) {
9277 DWConv2DMicrokernelTester()
9278 .input_width(1)
9279 .input_height(1)
9280 .kernel_height(3)
9281 .kernel_width(3)
9282 .subsampling(1)
9283 .padding_left(1)
9284 .padding_right(1)
9285 .padding_top(1)
9286 .padding_bottom(1)
9287 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
9288}
9289
9290TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC4, output_width_gt_1) {
9291 for (size_t input_width = 2; input_width < 6; input_width++) {
9292 DWConv2DMicrokernelTester()
9293 .input_width(input_width)
9294 .input_height(1)
9295 .kernel_height(3)
9296 .kernel_width(3)
9297 .subsampling(1)
9298 .padding_left(1)
9299 .padding_right(1)
9300 .padding_top(1)
9301 .padding_bottom(1)
9302 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
9303 }
9304}
9305
9306TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_1X1_ACC4, output_height_gt_1) {
9307 for (size_t input_height = 2; input_height < 6; input_height++) {
9308 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9309 DWConv2DMicrokernelTester()
9310 .input_width(input_width)
9311 .input_height(input_height)
9312 .kernel_height(3)
9313 .kernel_width(3)
9314 .subsampling(1)
9315 .padding_left(1)
9316 .padding_right(1)
9317 .padding_top(1)
9318 .padding_bottom(1)
9319 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
9320 }
9321 }
9322}
9323
9324
9325TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1_ACC2, output_width_eq_1) {
9326 DWConv2DMicrokernelTester()
9327 .input_width(1)
9328 .input_height(2)
9329 .kernel_height(3)
9330 .kernel_width(3)
9331 .subsampling(1)
9332 .padding_left(1)
9333 .padding_right(1)
9334 .padding_top(1)
9335 .padding_bottom(1)
9336 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9337}
9338
9339TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1_ACC2, output_width_gt_1) {
9340 for (size_t input_width = 2; input_width < 6; input_width++) {
9341 DWConv2DMicrokernelTester()
9342 .input_width(input_width)
9343 .input_height(2)
9344 .kernel_height(3)
9345 .kernel_width(3)
9346 .subsampling(1)
9347 .padding_left(1)
9348 .padding_right(1)
9349 .padding_top(1)
9350 .padding_bottom(1)
9351 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9352 }
9353}
9354
9355TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1_ACC2, output_height_div_2) {
9356 for (size_t input_height = 4; input_height < 16; input_height += 2) {
9357 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9358 DWConv2DMicrokernelTester()
9359 .input_width(input_width)
9360 .input_height(input_height)
9361 .kernel_height(3)
9362 .kernel_width(3)
9363 .subsampling(1)
9364 .padding_left(1)
9365 .padding_right(1)
9366 .padding_top(1)
9367 .padding_bottom(1)
9368 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9369 }
9370 }
9371}
9372
9373TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1_ACC2, output_height_lt_2) {
9374 for (size_t input_height = 1; input_height < 2; input_height++) {
9375 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9376 DWConv2DMicrokernelTester()
9377 .input_width(input_width)
9378 .input_height(input_height)
9379 .kernel_height(3)
9380 .kernel_width(3)
9381 .subsampling(1)
9382 .padding_left(1)
9383 .padding_right(1)
9384 .padding_top(1)
9385 .padding_bottom(1)
9386 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9387 }
9388 }
9389}
9390
9391TEST(F32_DWCONV2D_CHW_3X3P1__SCALAR_2X1_ACC2, output_height_gt_2) {
9392 for (size_t input_height = 3; input_height < 11; input_height++) {
9393 for (size_t input_width = 1; input_width < 6; input_width += 1) {
9394 DWConv2DMicrokernelTester()
9395 .input_width(input_width)
9396 .input_height(input_height)
9397 .kernel_height(3)
9398 .kernel_width(3)
9399 .subsampling(1)
9400 .padding_left(1)
9401 .padding_right(1)
9402 .padding_top(1)
9403 .padding_bottom(1)
9404 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9405 }
9406 }
9407}
9408
9409
Marat Dukhancf5b3c32020-10-25 19:21:10 -07009410TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1, output_width_eq_1) {
9411 for (size_t input_width = 1; input_width < 3; input_width++) {
9412 DWConv2DMicrokernelTester()
9413 .input_width(input_width)
9414 .input_height(2)
9415 .kernel_height(3)
9416 .kernel_width(3)
9417 .subsampling(2)
9418 .padding_left(1)
9419 .padding_right(1)
9420 .padding_top(1)
9421 .padding_bottom(1)
9422 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
9423 }
9424}
9425
9426TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1, output_width_gt_1) {
9427 for (size_t input_width = 3; input_width < 11; input_width++) {
9428 DWConv2DMicrokernelTester()
9429 .input_width(input_width)
9430 .input_height(2)
9431 .kernel_height(3)
9432 .kernel_width(3)
9433 .subsampling(2)
9434 .padding_left(1)
9435 .padding_right(1)
9436 .padding_top(1)
9437 .padding_bottom(1)
9438 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
9439 }
9440}
9441
9442TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1, output_height_eq_1) {
9443 for (size_t input_height = 1; input_height < 3; input_height++) {
9444 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9445 DWConv2DMicrokernelTester()
9446 .input_width(input_width)
9447 .input_height(input_height)
9448 .kernel_height(3)
9449 .kernel_width(3)
9450 .subsampling(2)
9451 .padding_left(1)
9452 .padding_right(1)
9453 .padding_top(1)
9454 .padding_bottom(1)
9455 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
9456 }
9457 }
9458}
9459
9460TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1, output_height_gt_1) {
9461 for (size_t input_height = 3; input_height < 11; input_height++) {
9462 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9463 DWConv2DMicrokernelTester()
9464 .input_width(input_width)
9465 .input_height(input_height)
9466 .kernel_height(3)
9467 .kernel_width(3)
9468 .subsampling(2)
9469 .padding_left(1)
9470 .padding_right(1)
9471 .padding_top(1)
9472 .padding_bottom(1)
9473 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
9474 }
9475 }
9476}
9477
9478TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1, padding_top_eq_1) {
9479 for (size_t input_height = 2; input_height < 8; input_height++) {
9480 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9481 DWConv2DMicrokernelTester()
9482 .input_width(input_width)
9483 .input_height(input_height)
9484 .kernel_height(3)
9485 .kernel_width(3)
9486 .subsampling(2)
9487 .padding_left(1)
9488 .padding_right(1)
9489 .padding_top(0)
9490 .padding_bottom(1)
9491 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
9492 }
9493 }
9494}
9495
9496TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_width_eq_1) {
9497 for (size_t input_width = 1; input_width < 3; input_width++) {
9498 DWConv2DMicrokernelTester()
9499 .input_width(input_width)
9500 .input_height(4)
9501 .kernel_height(3)
9502 .kernel_width(3)
9503 .subsampling(2)
9504 .padding_left(1)
9505 .padding_right(1)
9506 .padding_top(1)
9507 .padding_bottom(1)
9508 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9509 }
9510}
9511
9512TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_width_gt_1) {
9513 for (size_t input_width = 3; input_width < 11; input_width++) {
9514 DWConv2DMicrokernelTester()
9515 .input_width(input_width)
9516 .input_height(4)
9517 .kernel_height(3)
9518 .kernel_width(3)
9519 .subsampling(2)
9520 .padding_left(1)
9521 .padding_right(1)
9522 .padding_top(1)
9523 .padding_bottom(1)
9524 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9525 }
9526}
9527
9528TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_height_eq_2) {
9529 for (size_t input_height = 3; input_height < 5; input_height++) {
9530 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9531 DWConv2DMicrokernelTester()
9532 .input_width(input_width)
9533 .input_height(input_height)
9534 .kernel_height(3)
9535 .kernel_width(3)
9536 .subsampling(2)
9537 .padding_left(1)
9538 .padding_right(1)
9539 .padding_top(1)
9540 .padding_bottom(1)
9541 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9542 }
9543 }
9544}
9545
9546TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_height_div_2) {
9547 for (size_t input_height = 8; input_height < 32; input_height += 4) {
9548 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9549 DWConv2DMicrokernelTester()
9550 .input_width(input_width)
9551 .input_height(input_height)
9552 .kernel_height(3)
9553 .kernel_width(3)
9554 .subsampling(2)
9555 .padding_left(1)
9556 .padding_right(1)
9557 .padding_top(1)
9558 .padding_bottom(1)
9559 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9560 }
9561 }
9562}
9563
9564TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_height_lt_2) {
9565 for (size_t input_height = 1; input_height < 3; input_height++) {
9566 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9567 DWConv2DMicrokernelTester()
9568 .input_width(input_width)
9569 .input_height(input_height)
9570 .kernel_height(3)
9571 .kernel_width(3)
9572 .subsampling(2)
9573 .padding_left(1)
9574 .padding_right(1)
9575 .padding_top(1)
9576 .padding_bottom(1)
9577 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9578 }
9579 }
9580}
9581
9582TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, output_height_gt_2) {
9583 for (size_t input_height = 5; input_height < 21; input_height++) {
9584 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9585 DWConv2DMicrokernelTester()
9586 .input_width(input_width)
9587 .input_height(input_height)
9588 .kernel_height(3)
9589 .kernel_width(3)
9590 .subsampling(2)
9591 .padding_left(1)
9592 .padding_right(1)
9593 .padding_top(1)
9594 .padding_bottom(1)
9595 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9596 }
9597 }
9598}
9599
9600TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1, padding_top_eq_1) {
9601 for (size_t input_height = 2; input_height < 14; input_height++) {
9602 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9603 DWConv2DMicrokernelTester()
9604 .input_width(input_width)
9605 .input_height(input_height)
9606 .kernel_height(3)
9607 .kernel_width(3)
9608 .subsampling(2)
9609 .padding_left(1)
9610 .padding_right(1)
9611 .padding_top(0)
9612 .padding_bottom(1)
9613 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
9614 }
9615 }
9616}
9617
9618TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_width_eq_1) {
9619 for (size_t input_width = 1; input_width < 3; input_width++) {
9620 DWConv2DMicrokernelTester()
9621 .input_width(input_width)
9622 .input_height(6)
9623 .kernel_height(3)
9624 .kernel_width(3)
9625 .subsampling(2)
9626 .padding_left(1)
9627 .padding_right(1)
9628 .padding_top(1)
9629 .padding_bottom(1)
9630 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9631 }
9632}
9633
9634TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_width_gt_1) {
9635 for (size_t input_width = 3; input_width < 11; input_width++) {
9636 DWConv2DMicrokernelTester()
9637 .input_width(input_width)
9638 .input_height(6)
9639 .kernel_height(3)
9640 .kernel_width(3)
9641 .subsampling(2)
9642 .padding_left(1)
9643 .padding_right(1)
9644 .padding_top(1)
9645 .padding_bottom(1)
9646 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9647 }
9648}
9649
9650TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_height_eq_3) {
9651 for (size_t input_height = 5; input_height < 7; input_height++) {
9652 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9653 DWConv2DMicrokernelTester()
9654 .input_width(input_width)
9655 .input_height(input_height)
9656 .kernel_height(3)
9657 .kernel_width(3)
9658 .subsampling(2)
9659 .padding_left(1)
9660 .padding_right(1)
9661 .padding_top(1)
9662 .padding_bottom(1)
9663 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9664 }
9665 }
9666}
9667
9668TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_height_div_3) {
9669 for (size_t input_height = 12; input_height < 48; input_height += 6) {
9670 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9671 DWConv2DMicrokernelTester()
9672 .input_width(input_width)
9673 .input_height(input_height)
9674 .kernel_height(3)
9675 .kernel_width(3)
9676 .subsampling(2)
9677 .padding_left(1)
9678 .padding_right(1)
9679 .padding_top(1)
9680 .padding_bottom(1)
9681 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9682 }
9683 }
9684}
9685
9686TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_height_lt_3) {
9687 for (size_t input_height = 1; input_height < 5; input_height++) {
9688 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9689 DWConv2DMicrokernelTester()
9690 .input_width(input_width)
9691 .input_height(input_height)
9692 .kernel_height(3)
9693 .kernel_width(3)
9694 .subsampling(2)
9695 .padding_left(1)
9696 .padding_right(1)
9697 .padding_top(1)
9698 .padding_bottom(1)
9699 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9700 }
9701 }
9702}
9703
9704TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, output_height_gt_3) {
9705 for (size_t input_height = 7; input_height < 31; input_height++) {
9706 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9707 DWConv2DMicrokernelTester()
9708 .input_width(input_width)
9709 .input_height(input_height)
9710 .kernel_height(3)
9711 .kernel_width(3)
9712 .subsampling(2)
9713 .padding_left(1)
9714 .padding_right(1)
9715 .padding_top(1)
9716 .padding_bottom(1)
9717 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9718 }
9719 }
9720}
9721
9722TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_3X1, padding_top_eq_1) {
9723 for (size_t input_height = 2; input_height < 20; input_height++) {
9724 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9725 DWConv2DMicrokernelTester()
9726 .input_width(input_width)
9727 .input_height(input_height)
9728 .kernel_height(3)
9729 .kernel_width(3)
9730 .subsampling(2)
9731 .padding_left(1)
9732 .padding_right(1)
9733 .padding_top(0)
9734 .padding_bottom(1)
9735 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
9736 }
9737 }
9738}
9739
9740TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_width_eq_1) {
9741 for (size_t input_width = 1; input_width < 3; input_width++) {
9742 DWConv2DMicrokernelTester()
9743 .input_width(input_width)
9744 .input_height(8)
9745 .kernel_height(3)
9746 .kernel_width(3)
9747 .subsampling(2)
9748 .padding_left(1)
9749 .padding_right(1)
9750 .padding_top(1)
9751 .padding_bottom(1)
9752 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9753 }
9754}
9755
9756TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_width_gt_1) {
9757 for (size_t input_width = 3; input_width < 11; input_width++) {
9758 DWConv2DMicrokernelTester()
9759 .input_width(input_width)
9760 .input_height(8)
9761 .kernel_height(3)
9762 .kernel_width(3)
9763 .subsampling(2)
9764 .padding_left(1)
9765 .padding_right(1)
9766 .padding_top(1)
9767 .padding_bottom(1)
9768 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9769 }
9770}
9771
9772TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_height_eq_4) {
9773 for (size_t input_height = 7; input_height < 9; input_height++) {
9774 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9775 DWConv2DMicrokernelTester()
9776 .input_width(input_width)
9777 .input_height(input_height)
9778 .kernel_height(3)
9779 .kernel_width(3)
9780 .subsampling(2)
9781 .padding_left(1)
9782 .padding_right(1)
9783 .padding_top(1)
9784 .padding_bottom(1)
9785 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9786 }
9787 }
9788}
9789
9790TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_height_div_4) {
9791 for (size_t input_height = 16; input_height < 64; input_height += 8) {
9792 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9793 DWConv2DMicrokernelTester()
9794 .input_width(input_width)
9795 .input_height(input_height)
9796 .kernel_height(3)
9797 .kernel_width(3)
9798 .subsampling(2)
9799 .padding_left(1)
9800 .padding_right(1)
9801 .padding_top(1)
9802 .padding_bottom(1)
9803 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9804 }
9805 }
9806}
9807
9808TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_height_lt_4) {
9809 for (size_t input_height = 1; input_height < 7; input_height++) {
9810 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9811 DWConv2DMicrokernelTester()
9812 .input_width(input_width)
9813 .input_height(input_height)
9814 .kernel_height(3)
9815 .kernel_width(3)
9816 .subsampling(2)
9817 .padding_left(1)
9818 .padding_right(1)
9819 .padding_top(1)
9820 .padding_bottom(1)
9821 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9822 }
9823 }
9824}
9825
9826TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, output_height_gt_4) {
9827 for (size_t input_height = 9; input_height < 41; input_height++) {
9828 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9829 DWConv2DMicrokernelTester()
9830 .input_width(input_width)
9831 .input_height(input_height)
9832 .kernel_height(3)
9833 .kernel_width(3)
9834 .subsampling(2)
9835 .padding_left(1)
9836 .padding_right(1)
9837 .padding_top(1)
9838 .padding_bottom(1)
9839 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9840 }
9841 }
9842}
9843
9844TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_4X1, padding_top_eq_1) {
9845 for (size_t input_height = 2; input_height < 26; input_height++) {
9846 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9847 DWConv2DMicrokernelTester()
9848 .input_width(input_width)
9849 .input_height(input_height)
9850 .kernel_height(3)
9851 .kernel_width(3)
9852 .subsampling(2)
9853 .padding_left(1)
9854 .padding_right(1)
9855 .padding_top(0)
9856 .padding_bottom(1)
9857 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, DWConv2DMicrokernelTester::Variant::Scalar);
9858 }
9859 }
9860}
9861
9862TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC2, output_width_eq_1) {
9863 for (size_t input_width = 1; input_width < 3; input_width++) {
9864 DWConv2DMicrokernelTester()
9865 .input_width(input_width)
9866 .input_height(2)
9867 .kernel_height(3)
9868 .kernel_width(3)
9869 .subsampling(2)
9870 .padding_left(1)
9871 .padding_right(1)
9872 .padding_top(1)
9873 .padding_bottom(1)
9874 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9875 }
9876}
9877
9878TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC2, output_width_gt_1) {
9879 for (size_t input_width = 3; input_width < 11; input_width++) {
9880 DWConv2DMicrokernelTester()
9881 .input_width(input_width)
9882 .input_height(2)
9883 .kernel_height(3)
9884 .kernel_width(3)
9885 .subsampling(2)
9886 .padding_left(1)
9887 .padding_right(1)
9888 .padding_top(1)
9889 .padding_bottom(1)
9890 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9891 }
9892}
9893
9894TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC2, output_height_eq_1) {
9895 for (size_t input_height = 1; input_height < 3; input_height++) {
9896 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9897 DWConv2DMicrokernelTester()
9898 .input_width(input_width)
9899 .input_height(input_height)
9900 .kernel_height(3)
9901 .kernel_width(3)
9902 .subsampling(2)
9903 .padding_left(1)
9904 .padding_right(1)
9905 .padding_top(1)
9906 .padding_bottom(1)
9907 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9908 }
9909 }
9910}
9911
9912TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC2, output_height_gt_1) {
9913 for (size_t input_height = 3; input_height < 11; input_height++) {
9914 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9915 DWConv2DMicrokernelTester()
9916 .input_width(input_width)
9917 .input_height(input_height)
9918 .kernel_height(3)
9919 .kernel_width(3)
9920 .subsampling(2)
9921 .padding_left(1)
9922 .padding_right(1)
9923 .padding_top(1)
9924 .padding_bottom(1)
9925 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9926 }
9927 }
9928}
9929
9930TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC2, padding_top_eq_1) {
9931 for (size_t input_height = 2; input_height < 8; input_height++) {
9932 for (size_t input_width = 1; input_width < 11; input_width += 1) {
9933 DWConv2DMicrokernelTester()
9934 .input_width(input_width)
9935 .input_height(input_height)
9936 .kernel_height(3)
9937 .kernel_width(3)
9938 .subsampling(2)
9939 .padding_left(1)
9940 .padding_right(1)
9941 .padding_top(0)
9942 .padding_bottom(1)
9943 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
9944 }
9945 }
9946}
9947
Marat Dukhanbf715f92020-10-23 20:17:00 -07009948TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC3, output_width_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009949 for (size_t input_width = 1; input_width < 3; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07009950 DWConv2DMicrokernelTester()
Erich Elsenac4de802019-10-16 04:35:30 -07009951 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009952 .input_height(2)
Erich Elsenac4de802019-10-16 04:35:30 -07009953 .kernel_height(3)
9954 .kernel_width(3)
9955 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009956 .padding_left(1)
9957 .padding_right(1)
9958 .padding_top(1)
9959 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009960 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009961 }
9962}
9963
Marat Dukhanbf715f92020-10-23 20:17:00 -07009964TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC3, output_width_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009965 for (size_t input_width = 3; input_width < 11; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07009966 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009967 .input_width(input_width)
9968 .input_height(2)
9969 .kernel_height(3)
9970 .kernel_width(3)
9971 .subsampling(2)
9972 .padding_left(1)
9973 .padding_right(1)
9974 .padding_top(1)
9975 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009976 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009977 }
9978}
9979
Marat Dukhanbf715f92020-10-23 20:17:00 -07009980TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC3, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009981 for (size_t input_height = 1; input_height < 3; input_height++) {
9982 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -07009983 DWConv2DMicrokernelTester()
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009984 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009985 .input_height(input_height)
9986 .kernel_height(3)
9987 .kernel_width(3)
9988 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009989 .padding_left(1)
9990 .padding_right(1)
9991 .padding_top(1)
9992 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -07009993 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen4e5db3d2020-05-07 08:57:47 -07009994 }
9995 }
9996}
9997
Marat Dukhanbf715f92020-10-23 20:17:00 -07009998TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC3, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -07009999 for (size_t input_height = 3; input_height < 11; input_height++) {
10000 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010001 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010002 .input_width(input_width)
10003 .input_height(input_height)
10004 .kernel_height(3)
10005 .kernel_width(3)
10006 .subsampling(2)
10007 .padding_left(1)
10008 .padding_right(1)
10009 .padding_top(1)
10010 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010011 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010012 }
10013 }
10014}
10015
Marat Dukhanbf715f92020-10-23 20:17:00 -070010016TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC3, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010017 for (size_t input_height = 2; input_height < 8; input_height++) {
10018 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010019 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010020 .input_width(input_width)
10021 .input_height(input_height)
10022 .kernel_height(3)
10023 .kernel_width(3)
10024 .subsampling(2)
10025 .padding_left(1)
10026 .padding_right(1)
10027 .padding_top(0)
10028 .padding_bottom(1)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010029 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010030 }
10031 }
10032}
10033
Marat Dukhancf5b3c32020-10-25 19:21:10 -070010034TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC4, output_width_eq_1) {
10035 for (size_t input_width = 1; input_width < 3; input_width++) {
10036 DWConv2DMicrokernelTester()
10037 .input_width(input_width)
10038 .input_height(2)
10039 .kernel_height(3)
10040 .kernel_width(3)
10041 .subsampling(2)
10042 .padding_left(1)
10043 .padding_right(1)
10044 .padding_top(1)
10045 .padding_bottom(1)
10046 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10047 }
10048}
10049
10050TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC4, output_width_gt_1) {
10051 for (size_t input_width = 3; input_width < 11; input_width++) {
10052 DWConv2DMicrokernelTester()
10053 .input_width(input_width)
10054 .input_height(2)
10055 .kernel_height(3)
10056 .kernel_width(3)
10057 .subsampling(2)
10058 .padding_left(1)
10059 .padding_right(1)
10060 .padding_top(1)
10061 .padding_bottom(1)
10062 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10063 }
10064}
10065
10066TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC4, output_height_eq_1) {
10067 for (size_t input_height = 1; input_height < 3; input_height++) {
10068 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10069 DWConv2DMicrokernelTester()
10070 .input_width(input_width)
10071 .input_height(input_height)
10072 .kernel_height(3)
10073 .kernel_width(3)
10074 .subsampling(2)
10075 .padding_left(1)
10076 .padding_right(1)
10077 .padding_top(1)
10078 .padding_bottom(1)
10079 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10080 }
10081 }
10082}
10083
10084TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC4, output_height_gt_1) {
10085 for (size_t input_height = 3; input_height < 11; input_height++) {
10086 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10087 DWConv2DMicrokernelTester()
10088 .input_width(input_width)
10089 .input_height(input_height)
10090 .kernel_height(3)
10091 .kernel_width(3)
10092 .subsampling(2)
10093 .padding_left(1)
10094 .padding_right(1)
10095 .padding_top(1)
10096 .padding_bottom(1)
10097 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10098 }
10099 }
10100}
10101
10102TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_1X1_ACC4, padding_top_eq_1) {
10103 for (size_t input_height = 2; input_height < 8; input_height++) {
10104 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10105 DWConv2DMicrokernelTester()
10106 .input_width(input_width)
10107 .input_height(input_height)
10108 .kernel_height(3)
10109 .kernel_width(3)
10110 .subsampling(2)
10111 .padding_left(1)
10112 .padding_right(1)
10113 .padding_top(0)
10114 .padding_bottom(1)
10115 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10116 }
10117 }
10118}
10119
10120TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_width_eq_1) {
10121 for (size_t input_width = 1; input_width < 3; input_width++) {
10122 DWConv2DMicrokernelTester()
10123 .input_width(input_width)
10124 .input_height(4)
10125 .kernel_height(3)
10126 .kernel_width(3)
10127 .subsampling(2)
10128 .padding_left(1)
10129 .padding_right(1)
10130 .padding_top(1)
10131 .padding_bottom(1)
10132 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10133 }
10134}
10135
10136TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_width_gt_1) {
10137 for (size_t input_width = 3; input_width < 11; input_width++) {
10138 DWConv2DMicrokernelTester()
10139 .input_width(input_width)
10140 .input_height(4)
10141 .kernel_height(3)
10142 .kernel_width(3)
10143 .subsampling(2)
10144 .padding_left(1)
10145 .padding_right(1)
10146 .padding_top(1)
10147 .padding_bottom(1)
10148 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10149 }
10150}
10151
10152TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_height_eq_2) {
10153 for (size_t input_height = 3; input_height < 5; input_height++) {
10154 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10155 DWConv2DMicrokernelTester()
10156 .input_width(input_width)
10157 .input_height(input_height)
10158 .kernel_height(3)
10159 .kernel_width(3)
10160 .subsampling(2)
10161 .padding_left(1)
10162 .padding_right(1)
10163 .padding_top(1)
10164 .padding_bottom(1)
10165 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10166 }
10167 }
10168}
10169
10170TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_height_div_2) {
10171 for (size_t input_height = 8; input_height < 32; input_height += 4) {
10172 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10173 DWConv2DMicrokernelTester()
10174 .input_width(input_width)
10175 .input_height(input_height)
10176 .kernel_height(3)
10177 .kernel_width(3)
10178 .subsampling(2)
10179 .padding_left(1)
10180 .padding_right(1)
10181 .padding_top(1)
10182 .padding_bottom(1)
10183 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10184 }
10185 }
10186}
10187
10188TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_height_lt_2) {
10189 for (size_t input_height = 1; input_height < 3; input_height++) {
10190 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10191 DWConv2DMicrokernelTester()
10192 .input_width(input_width)
10193 .input_height(input_height)
10194 .kernel_height(3)
10195 .kernel_width(3)
10196 .subsampling(2)
10197 .padding_left(1)
10198 .padding_right(1)
10199 .padding_top(1)
10200 .padding_bottom(1)
10201 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10202 }
10203 }
10204}
10205
10206TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, output_height_gt_2) {
10207 for (size_t input_height = 5; input_height < 21; input_height++) {
10208 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10209 DWConv2DMicrokernelTester()
10210 .input_width(input_width)
10211 .input_height(input_height)
10212 .kernel_height(3)
10213 .kernel_width(3)
10214 .subsampling(2)
10215 .padding_left(1)
10216 .padding_right(1)
10217 .padding_top(1)
10218 .padding_bottom(1)
10219 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10220 }
10221 }
10222}
10223
10224TEST(F32_DWCONV2D_CHW_3X3S2P1__SCALAR_2X1_ACC2, padding_top_eq_1) {
10225 for (size_t input_height = 2; input_height < 14; input_height++) {
10226 for (size_t input_width = 1; input_width < 11; input_width += 1) {
10227 DWConv2DMicrokernelTester()
10228 .input_width(input_width)
10229 .input_height(input_height)
10230 .kernel_height(3)
10231 .kernel_width(3)
10232 .subsampling(2)
10233 .padding_left(1)
10234 .padding_right(1)
10235 .padding_top(0)
10236 .padding_bottom(1)
10237 .Test(xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10238 }
10239 }
10240}
10241
Marat Dukhanc4efb002020-10-25 23:14:47 -070010242TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1, output_width_eq_1) {
10243 DWConv2DMicrokernelTester()
10244 .input_width(1)
10245 .input_height(1)
10246 .kernel_height(5)
10247 .kernel_width(5)
10248 .subsampling(1)
10249 .padding_left(2)
10250 .padding_right(2)
10251 .padding_top(2)
10252 .padding_bottom(2)
10253 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
10254}
10255
10256TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1, output_width_gt_1) {
10257 for (size_t input_width = 2; input_width < 6; input_width++) {
10258 DWConv2DMicrokernelTester()
10259 .input_width(input_width)
10260 .input_height(1)
10261 .kernel_height(5)
10262 .kernel_width(5)
10263 .subsampling(1)
10264 .padding_left(2)
10265 .padding_right(2)
10266 .padding_top(2)
10267 .padding_bottom(2)
10268 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
10269 }
10270}
10271
10272TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1, output_height_gt_1) {
10273 for (size_t input_height = 2; input_height < 6; input_height++) {
10274 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10275 DWConv2DMicrokernelTester()
10276 .input_width(input_width)
10277 .input_height(input_height)
10278 .kernel_height(5)
10279 .kernel_width(5)
10280 .subsampling(1)
10281 .padding_left(2)
10282 .padding_right(2)
10283 .padding_top(2)
10284 .padding_bottom(2)
10285 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1, DWConv2DMicrokernelTester::Variant::Scalar);
10286 }
10287 }
10288}
10289
10290
10291TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1, output_width_eq_1) {
10292 DWConv2DMicrokernelTester()
10293 .input_width(1)
10294 .input_height(2)
10295 .kernel_height(5)
10296 .kernel_width(5)
10297 .subsampling(1)
10298 .padding_left(2)
10299 .padding_right(2)
10300 .padding_top(2)
10301 .padding_bottom(2)
10302 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
10303}
10304
10305TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1, output_width_gt_1) {
10306 for (size_t input_width = 2; input_width < 6; input_width++) {
10307 DWConv2DMicrokernelTester()
10308 .input_width(input_width)
10309 .input_height(2)
10310 .kernel_height(5)
10311 .kernel_width(5)
10312 .subsampling(1)
10313 .padding_left(2)
10314 .padding_right(2)
10315 .padding_top(2)
10316 .padding_bottom(2)
10317 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
10318 }
10319}
10320
10321TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1, output_height_div_2) {
10322 for (size_t input_height = 4; input_height < 16; input_height += 2) {
10323 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10324 DWConv2DMicrokernelTester()
10325 .input_width(input_width)
10326 .input_height(input_height)
10327 .kernel_height(5)
10328 .kernel_width(5)
10329 .subsampling(1)
10330 .padding_left(2)
10331 .padding_right(2)
10332 .padding_top(2)
10333 .padding_bottom(2)
10334 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
10335 }
10336 }
10337}
10338
10339TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1, output_height_lt_2) {
10340 for (size_t input_height = 1; input_height < 2; input_height++) {
10341 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10342 DWConv2DMicrokernelTester()
10343 .input_width(input_width)
10344 .input_height(input_height)
10345 .kernel_height(5)
10346 .kernel_width(5)
10347 .subsampling(1)
10348 .padding_left(2)
10349 .padding_right(2)
10350 .padding_top(2)
10351 .padding_bottom(2)
10352 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
10353 }
10354 }
10355}
10356
10357TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1, output_height_gt_2) {
10358 for (size_t input_height = 3; input_height < 11; input_height++) {
10359 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10360 DWConv2DMicrokernelTester()
10361 .input_width(input_width)
10362 .input_height(input_height)
10363 .kernel_height(5)
10364 .kernel_width(5)
10365 .subsampling(1)
10366 .padding_left(2)
10367 .padding_right(2)
10368 .padding_top(2)
10369 .padding_bottom(2)
10370 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, DWConv2DMicrokernelTester::Variant::Scalar);
10371 }
10372 }
10373}
10374
10375
10376TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1, output_width_eq_1) {
10377 DWConv2DMicrokernelTester()
10378 .input_width(1)
10379 .input_height(3)
10380 .kernel_height(5)
10381 .kernel_width(5)
10382 .subsampling(1)
10383 .padding_left(2)
10384 .padding_right(2)
10385 .padding_top(2)
10386 .padding_bottom(2)
10387 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
10388}
10389
10390TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1, output_width_gt_1) {
10391 for (size_t input_width = 2; input_width < 6; input_width++) {
10392 DWConv2DMicrokernelTester()
10393 .input_width(input_width)
10394 .input_height(3)
10395 .kernel_height(5)
10396 .kernel_width(5)
10397 .subsampling(1)
10398 .padding_left(2)
10399 .padding_right(2)
10400 .padding_top(2)
10401 .padding_bottom(2)
10402 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
10403 }
10404}
10405
10406TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1, output_height_div_3) {
10407 for (size_t input_height = 6; input_height < 24; input_height += 3) {
10408 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10409 DWConv2DMicrokernelTester()
10410 .input_width(input_width)
10411 .input_height(input_height)
10412 .kernel_height(5)
10413 .kernel_width(5)
10414 .subsampling(1)
10415 .padding_left(2)
10416 .padding_right(2)
10417 .padding_top(2)
10418 .padding_bottom(2)
10419 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
10420 }
10421 }
10422}
10423
10424TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1, output_height_lt_3) {
10425 for (size_t input_height = 1; input_height < 3; input_height++) {
10426 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10427 DWConv2DMicrokernelTester()
10428 .input_width(input_width)
10429 .input_height(input_height)
10430 .kernel_height(5)
10431 .kernel_width(5)
10432 .subsampling(1)
10433 .padding_left(2)
10434 .padding_right(2)
10435 .padding_top(2)
10436 .padding_bottom(2)
10437 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
10438 }
10439 }
10440}
10441
10442TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1, output_height_gt_3) {
10443 for (size_t input_height = 4; input_height < 16; input_height++) {
10444 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10445 DWConv2DMicrokernelTester()
10446 .input_width(input_width)
10447 .input_height(input_height)
10448 .kernel_height(5)
10449 .kernel_width(5)
10450 .subsampling(1)
10451 .padding_left(2)
10452 .padding_right(2)
10453 .padding_top(2)
10454 .padding_bottom(2)
10455 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, DWConv2DMicrokernelTester::Variant::Scalar);
10456 }
10457 }
10458}
10459
10460
10461TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC2, output_width_eq_1) {
10462 DWConv2DMicrokernelTester()
10463 .input_width(1)
10464 .input_height(1)
10465 .kernel_height(5)
10466 .kernel_width(5)
10467 .subsampling(1)
10468 .padding_left(2)
10469 .padding_right(2)
10470 .padding_top(2)
10471 .padding_bottom(2)
10472 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10473}
10474
10475TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC2, output_width_gt_1) {
10476 for (size_t input_width = 2; input_width < 6; input_width++) {
10477 DWConv2DMicrokernelTester()
10478 .input_width(input_width)
10479 .input_height(1)
10480 .kernel_height(5)
10481 .kernel_width(5)
10482 .subsampling(1)
10483 .padding_left(2)
10484 .padding_right(2)
10485 .padding_top(2)
10486 .padding_bottom(2)
10487 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10488 }
10489}
10490
10491TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC2, output_height_gt_1) {
10492 for (size_t input_height = 2; input_height < 6; input_height++) {
10493 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10494 DWConv2DMicrokernelTester()
10495 .input_width(input_width)
10496 .input_height(input_height)
10497 .kernel_height(5)
10498 .kernel_width(5)
10499 .subsampling(1)
10500 .padding_left(2)
10501 .padding_right(2)
10502 .padding_top(2)
10503 .padding_bottom(2)
10504 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10505 }
10506 }
10507}
10508
10509
10510TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC3, output_width_eq_1) {
10511 DWConv2DMicrokernelTester()
10512 .input_width(1)
10513 .input_height(1)
10514 .kernel_height(5)
10515 .kernel_width(5)
10516 .subsampling(1)
10517 .padding_left(2)
10518 .padding_right(2)
10519 .padding_top(2)
10520 .padding_bottom(2)
10521 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10522}
10523
10524TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC3, output_width_gt_1) {
10525 for (size_t input_width = 2; input_width < 6; input_width++) {
10526 DWConv2DMicrokernelTester()
10527 .input_width(input_width)
10528 .input_height(1)
10529 .kernel_height(5)
10530 .kernel_width(5)
10531 .subsampling(1)
10532 .padding_left(2)
10533 .padding_right(2)
10534 .padding_top(2)
10535 .padding_bottom(2)
10536 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10537 }
10538}
10539
10540TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC3, output_height_gt_1) {
10541 for (size_t input_height = 2; input_height < 6; input_height++) {
10542 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10543 DWConv2DMicrokernelTester()
10544 .input_width(input_width)
10545 .input_height(input_height)
10546 .kernel_height(5)
10547 .kernel_width(5)
10548 .subsampling(1)
10549 .padding_left(2)
10550 .padding_right(2)
10551 .padding_top(2)
10552 .padding_bottom(2)
10553 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10554 }
10555 }
10556}
10557
10558
10559TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC4, output_width_eq_1) {
10560 DWConv2DMicrokernelTester()
10561 .input_width(1)
10562 .input_height(1)
10563 .kernel_height(5)
10564 .kernel_width(5)
10565 .subsampling(1)
10566 .padding_left(2)
10567 .padding_right(2)
10568 .padding_top(2)
10569 .padding_bottom(2)
10570 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10571}
10572
10573TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC4, output_width_gt_1) {
10574 for (size_t input_width = 2; input_width < 6; input_width++) {
10575 DWConv2DMicrokernelTester()
10576 .input_width(input_width)
10577 .input_height(1)
10578 .kernel_height(5)
10579 .kernel_width(5)
10580 .subsampling(1)
10581 .padding_left(2)
10582 .padding_right(2)
10583 .padding_top(2)
10584 .padding_bottom(2)
10585 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10586 }
10587}
10588
10589TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC4, output_height_gt_1) {
10590 for (size_t input_height = 2; input_height < 6; input_height++) {
10591 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10592 DWConv2DMicrokernelTester()
10593 .input_width(input_width)
10594 .input_height(input_height)
10595 .kernel_height(5)
10596 .kernel_width(5)
10597 .subsampling(1)
10598 .padding_left(2)
10599 .padding_right(2)
10600 .padding_top(2)
10601 .padding_bottom(2)
10602 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4, DWConv2DMicrokernelTester::Variant::Scalar);
10603 }
10604 }
10605}
10606
10607
Marat Dukhanbf715f92020-10-23 20:17:00 -070010608TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC5, output_width_eq_1) {
10609 DWConv2DMicrokernelTester()
Erich Elsen38709a62019-11-08 11:58:45 -080010610 .input_width(1)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010611 .input_height(1)
10612 .kernel_height(5)
10613 .kernel_width(5)
10614 .subsampling(1)
Erich Elsen38709a62019-11-08 11:58:45 -080010615 .padding_left(2)
10616 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010617 .padding_top(2)
10618 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010619 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen38709a62019-11-08 11:58:45 -080010620}
10621
Marat Dukhanbf715f92020-10-23 20:17:00 -070010622TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC5, output_width_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010623 for (size_t input_width = 2; input_width < 6; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010624 DWConv2DMicrokernelTester()
Erich Elsen38709a62019-11-08 11:58:45 -080010625 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010626 .input_height(1)
10627 .kernel_height(5)
10628 .kernel_width(5)
10629 .subsampling(1)
Erich Elsen38709a62019-11-08 11:58:45 -080010630 .padding_left(2)
10631 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010632 .padding_top(2)
10633 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010634 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen38709a62019-11-08 11:58:45 -080010635 }
10636}
10637
Marat Dukhanbf715f92020-10-23 20:17:00 -070010638TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_1X1_ACC5, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010639 for (size_t input_height = 2; input_height < 6; input_height++) {
10640 for (size_t input_width = 1; input_width < 6; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010641 DWConv2DMicrokernelTester()
Erich Elsen38709a62019-11-08 11:58:45 -080010642 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010643 .input_height(input_height)
10644 .kernel_height(5)
10645 .kernel_width(5)
10646 .subsampling(1)
Erich Elsen38709a62019-11-08 11:58:45 -080010647 .padding_left(2)
10648 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010649 .padding_top(2)
Marat Dukhanae7e8b22020-10-20 17:51:51 -070010650 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010651 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen38709a62019-11-08 11:58:45 -080010652 }
10653 }
10654}
10655
Erich Elsen38709a62019-11-08 11:58:45 -080010656
Marat Dukhanc4efb002020-10-25 23:14:47 -070010657TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC2, output_width_eq_1) {
10658 DWConv2DMicrokernelTester()
10659 .input_width(1)
10660 .input_height(2)
10661 .kernel_height(5)
10662 .kernel_width(5)
10663 .subsampling(1)
10664 .padding_left(2)
10665 .padding_right(2)
10666 .padding_top(2)
10667 .padding_bottom(2)
10668 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10669}
10670
10671TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC2, output_width_gt_1) {
10672 for (size_t input_width = 2; input_width < 6; input_width++) {
10673 DWConv2DMicrokernelTester()
10674 .input_width(input_width)
10675 .input_height(2)
10676 .kernel_height(5)
10677 .kernel_width(5)
10678 .subsampling(1)
10679 .padding_left(2)
10680 .padding_right(2)
10681 .padding_top(2)
10682 .padding_bottom(2)
10683 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10684 }
10685}
10686
10687TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC2, output_height_div_2) {
10688 for (size_t input_height = 4; input_height < 16; input_height += 2) {
10689 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10690 DWConv2DMicrokernelTester()
10691 .input_width(input_width)
10692 .input_height(input_height)
10693 .kernel_height(5)
10694 .kernel_width(5)
10695 .subsampling(1)
10696 .padding_left(2)
10697 .padding_right(2)
10698 .padding_top(2)
10699 .padding_bottom(2)
10700 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10701 }
10702 }
10703}
10704
10705TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC2, output_height_lt_2) {
10706 for (size_t input_height = 1; input_height < 2; input_height++) {
10707 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10708 DWConv2DMicrokernelTester()
10709 .input_width(input_width)
10710 .input_height(input_height)
10711 .kernel_height(5)
10712 .kernel_width(5)
10713 .subsampling(1)
10714 .padding_left(2)
10715 .padding_right(2)
10716 .padding_top(2)
10717 .padding_bottom(2)
10718 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10719 }
10720 }
10721}
10722
10723TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC2, output_height_gt_2) {
10724 for (size_t input_height = 3; input_height < 11; input_height++) {
10725 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10726 DWConv2DMicrokernelTester()
10727 .input_width(input_width)
10728 .input_height(input_height)
10729 .kernel_height(5)
10730 .kernel_width(5)
10731 .subsampling(1)
10732 .padding_left(2)
10733 .padding_right(2)
10734 .padding_top(2)
10735 .padding_bottom(2)
10736 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10737 }
10738 }
10739}
10740
10741
10742TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC3, output_width_eq_1) {
10743 DWConv2DMicrokernelTester()
10744 .input_width(1)
10745 .input_height(2)
10746 .kernel_height(5)
10747 .kernel_width(5)
10748 .subsampling(1)
10749 .padding_left(2)
10750 .padding_right(2)
10751 .padding_top(2)
10752 .padding_bottom(2)
10753 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10754}
10755
10756TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC3, output_width_gt_1) {
10757 for (size_t input_width = 2; input_width < 6; input_width++) {
10758 DWConv2DMicrokernelTester()
10759 .input_width(input_width)
10760 .input_height(2)
10761 .kernel_height(5)
10762 .kernel_width(5)
10763 .subsampling(1)
10764 .padding_left(2)
10765 .padding_right(2)
10766 .padding_top(2)
10767 .padding_bottom(2)
10768 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10769 }
10770}
10771
10772TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC3, output_height_div_2) {
10773 for (size_t input_height = 4; input_height < 16; input_height += 2) {
10774 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10775 DWConv2DMicrokernelTester()
10776 .input_width(input_width)
10777 .input_height(input_height)
10778 .kernel_height(5)
10779 .kernel_width(5)
10780 .subsampling(1)
10781 .padding_left(2)
10782 .padding_right(2)
10783 .padding_top(2)
10784 .padding_bottom(2)
10785 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10786 }
10787 }
10788}
10789
10790TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC3, output_height_lt_2) {
10791 for (size_t input_height = 1; input_height < 2; input_height++) {
10792 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10793 DWConv2DMicrokernelTester()
10794 .input_width(input_width)
10795 .input_height(input_height)
10796 .kernel_height(5)
10797 .kernel_width(5)
10798 .subsampling(1)
10799 .padding_left(2)
10800 .padding_right(2)
10801 .padding_top(2)
10802 .padding_bottom(2)
10803 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10804 }
10805 }
10806}
10807
10808TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_2X1_ACC3, output_height_gt_2) {
10809 for (size_t input_height = 3; input_height < 11; input_height++) {
10810 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10811 DWConv2DMicrokernelTester()
10812 .input_width(input_width)
10813 .input_height(input_height)
10814 .kernel_height(5)
10815 .kernel_width(5)
10816 .subsampling(1)
10817 .padding_left(2)
10818 .padding_right(2)
10819 .padding_top(2)
10820 .padding_bottom(2)
10821 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, DWConv2DMicrokernelTester::Variant::Scalar);
10822 }
10823 }
10824}
10825
10826
10827TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1_ACC2, output_width_eq_1) {
10828 DWConv2DMicrokernelTester()
10829 .input_width(1)
10830 .input_height(3)
10831 .kernel_height(5)
10832 .kernel_width(5)
10833 .subsampling(1)
10834 .padding_left(2)
10835 .padding_right(2)
10836 .padding_top(2)
10837 .padding_bottom(2)
10838 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10839}
10840
10841TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1_ACC2, output_width_gt_1) {
10842 for (size_t input_width = 2; input_width < 6; input_width++) {
10843 DWConv2DMicrokernelTester()
10844 .input_width(input_width)
10845 .input_height(3)
10846 .kernel_height(5)
10847 .kernel_width(5)
10848 .subsampling(1)
10849 .padding_left(2)
10850 .padding_right(2)
10851 .padding_top(2)
10852 .padding_bottom(2)
10853 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10854 }
10855}
10856
10857TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1_ACC2, output_height_div_3) {
10858 for (size_t input_height = 6; input_height < 24; input_height += 3) {
10859 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10860 DWConv2DMicrokernelTester()
10861 .input_width(input_width)
10862 .input_height(input_height)
10863 .kernel_height(5)
10864 .kernel_width(5)
10865 .subsampling(1)
10866 .padding_left(2)
10867 .padding_right(2)
10868 .padding_top(2)
10869 .padding_bottom(2)
10870 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10871 }
10872 }
10873}
10874
10875TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1_ACC2, output_height_lt_3) {
10876 for (size_t input_height = 1; input_height < 3; input_height++) {
10877 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10878 DWConv2DMicrokernelTester()
10879 .input_width(input_width)
10880 .input_height(input_height)
10881 .kernel_height(5)
10882 .kernel_width(5)
10883 .subsampling(1)
10884 .padding_left(2)
10885 .padding_right(2)
10886 .padding_top(2)
10887 .padding_bottom(2)
10888 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10889 }
10890 }
10891}
10892
10893TEST(F32_DWCONV2D_CHW_5X5P2__SCALAR_3X1_ACC2, output_height_gt_3) {
10894 for (size_t input_height = 4; input_height < 16; input_height++) {
10895 for (size_t input_width = 1; input_width < 6; input_width += 1) {
10896 DWConv2DMicrokernelTester()
10897 .input_width(input_width)
10898 .input_height(input_height)
10899 .kernel_height(5)
10900 .kernel_width(5)
10901 .subsampling(1)
10902 .padding_left(2)
10903 .padding_right(2)
10904 .padding_top(2)
10905 .padding_bottom(2)
10906 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, DWConv2DMicrokernelTester::Variant::Scalar);
10907 }
10908 }
10909}
10910
10911
Marat Dukhanbf715f92020-10-23 20:17:00 -070010912TEST(F32_DWCONV2D_CHW_5X5S2P2__SCALAR_1X1_ACC5, output_width_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010913 for (size_t input_width = 1; input_width < 3; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010914 DWConv2DMicrokernelTester()
Erich Elsen38709a62019-11-08 11:58:45 -080010915 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010916 .input_height(2)
10917 .kernel_height(5)
10918 .kernel_width(5)
10919 .subsampling(2)
Erich Elsen38709a62019-11-08 11:58:45 -080010920 .padding_left(2)
10921 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010922 .padding_top(2)
10923 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010924 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen38709a62019-11-08 11:58:45 -080010925 }
10926}
10927
Marat Dukhanbf715f92020-10-23 20:17:00 -070010928TEST(F32_DWCONV2D_CHW_5X5S2P2__SCALAR_1X1_ACC5, output_width_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010929 for (size_t input_width = 3; input_width < 11; input_width++) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010930 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010931 .input_width(input_width)
10932 .input_height(2)
10933 .kernel_height(5)
10934 .kernel_width(5)
10935 .subsampling(2)
10936 .padding_left(2)
10937 .padding_right(2)
10938 .padding_top(2)
10939 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010940 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010941 }
10942}
10943
Marat Dukhanbf715f92020-10-23 20:17:00 -070010944TEST(F32_DWCONV2D_CHW_5X5S2P2__SCALAR_1X1_ACC5, output_height_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010945 for (size_t input_height = 1; input_height < 3; input_height++) {
10946 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010947 DWConv2DMicrokernelTester()
Erich Elsen38709a62019-11-08 11:58:45 -080010948 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010949 .input_height(input_height)
10950 .kernel_height(5)
10951 .kernel_width(5)
10952 .subsampling(2)
Erich Elsen38709a62019-11-08 11:58:45 -080010953 .padding_left(2)
10954 .padding_right(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010955 .padding_top(2)
10956 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010957 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen38709a62019-11-08 11:58:45 -080010958 }
10959 }
10960}
10961
Marat Dukhanbf715f92020-10-23 20:17:00 -070010962TEST(F32_DWCONV2D_CHW_5X5S2P2__SCALAR_1X1_ACC5, output_height_gt_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010963 for (size_t input_height = 3; input_height < 11; input_height++) {
10964 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010965 DWConv2DMicrokernelTester()
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010966 .input_width(input_width)
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010967 .input_height(input_height)
10968 .kernel_height(5)
10969 .kernel_width(5)
10970 .subsampling(2)
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010971 .padding_left(2)
10972 .padding_right(2)
10973 .padding_top(2)
10974 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010975 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Erich Elsen4e5db3d2020-05-07 08:57:47 -070010976 }
10977 }
10978}
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010979
Marat Dukhanbf715f92020-10-23 20:17:00 -070010980TEST(F32_DWCONV2D_CHW_5X5S2P2__SCALAR_1X1_ACC5, padding_top_eq_1) {
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010981 for (size_t input_height = 2; input_height < 8; input_height++) {
10982 for (size_t input_width = 1; input_width < 11; input_width += 1) {
Marat Dukhanbf715f92020-10-23 20:17:00 -070010983 DWConv2DMicrokernelTester()
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010984 .input_width(input_width)
10985 .input_height(input_height)
10986 .kernel_height(5)
10987 .kernel_width(5)
10988 .subsampling(2)
10989 .padding_left(2)
10990 .padding_right(2)
10991 .padding_top(1)
10992 .padding_bottom(2)
Marat Dukhanbf715f92020-10-23 20:17:00 -070010993 .Test(xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, DWConv2DMicrokernelTester::Variant::Scalar);
Marat Dukhandc6c77f2020-10-23 19:09:10 -070010994 }
10995 }
10996}