blob: 6707dd698b7acf052cdb9cde930f1732fadd3649 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/conv.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include "conv-hwc-microkernel-tester.h"
13
14
Marat Dukhance7a3f82020-05-17 21:46:44 -070015#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_eq_4) {
17 TEST_REQUIRES_ARM_NEON;
18 ConvHWCMicrokernelTester()
19 .kernel_size(3)
20 .subsampling(2)
21 .padding_width(1)
22 .input_channels(3)
23 .output_channels_tile(8)
24 .output_channels(8)
25 .input_width(4)
26 .input_height(3)
27 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
28 }
29
30 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_div_4) {
31 TEST_REQUIRES_ARM_NEON;
32 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
33 ConvHWCMicrokernelTester()
34 .kernel_size(3)
35 .subsampling(2)
36 .padding_width(1)
37 .input_channels(3)
38 .output_channels_tile(8)
39 .output_channels(8)
40 .input_width(input_width)
41 .input_height(3)
42 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
43 }
44 }
45
46 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_lt_4) {
47 TEST_REQUIRES_ARM_NEON;
48 for (size_t input_width = 1; input_width < 4; input_width++) {
49 ConvHWCMicrokernelTester()
50 .kernel_size(3)
51 .subsampling(2)
52 .padding_width(1)
53 .input_channels(3)
54 .output_channels_tile(8)
55 .output_channels(8)
56 .input_width(input_width)
57 .input_height(3)
58 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
59 }
60 }
61
62 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_gt_4) {
63 TEST_REQUIRES_ARM_NEON;
64 for (size_t input_width = 5; input_width < 8; input_width++) {
65 ConvHWCMicrokernelTester()
66 .kernel_size(3)
67 .subsampling(2)
68 .padding_width(1)
69 .input_channels(3)
70 .output_channels_tile(8)
71 .output_channels(8)
72 .input_width(input_width)
73 .input_height(3)
74 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
75 }
76 }
77
78 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_lt_8) {
79 TEST_REQUIRES_ARM_NEON;
80 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
81 for (size_t input_width = 1; input_width < 32; input_width += 7) {
82 ConvHWCMicrokernelTester()
83 .kernel_size(3)
84 .subsampling(2)
85 .padding_width(1)
86 .input_channels(3)
87 .output_channels_tile(8)
88 .output_channels(output_channels)
89 .input_width(input_width)
90 .input_height(3)
91 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
92 }
93 }
94 }
95
96 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_div_8) {
97 TEST_REQUIRES_ARM_NEON;
98 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
99 for (size_t input_width = 1; input_width < 32; input_width += 7) {
100 ConvHWCMicrokernelTester()
101 .kernel_size(3)
102 .subsampling(2)
103 .padding_width(1)
104 .input_channels(3)
105 .output_channels_tile(8)
106 .output_channels(output_channels)
107 .input_width(input_width)
108 .input_height(3)
109 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
110 }
111 }
112 }
113
114 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_gt_8) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
117 for (size_t input_width = 1; input_width < 32; input_width += 7) {
118 ConvHWCMicrokernelTester()
119 .kernel_size(3)
120 .subsampling(2)
121 .padding_width(1)
122 .input_channels(3)
123 .output_channels_tile(8)
124 .output_channels(output_channels)
125 .input_width(input_width)
126 .input_height(3)
127 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
128 }
129 }
130 }
131
132 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_height_lt_3) {
133 TEST_REQUIRES_ARM_NEON;
134 for (size_t input_height = 1; input_height < 3; input_height++) {
135 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
136 for (size_t input_width = 1; input_width < 32; input_width += 7) {
137 ConvHWCMicrokernelTester()
138 .kernel_size(3)
139 .subsampling(2)
140 .padding(1) // padded input height of at least 3 required
141 .input_channels(3)
142 .output_channels_tile(8)
143 .output_channels(output_channels)
144 .input_width(input_width)
145 .input_height(input_height)
146 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
147 }
148 }
149 }
150 }
151
152 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_height_gt_3) {
153 TEST_REQUIRES_ARM_NEON;
154 for (size_t input_height = 4; input_height <= 9; input_height++) {
155 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
156 for (size_t input_width = 1; input_width < 32; input_width += 7) {
157 ConvHWCMicrokernelTester()
158 .kernel_size(3)
159 .subsampling(2)
160 .padding_width(1)
161 .input_channels(3)
162 .output_channels_tile(8)
163 .output_channels(output_channels)
164 .input_width(input_width)
165 .input_height(input_height)
166 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
167 }
168 }
169 }
170 }
171
172 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, padding_top) {
173 TEST_REQUIRES_ARM_NEON;
174 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
175 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
176 for (size_t input_width = 1; input_width < 32; input_width += 7) {
177 ConvHWCMicrokernelTester()
178 .kernel_size(3)
179 .subsampling(2)
180 .padding_width(1)
181 .padding_top(padding_top)
182 .input_channels(3)
183 .output_channels_tile(8)
184 .output_channels(output_channels)
185 .input_width(input_width)
186 .input_height(9)
187 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
188 }
189 }
190 }
191 }
192
193 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, padding_bottom) {
194 TEST_REQUIRES_ARM_NEON;
195 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
196 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
197 for (size_t input_width = 1; input_width < 32; input_width += 7) {
198 ConvHWCMicrokernelTester()
199 .kernel_size(3)
200 .subsampling(2)
201 .padding_width(1)
202 .padding_bottom(padding_bottom)
203 .input_channels(3)
204 .output_channels_tile(8)
205 .output_channels(output_channels)
206 .input_width(input_width)
207 .input_height(9)
208 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
209 }
210 }
211 }
212 }
213
214 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_y_start) {
215 TEST_REQUIRES_ARM_NEON;
216 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
217 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
218 for (size_t input_width = 1; input_width < 32; input_width += 7) {
219 ConvHWCMicrokernelTester()
220 .kernel_size(3)
221 .subsampling(2)
222 .padding_width(1)
223 .input_channels(3)
224 .output_channels_tile(8)
225 .output_channels(output_channels)
226 .input_width(input_width)
227 .input_height(9)
228 .output_y_start(output_y_start)
229 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
230 }
231 }
232 }
233 }
234
235 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_y_end) {
236 TEST_REQUIRES_ARM_NEON;
237 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
238 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
239 for (size_t input_width = 1; input_width < 32; input_width += 7) {
240 ConvHWCMicrokernelTester()
241 .kernel_size(3)
242 .subsampling(2)
243 .padding_width(1)
244 .input_channels(3)
245 .output_channels_tile(8)
246 .output_channels(output_channels)
247 .input_width(input_width)
248 .input_height(9)
249 .output_y_end(output_y_end)
250 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
251 }
252 }
253 }
254 }
255
256 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, qmin) {
257 TEST_REQUIRES_ARM_NEON;
258 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
259 for (size_t input_width = 1; input_width < 32; input_width += 7) {
260 ConvHWCMicrokernelTester()
261 .kernel_size(3)
262 .subsampling(2)
263 .padding_width(1)
264 .input_channels(3)
265 .output_channels_tile(8)
266 .output_channels(output_channels)
267 .input_width(input_width)
268 .input_height(6)
269 .qmin(128)
270 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
271 }
272 }
273 }
274
275 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, qmax) {
276 TEST_REQUIRES_ARM_NEON;
277 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
278 for (size_t input_width = 1; input_width < 32; input_width += 7) {
279 ConvHWCMicrokernelTester()
280 .kernel_size(3)
281 .subsampling(2)
282 .padding_width(1)
283 .input_channels(3)
284 .output_channels_tile(8)
285 .output_channels(output_channels)
286 .input_width(input_width)
287 .input_height(6)
288 .qmax(128)
289 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2);
290 }
291 }
292 }
293#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
294
295#if XNN_ARCH_ARM || XNN_ARCH_ARM64
296 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_eq_4) {
297 TEST_REQUIRES_ARM_NEON;
298 ConvHWCMicrokernelTester()
299 .kernel_size(3)
300 .subsampling(2)
301 .padding_width(1)
302 .input_channels(3)
303 .output_channels_tile(4)
304 .output_channels(4)
305 .input_width(4)
306 .input_height(3)
307 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
308 }
309
310 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_div_4) {
311 TEST_REQUIRES_ARM_NEON;
312 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
313 ConvHWCMicrokernelTester()
314 .kernel_size(3)
315 .subsampling(2)
316 .padding_width(1)
317 .input_channels(3)
318 .output_channels_tile(4)
319 .output_channels(4)
320 .input_width(input_width)
321 .input_height(3)
322 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
323 }
324 }
325
326 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_lt_4) {
327 TEST_REQUIRES_ARM_NEON;
328 for (size_t input_width = 1; input_width < 4; input_width++) {
329 ConvHWCMicrokernelTester()
330 .kernel_size(3)
331 .subsampling(2)
332 .padding_width(1)
333 .input_channels(3)
334 .output_channels_tile(4)
335 .output_channels(4)
336 .input_width(input_width)
337 .input_height(3)
338 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
339 }
340 }
341
342 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_gt_4) {
343 TEST_REQUIRES_ARM_NEON;
344 for (size_t input_width = 5; input_width < 8; input_width++) {
345 ConvHWCMicrokernelTester()
346 .kernel_size(3)
347 .subsampling(2)
348 .padding_width(1)
349 .input_channels(3)
350 .output_channels_tile(4)
351 .output_channels(4)
352 .input_width(input_width)
353 .input_height(3)
354 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
355 }
356 }
357
358 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_lt_4) {
359 TEST_REQUIRES_ARM_NEON;
360 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
361 for (size_t input_width = 1; input_width < 32; input_width += 7) {
362 ConvHWCMicrokernelTester()
363 .kernel_size(3)
364 .subsampling(2)
365 .padding_width(1)
366 .input_channels(3)
367 .output_channels_tile(4)
368 .output_channels(output_channels)
369 .input_width(input_width)
370 .input_height(3)
371 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
372 }
373 }
374 }
375
376 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_div_4) {
377 TEST_REQUIRES_ARM_NEON;
378 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
379 for (size_t input_width = 1; input_width < 32; input_width += 7) {
380 ConvHWCMicrokernelTester()
381 .kernel_size(3)
382 .subsampling(2)
383 .padding_width(1)
384 .input_channels(3)
385 .output_channels_tile(4)
386 .output_channels(output_channels)
387 .input_width(input_width)
388 .input_height(3)
389 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
390 }
391 }
392 }
393
394 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_gt_4) {
395 TEST_REQUIRES_ARM_NEON;
396 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
397 for (size_t input_width = 1; input_width < 32; input_width += 7) {
398 ConvHWCMicrokernelTester()
399 .kernel_size(3)
400 .subsampling(2)
401 .padding_width(1)
402 .input_channels(3)
403 .output_channels_tile(4)
404 .output_channels(output_channels)
405 .input_width(input_width)
406 .input_height(3)
407 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
408 }
409 }
410 }
411
412 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_height_lt_3) {
413 TEST_REQUIRES_ARM_NEON;
414 for (size_t input_height = 1; input_height < 3; input_height++) {
415 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
416 for (size_t input_width = 1; input_width < 32; input_width += 7) {
417 ConvHWCMicrokernelTester()
418 .kernel_size(3)
419 .subsampling(2)
420 .padding(1)
421 .input_channels(3) // padded input height of at least 3 required
422 .output_channels_tile(4)
423 .output_channels(output_channels)
424 .input_width(input_width)
425 .input_height(input_height)
426 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
427 }
428 }
429 }
430 }
431
432 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_height_gt_3) {
433 TEST_REQUIRES_ARM_NEON;
434 for (size_t input_height = 4; input_height <= 9; input_height++) {
435 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
436 for (size_t input_width = 1; input_width < 32; input_width += 7) {
437 ConvHWCMicrokernelTester()
438 .kernel_size(3)
439 .subsampling(2)
440 .padding_width(1)
441 .input_channels(3)
442 .output_channels_tile(4)
443 .output_channels(output_channels)
444 .input_width(input_width)
445 .input_height(input_height)
446 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
447 }
448 }
449 }
450 }
451
452 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, padding_top) {
453 TEST_REQUIRES_ARM_NEON;
454 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
455 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
456 for (size_t input_width = 1; input_width < 32; input_width += 7) {
457 ConvHWCMicrokernelTester()
458 .kernel_size(3)
459 .subsampling(2)
460 .padding_width(1)
461 .padding_top(padding_top)
462 .input_channels(3)
463 .output_channels_tile(4)
464 .output_channels(output_channels)
465 .input_width(input_width)
466 .input_height(9)
467 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
468 }
469 }
470 }
471 }
472
473 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, padding_bottom) {
474 TEST_REQUIRES_ARM_NEON;
475 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
476 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
477 for (size_t input_width = 1; input_width < 32; input_width += 7) {
478 ConvHWCMicrokernelTester()
479 .kernel_size(3)
480 .subsampling(2)
481 .padding_width(1)
482 .padding_bottom(padding_bottom)
483 .input_channels(3)
484 .output_channels_tile(4)
485 .output_channels(output_channels)
486 .input_width(input_width)
487 .input_height(9)
488 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
489 }
490 }
491 }
492 }
493
494 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_y_start) {
495 TEST_REQUIRES_ARM_NEON;
496 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
497 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
498 for (size_t input_width = 1; input_width < 32; input_width += 7) {
499 ConvHWCMicrokernelTester()
500 .kernel_size(3)
501 .subsampling(2)
502 .padding_width(1)
503 .input_channels(3)
504 .output_channels_tile(4)
505 .output_channels(output_channels)
506 .input_width(input_width)
507 .input_height(9)
508 .output_y_start(output_y_start)
509 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
510 }
511 }
512 }
513 }
514
515 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_y_end) {
516 TEST_REQUIRES_ARM_NEON;
517 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
518 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
519 for (size_t input_width = 1; input_width < 32; input_width += 7) {
520 ConvHWCMicrokernelTester()
521 .kernel_size(3)
522 .subsampling(2)
523 .padding_width(1)
524 .input_channels(3)
525 .output_channels_tile(4)
526 .output_channels(output_channels)
527 .input_width(input_width)
528 .input_height(9)
529 .output_y_end(output_y_end)
530 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
531 }
532 }
533 }
534 }
535
536 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, qmin) {
537 TEST_REQUIRES_ARM_NEON;
538 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
539 for (size_t input_width = 1; input_width < 32; input_width += 7) {
540 ConvHWCMicrokernelTester()
541 .kernel_size(3)
542 .subsampling(2)
543 .padding_width(1)
544 .input_channels(3)
545 .output_channels_tile(4)
546 .output_channels(output_channels)
547 .input_width(input_width)
548 .input_height(6)
549 .qmin(128)
550 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
551 }
552 }
553 }
554
555 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, qmax) {
556 TEST_REQUIRES_ARM_NEON;
557 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
558 for (size_t input_width = 1; input_width < 32; input_width += 7) {
559 ConvHWCMicrokernelTester()
560 .kernel_size(3)
561 .subsampling(2)
562 .padding_width(1)
563 .input_channels(3)
564 .output_channels_tile(4)
565 .output_channels(output_channels)
566 .input_width(input_width)
567 .input_height(6)
568 .qmax(128)
569 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2);
570 }
571 }
572 }
573#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
574
575#if XNN_ARCH_ARM || XNN_ARCH_ARM64
576 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_eq_4) {
577 TEST_REQUIRES_ARM_NEON;
578 ConvHWCMicrokernelTester()
579 .kernel_size(3)
580 .subsampling(2)
581 .padding_right(1)
582 .input_channels(3)
583 .output_channels_tile(8)
584 .output_channels(8)
585 .input_width(4)
586 .input_height(3)
587 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
588 }
589
590 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_div_4) {
591 TEST_REQUIRES_ARM_NEON;
592 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
593 ConvHWCMicrokernelTester()
594 .kernel_size(3)
595 .subsampling(2)
596 .padding_right(1)
597 .input_channels(3)
598 .output_channels_tile(8)
599 .output_channels(8)
600 .input_width(input_width)
601 .input_height(3)
602 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
603 }
604 }
605
606 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_lt_4) {
607 TEST_REQUIRES_ARM_NEON;
608 for (size_t input_width = 2; input_width < 4; input_width++) {
609 ConvHWCMicrokernelTester()
610 .kernel_size(3)
611 .subsampling(2)
612 .padding_right(1)
613 .input_channels(3)
614 .output_channels_tile(8)
615 .output_channels(8)
616 .input_width(input_width)
617 .input_height(3)
618 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
619 }
620 }
621
622 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_gt_4) {
623 TEST_REQUIRES_ARM_NEON;
624 for (size_t input_width = 5; input_width < 8; input_width++) {
625 ConvHWCMicrokernelTester()
626 .kernel_size(3)
627 .subsampling(2)
628 .padding_right(1)
629 .input_channels(3)
630 .output_channels_tile(8)
631 .output_channels(8)
632 .input_width(input_width)
633 .input_height(3)
634 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
635 }
636 }
637
638 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_lt_8) {
639 TEST_REQUIRES_ARM_NEON;
640 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
641 for (size_t input_width = 2; input_width < 32; input_width += 7) {
642 ConvHWCMicrokernelTester()
643 .kernel_size(3)
644 .subsampling(2)
645 .padding_right(1)
646 .input_channels(3)
647 .output_channels_tile(8)
648 .output_channels(output_channels)
649 .input_width(input_width)
650 .input_height(3)
651 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
652 }
653 }
654 }
655
656 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_div_8) {
657 TEST_REQUIRES_ARM_NEON;
658 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
659 for (size_t input_width = 2; input_width < 32; input_width += 7) {
660 ConvHWCMicrokernelTester()
661 .kernel_size(3)
662 .subsampling(2)
663 .padding_right(1)
664 .input_channels(3)
665 .output_channels_tile(8)
666 .output_channels(output_channels)
667 .input_width(input_width)
668 .input_height(3)
669 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
670 }
671 }
672 }
673
674 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_gt_8) {
675 TEST_REQUIRES_ARM_NEON;
676 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
677 for (size_t input_width = 2; input_width < 32; input_width += 7) {
678 ConvHWCMicrokernelTester()
679 .kernel_size(3)
680 .subsampling(2)
681 .padding_right(1)
682 .input_channels(3)
683 .output_channels_tile(8)
684 .output_channels(output_channels)
685 .input_width(input_width)
686 .input_height(3)
687 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
688 }
689 }
690 }
691
692 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_height_lt_3) {
693 TEST_REQUIRES_ARM_NEON;
694 for (size_t input_height = 1; input_height < 3; input_height++) {
695 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
696 for (size_t input_width = 2; input_width < 32; input_width += 7) {
697 ConvHWCMicrokernelTester()
698 .kernel_size(3)
699 .subsampling(2)
700 .padding_height(1)
701 .padding_right(1)
702 .input_channels(3)
703 .output_channels_tile(8)
704 .output_channels(output_channels)
705 .input_width(input_width)
706 .input_height(input_height)
707 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
708 }
709 }
710 }
711 }
712
713 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_height_gt_3) {
714 TEST_REQUIRES_ARM_NEON;
715 for (size_t input_height = 4; input_height <= 9; input_height++) {
716 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
717 for (size_t input_width = 2; input_width < 32; input_width += 7) {
718 ConvHWCMicrokernelTester()
719 .kernel_size(3)
720 .subsampling(2)
721 .padding_right(1)
722 .input_channels(3)
723 .output_channels_tile(8)
724 .output_channels(output_channels)
725 .input_width(input_width)
726 .input_height(input_height)
727 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
728 }
729 }
730 }
731 }
732
733 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, padding_top) {
734 TEST_REQUIRES_ARM_NEON;
735 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
736 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
737 for (size_t input_width = 2; input_width < 32; input_width += 7) {
738 ConvHWCMicrokernelTester()
739 .kernel_size(3)
740 .subsampling(2)
741 .padding_right(1)
742 .padding_top(padding_top)
743 .input_channels(3)
744 .output_channels_tile(8)
745 .output_channels(output_channels)
746 .input_width(input_width)
747 .input_height(9)
748 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
749 }
750 }
751 }
752 }
753
754 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, padding_bottom) {
755 TEST_REQUIRES_ARM_NEON;
756 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
757 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
758 for (size_t input_width = 2; input_width < 32; input_width += 7) {
759 ConvHWCMicrokernelTester()
760 .kernel_size(3)
761 .subsampling(2)
762 .padding_right(1)
763 .padding_bottom(padding_bottom)
764 .input_channels(3)
765 .output_channels_tile(8)
766 .output_channels(output_channels)
767 .input_width(input_width)
768 .input_height(9)
769 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
770 }
771 }
772 }
773 }
774
775 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_y_start) {
776 TEST_REQUIRES_ARM_NEON;
777 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
778 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
779 for (size_t input_width = 2; input_width < 32; input_width += 7) {
780 ConvHWCMicrokernelTester()
781 .kernel_size(3)
782 .subsampling(2)
783 .padding_right(1)
784 .input_channels(3)
785 .output_channels_tile(8)
786 .output_channels(output_channels)
787 .input_width(input_width)
788 .input_height(9)
789 .output_y_start(output_y_start)
790 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
791 }
792 }
793 }
794 }
795
796 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_y_end) {
797 TEST_REQUIRES_ARM_NEON;
798 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
799 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
800 for (size_t input_width = 2; input_width < 32; input_width += 7) {
801 ConvHWCMicrokernelTester()
802 .kernel_size(3)
803 .subsampling(2)
804 .padding_right(1)
805 .input_channels(3)
806 .output_channels_tile(8)
807 .output_channels(output_channels)
808 .input_width(input_width)
809 .input_height(9)
810 .output_y_end(output_y_end)
811 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
812 }
813 }
814 }
815 }
816
817 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, qmin) {
818 TEST_REQUIRES_ARM_NEON;
819 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
820 for (size_t input_width = 2; input_width < 32; input_width += 7) {
821 ConvHWCMicrokernelTester()
822 .kernel_size(3)
823 .subsampling(2)
824 .padding_right(1)
825 .input_channels(3)
826 .output_channels_tile(8)
827 .output_channels(output_channels)
828 .input_width(input_width)
829 .input_height(6)
830 .qmin(128)
831 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
832 }
833 }
834 }
835
836 TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, qmax) {
837 TEST_REQUIRES_ARM_NEON;
838 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
839 for (size_t input_width = 2; input_width < 32; input_width += 7) {
840 ConvHWCMicrokernelTester()
841 .kernel_size(3)
842 .subsampling(2)
843 .padding_right(1)
844 .input_channels(3)
845 .output_channels_tile(8)
846 .output_channels(output_channels)
847 .input_width(input_width)
848 .input_height(6)
849 .qmax(128)
850 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2);
851 }
852 }
853 }
854#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
855
856#if XNN_ARCH_ARM || XNN_ARCH_ARM64
857 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_eq_4) {
858 TEST_REQUIRES_ARM_NEON;
859 ConvHWCMicrokernelTester()
860 .kernel_size(3)
861 .subsampling(2)
862 .padding_right(1)
863 .input_channels(3)
864 .output_channels_tile(4)
865 .output_channels(4)
866 .input_width(5)
867 .input_height(3)
868 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
869 }
870
871 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_div_4) {
872 TEST_REQUIRES_ARM_NEON;
873 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
874 ConvHWCMicrokernelTester()
875 .kernel_size(3)
876 .subsampling(2)
877 .padding_right(1)
878 .input_channels(3)
879 .output_channels_tile(4)
880 .output_channels(4)
881 .input_width(input_width)
882 .input_height(3)
883 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
884 }
885 }
886
887 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_lt_4) {
888 TEST_REQUIRES_ARM_NEON;
889 for (size_t input_width = 2; input_width < 4; input_width++) {
890 ConvHWCMicrokernelTester()
891 .kernel_size(3)
892 .subsampling(2)
893 .padding_right(1)
894 .input_channels(3)
895 .output_channels_tile(4)
896 .output_channels(4)
897 .input_width(input_width)
898 .input_height(3)
899 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
900 }
901 }
902
903 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_gt_4) {
904 TEST_REQUIRES_ARM_NEON;
905 for (size_t input_width = 5; input_width < 8; input_width++) {
906 ConvHWCMicrokernelTester()
907 .kernel_size(3)
908 .subsampling(2)
909 .padding_right(1)
910 .input_channels(3)
911 .output_channels_tile(4)
912 .output_channels(4)
913 .input_width(input_width)
914 .input_height(3)
915 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
916 }
917 }
918
919 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_lt_4) {
920 TEST_REQUIRES_ARM_NEON;
921 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
922 for (size_t input_width = 2; input_width < 32; input_width += 7) {
923 ConvHWCMicrokernelTester()
924 .kernel_size(3)
925 .subsampling(2)
926 .padding_right(1)
927 .input_channels(3)
928 .output_channels_tile(4)
929 .output_channels(output_channels)
930 .input_width(input_width)
931 .input_height(3)
932 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
933 }
934 }
935 }
936
937 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_div_4) {
938 TEST_REQUIRES_ARM_NEON;
939 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
940 for (size_t input_width = 2; input_width < 32; input_width += 7) {
941 ConvHWCMicrokernelTester()
942 .kernel_size(3)
943 .subsampling(2)
944 .padding_right(1)
945 .input_channels(3)
946 .output_channels_tile(4)
947 .output_channels(output_channels)
948 .input_width(input_width)
949 .input_height(3)
950 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
951 }
952 }
953 }
954
955 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_gt_4) {
956 TEST_REQUIRES_ARM_NEON;
957 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
958 for (size_t input_width = 2; input_width < 32; input_width += 7) {
959 ConvHWCMicrokernelTester()
960 .kernel_size(3)
961 .subsampling(2)
962 .padding_right(1)
963 .input_channels(3)
964 .output_channels_tile(4)
965 .output_channels(output_channels)
966 .input_width(input_width)
967 .input_height(3)
968 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
969 }
970 }
971 }
972
973 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_height_lt_3) {
974 TEST_REQUIRES_ARM_NEON;
975 for (size_t input_height = 1; input_height < 3; input_height++) {
976 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
977 for (size_t input_width = 2; input_width < 32; input_width += 7) {
978 ConvHWCMicrokernelTester()
979 .kernel_size(3)
980 .subsampling(2)
981 .padding_right(1)
982 .padding_height(1) // padded input height of at least 3 required
983 .input_channels(3)
984 .output_channels_tile(4)
985 .output_channels(output_channels)
986 .input_width(input_width)
987 .input_height(input_height)
988 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
989 }
990 }
991 }
992 }
993
994 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_height_gt_3) {
995 TEST_REQUIRES_ARM_NEON;
996 for (size_t input_height = 4; input_height <= 9; input_height++) {
997 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
998 for (size_t input_width = 2; input_width < 32; input_width += 7) {
999 ConvHWCMicrokernelTester()
1000 .kernel_size(3)
1001 .subsampling(2)
1002 .padding_right(1)
1003 .input_channels(3)
1004 .output_channels_tile(4)
1005 .output_channels(output_channels)
1006 .input_width(input_width)
1007 .input_height(input_height)
1008 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1009 }
1010 }
1011 }
1012 }
1013
1014 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, padding_top) {
1015 TEST_REQUIRES_ARM_NEON;
1016 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1017 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1018 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1019 ConvHWCMicrokernelTester()
1020 .kernel_size(3)
1021 .subsampling(2)
1022 .padding_right(1)
1023 .padding_top(padding_top)
1024 .input_channels(3)
1025 .output_channels_tile(4)
1026 .output_channels(output_channels)
1027 .input_width(input_width)
1028 .input_height(9)
1029 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1030 }
1031 }
1032 }
1033 }
1034
1035 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, padding_bottom) {
1036 TEST_REQUIRES_ARM_NEON;
1037 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1038 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1039 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1040 ConvHWCMicrokernelTester()
1041 .kernel_size(3)
1042 .subsampling(2)
1043 .padding_right(1)
1044 .padding_bottom(padding_bottom)
1045 .input_channels(3)
1046 .output_channels_tile(4)
1047 .output_channels(output_channels)
1048 .input_width(input_width)
1049 .input_height(9)
1050 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1051 }
1052 }
1053 }
1054 }
1055
1056 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_y_start) {
1057 TEST_REQUIRES_ARM_NEON;
1058 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1059 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1060 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1061 ConvHWCMicrokernelTester()
1062 .kernel_size(3)
1063 .subsampling(2)
1064 .padding_right(1)
1065 .input_channels(3)
1066 .output_channels_tile(4)
1067 .output_channels(output_channels)
1068 .input_width(input_width)
1069 .input_height(9)
1070 .output_y_start(output_y_start)
1071 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1072 }
1073 }
1074 }
1075 }
1076
1077 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_y_end) {
1078 TEST_REQUIRES_ARM_NEON;
1079 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1080 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1081 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1082 ConvHWCMicrokernelTester()
1083 .kernel_size(3)
1084 .subsampling(2)
1085 .padding_right(1)
1086 .input_channels(3)
1087 .output_channels_tile(4)
1088 .output_channels(output_channels)
1089 .input_width(input_width)
1090 .input_height(9)
1091 .output_y_end(output_y_end)
1092 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1093 }
1094 }
1095 }
1096 }
1097
1098 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, qmin) {
1099 TEST_REQUIRES_ARM_NEON;
1100 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1101 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1102 ConvHWCMicrokernelTester()
1103 .kernel_size(3)
1104 .subsampling(2)
1105 .padding_right(1)
1106 .input_channels(3)
1107 .output_channels_tile(4)
1108 .output_channels(output_channels)
1109 .input_width(input_width)
1110 .input_height(6)
1111 .qmin(128)
1112 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1113 }
1114 }
1115 }
1116
1117 TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, qmax) {
1118 TEST_REQUIRES_ARM_NEON;
1119 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1120 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1121 ConvHWCMicrokernelTester()
1122 .kernel_size(3)
1123 .subsampling(2)
1124 .padding_right(1)
1125 .input_channels(3)
1126 .output_channels_tile(4)
1127 .output_channels(output_channels)
1128 .input_width(input_width)
1129 .input_height(6)
1130 .qmax(128)
1131 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2);
1132 }
1133 }
1134 }
1135#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1136
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001137#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001138 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_eq_4) {
1139 TEST_REQUIRES_ARM_NEON_FMA;
1140 ConvHWCMicrokernelTester()
1141 .kernel_size(3)
1142 .subsampling(2)
1143 .padding_width(1)
1144 .input_channels(3)
1145 .output_channels_tile(8)
1146 .output_channels(8)
1147 .input_width(4)
1148 .input_height(3)
1149 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1150 }
1151
1152 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_div_4) {
1153 TEST_REQUIRES_ARM_NEON_FMA;
1154 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
1155 ConvHWCMicrokernelTester()
1156 .kernel_size(3)
1157 .subsampling(2)
1158 .padding_width(1)
1159 .input_channels(3)
1160 .output_channels_tile(8)
1161 .output_channels(8)
1162 .input_width(input_width)
1163 .input_height(3)
1164 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1165 }
1166 }
1167
1168 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_lt_4) {
1169 TEST_REQUIRES_ARM_NEON_FMA;
1170 for (size_t input_width = 1; input_width < 4; input_width++) {
1171 ConvHWCMicrokernelTester()
1172 .kernel_size(3)
1173 .subsampling(2)
1174 .padding_width(1)
1175 .input_channels(3)
1176 .output_channels_tile(8)
1177 .output_channels(8)
1178 .input_width(input_width)
1179 .input_height(3)
1180 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1181 }
1182 }
1183
1184 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_gt_4) {
1185 TEST_REQUIRES_ARM_NEON_FMA;
1186 for (size_t input_width = 5; input_width < 8; input_width++) {
1187 ConvHWCMicrokernelTester()
1188 .kernel_size(3)
1189 .subsampling(2)
1190 .padding_width(1)
1191 .input_channels(3)
1192 .output_channels_tile(8)
1193 .output_channels(8)
1194 .input_width(input_width)
1195 .input_height(3)
1196 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1197 }
1198 }
1199
1200 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_lt_8) {
1201 TEST_REQUIRES_ARM_NEON_FMA;
1202 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
1203 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1204 ConvHWCMicrokernelTester()
1205 .kernel_size(3)
1206 .subsampling(2)
1207 .padding_width(1)
1208 .input_channels(3)
1209 .output_channels_tile(8)
1210 .output_channels(output_channels)
1211 .input_width(input_width)
1212 .input_height(3)
1213 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1214 }
1215 }
1216 }
1217
1218 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_div_8) {
1219 TEST_REQUIRES_ARM_NEON_FMA;
1220 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
1221 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1222 ConvHWCMicrokernelTester()
1223 .kernel_size(3)
1224 .subsampling(2)
1225 .padding_width(1)
1226 .input_channels(3)
1227 .output_channels_tile(8)
1228 .output_channels(output_channels)
1229 .input_width(input_width)
1230 .input_height(3)
1231 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1232 }
1233 }
1234 }
1235
1236 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_gt_8) {
1237 TEST_REQUIRES_ARM_NEON_FMA;
1238 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
1239 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1240 ConvHWCMicrokernelTester()
1241 .kernel_size(3)
1242 .subsampling(2)
1243 .padding_width(1)
1244 .input_channels(3)
1245 .output_channels_tile(8)
1246 .output_channels(output_channels)
1247 .input_width(input_width)
1248 .input_height(3)
1249 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1250 }
1251 }
1252 }
1253
1254 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_lt_3) {
1255 TEST_REQUIRES_ARM_NEON_FMA;
1256 for (size_t input_height = 1; input_height < 3; input_height++) {
1257 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1258 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1259 ConvHWCMicrokernelTester()
1260 .kernel_size(3)
1261 .subsampling(2)
1262 .padding(1) // padded input height of at least 3 required
1263 .input_channels(3)
1264 .output_channels_tile(8)
1265 .output_channels(output_channels)
1266 .input_width(input_width)
1267 .input_height(input_height)
1268 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1269 }
1270 }
1271 }
1272 }
1273
1274 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_gt_3) {
1275 TEST_REQUIRES_ARM_NEON_FMA;
1276 for (size_t input_height = 4; input_height <= 9; input_height++) {
1277 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1278 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1279 ConvHWCMicrokernelTester()
1280 .kernel_size(3)
1281 .subsampling(2)
1282 .padding_width(1)
1283 .input_channels(3)
1284 .output_channels_tile(8)
1285 .output_channels(output_channels)
1286 .input_width(input_width)
1287 .input_height(input_height)
1288 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1289 }
1290 }
1291 }
1292 }
1293
1294 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_top) {
1295 TEST_REQUIRES_ARM_NEON_FMA;
1296 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1297 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1298 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1299 ConvHWCMicrokernelTester()
1300 .kernel_size(3)
1301 .subsampling(2)
1302 .padding_width(1)
1303 .padding_top(padding_top)
1304 .input_channels(3)
1305 .output_channels_tile(8)
1306 .output_channels(output_channels)
1307 .input_width(input_width)
1308 .input_height(9)
1309 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1310 }
1311 }
1312 }
1313 }
1314
1315 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_bottom) {
1316 TEST_REQUIRES_ARM_NEON_FMA;
1317 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1318 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1319 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1320 ConvHWCMicrokernelTester()
1321 .kernel_size(3)
1322 .subsampling(2)
1323 .padding_width(1)
1324 .padding_bottom(padding_bottom)
1325 .input_channels(3)
1326 .output_channels_tile(8)
1327 .output_channels(output_channels)
1328 .input_width(input_width)
1329 .input_height(9)
1330 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1331 }
1332 }
1333 }
1334 }
1335
1336 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_start) {
1337 TEST_REQUIRES_ARM_NEON_FMA;
1338 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1339 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1340 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1341 ConvHWCMicrokernelTester()
1342 .kernel_size(3)
1343 .subsampling(2)
1344 .padding_width(1)
1345 .input_channels(3)
1346 .output_channels_tile(8)
1347 .output_channels(output_channels)
1348 .input_width(input_width)
1349 .input_height(9)
1350 .output_y_start(output_y_start)
1351 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1352 }
1353 }
1354 }
1355 }
1356
1357 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_end) {
1358 TEST_REQUIRES_ARM_NEON_FMA;
1359 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1360 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1361 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1362 ConvHWCMicrokernelTester()
1363 .kernel_size(3)
1364 .subsampling(2)
1365 .padding_width(1)
1366 .input_channels(3)
1367 .output_channels_tile(8)
1368 .output_channels(output_channels)
1369 .input_width(input_width)
1370 .input_height(9)
1371 .output_y_end(output_y_end)
1372 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1373 }
1374 }
1375 }
1376 }
1377
1378 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmin) {
1379 TEST_REQUIRES_ARM_NEON_FMA;
1380 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1381 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1382 ConvHWCMicrokernelTester()
1383 .kernel_size(3)
1384 .subsampling(2)
1385 .padding_width(1)
1386 .input_channels(3)
1387 .output_channels_tile(8)
1388 .output_channels(output_channels)
1389 .input_width(input_width)
1390 .input_height(6)
1391 .qmin(128)
1392 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1393 }
1394 }
1395 }
1396
1397 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmax) {
1398 TEST_REQUIRES_ARM_NEON_FMA;
1399 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1400 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1401 ConvHWCMicrokernelTester()
1402 .kernel_size(3)
1403 .subsampling(2)
1404 .padding_width(1)
1405 .input_channels(3)
1406 .output_channels_tile(8)
1407 .output_channels(output_channels)
1408 .input_width(input_width)
1409 .input_height(6)
1410 .qmax(128)
1411 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
1412 }
1413 }
1414 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001415#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001416
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001417#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001418 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
1419 TEST_REQUIRES_ARM_NEON_FMA;
1420 ConvHWCMicrokernelTester()
1421 .kernel_size(3)
1422 .subsampling(2)
1423 .padding_width(1)
1424 .input_channels(3)
1425 .output_channels_tile(4)
1426 .output_channels(4)
1427 .input_width(4)
1428 .input_height(3)
1429 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1430 }
1431
1432 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
1433 TEST_REQUIRES_ARM_NEON_FMA;
1434 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
1435 ConvHWCMicrokernelTester()
1436 .kernel_size(3)
1437 .subsampling(2)
1438 .padding_width(1)
1439 .input_channels(3)
1440 .output_channels_tile(4)
1441 .output_channels(4)
1442 .input_width(input_width)
1443 .input_height(3)
1444 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1445 }
1446 }
1447
1448 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
1449 TEST_REQUIRES_ARM_NEON_FMA;
1450 for (size_t input_width = 1; input_width < 4; input_width++) {
1451 ConvHWCMicrokernelTester()
1452 .kernel_size(3)
1453 .subsampling(2)
1454 .padding_width(1)
1455 .input_channels(3)
1456 .output_channels_tile(4)
1457 .output_channels(4)
1458 .input_width(input_width)
1459 .input_height(3)
1460 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1461 }
1462 }
1463
1464 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
1465 TEST_REQUIRES_ARM_NEON_FMA;
1466 for (size_t input_width = 5; input_width < 8; input_width++) {
1467 ConvHWCMicrokernelTester()
1468 .kernel_size(3)
1469 .subsampling(2)
1470 .padding_width(1)
1471 .input_channels(3)
1472 .output_channels_tile(4)
1473 .output_channels(4)
1474 .input_width(input_width)
1475 .input_height(3)
1476 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1477 }
1478 }
1479
1480 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
1481 TEST_REQUIRES_ARM_NEON_FMA;
1482 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
1483 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1484 ConvHWCMicrokernelTester()
1485 .kernel_size(3)
1486 .subsampling(2)
1487 .padding_width(1)
1488 .input_channels(3)
1489 .output_channels_tile(4)
1490 .output_channels(output_channels)
1491 .input_width(input_width)
1492 .input_height(3)
1493 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1494 }
1495 }
1496 }
1497
1498 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
1499 TEST_REQUIRES_ARM_NEON_FMA;
1500 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
1501 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1502 ConvHWCMicrokernelTester()
1503 .kernel_size(3)
1504 .subsampling(2)
1505 .padding_width(1)
1506 .input_channels(3)
1507 .output_channels_tile(4)
1508 .output_channels(output_channels)
1509 .input_width(input_width)
1510 .input_height(3)
1511 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1512 }
1513 }
1514 }
1515
1516 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
1517 TEST_REQUIRES_ARM_NEON_FMA;
1518 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
1519 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1520 ConvHWCMicrokernelTester()
1521 .kernel_size(3)
1522 .subsampling(2)
1523 .padding_width(1)
1524 .input_channels(3)
1525 .output_channels_tile(4)
1526 .output_channels(output_channels)
1527 .input_width(input_width)
1528 .input_height(3)
1529 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1530 }
1531 }
1532 }
1533
1534 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
1535 TEST_REQUIRES_ARM_NEON_FMA;
1536 for (size_t input_height = 1; input_height < 3; input_height++) {
1537 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1538 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1539 ConvHWCMicrokernelTester()
1540 .kernel_size(3)
1541 .subsampling(2)
1542 .padding(1)
1543 .input_channels(3) // padded input height of at least 3 required
1544 .output_channels_tile(4)
1545 .output_channels(output_channels)
1546 .input_width(input_width)
1547 .input_height(input_height)
1548 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1549 }
1550 }
1551 }
1552 }
1553
1554 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
1555 TEST_REQUIRES_ARM_NEON_FMA;
1556 for (size_t input_height = 4; input_height <= 9; input_height++) {
1557 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1558 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1559 ConvHWCMicrokernelTester()
1560 .kernel_size(3)
1561 .subsampling(2)
1562 .padding_width(1)
1563 .input_channels(3)
1564 .output_channels_tile(4)
1565 .output_channels(output_channels)
1566 .input_width(input_width)
1567 .input_height(input_height)
1568 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1569 }
1570 }
1571 }
1572 }
1573
1574 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
1575 TEST_REQUIRES_ARM_NEON_FMA;
1576 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1577 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1578 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1579 ConvHWCMicrokernelTester()
1580 .kernel_size(3)
1581 .subsampling(2)
1582 .padding_width(1)
1583 .padding_top(padding_top)
1584 .input_channels(3)
1585 .output_channels_tile(4)
1586 .output_channels(output_channels)
1587 .input_width(input_width)
1588 .input_height(9)
1589 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1590 }
1591 }
1592 }
1593 }
1594
1595 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
1596 TEST_REQUIRES_ARM_NEON_FMA;
1597 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1598 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1599 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1600 ConvHWCMicrokernelTester()
1601 .kernel_size(3)
1602 .subsampling(2)
1603 .padding_width(1)
1604 .padding_bottom(padding_bottom)
1605 .input_channels(3)
1606 .output_channels_tile(4)
1607 .output_channels(output_channels)
1608 .input_width(input_width)
1609 .input_height(9)
1610 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1611 }
1612 }
1613 }
1614 }
1615
1616 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
1617 TEST_REQUIRES_ARM_NEON_FMA;
1618 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1619 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1620 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1621 ConvHWCMicrokernelTester()
1622 .kernel_size(3)
1623 .subsampling(2)
1624 .padding_width(1)
1625 .input_channels(3)
1626 .output_channels_tile(4)
1627 .output_channels(output_channels)
1628 .input_width(input_width)
1629 .input_height(9)
1630 .output_y_start(output_y_start)
1631 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1632 }
1633 }
1634 }
1635 }
1636
1637 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
1638 TEST_REQUIRES_ARM_NEON_FMA;
1639 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1640 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1641 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1642 ConvHWCMicrokernelTester()
1643 .kernel_size(3)
1644 .subsampling(2)
1645 .padding_width(1)
1646 .input_channels(3)
1647 .output_channels_tile(4)
1648 .output_channels(output_channels)
1649 .input_width(input_width)
1650 .input_height(9)
1651 .output_y_end(output_y_end)
1652 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1653 }
1654 }
1655 }
1656 }
1657
1658 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
1659 TEST_REQUIRES_ARM_NEON_FMA;
1660 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1661 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1662 ConvHWCMicrokernelTester()
1663 .kernel_size(3)
1664 .subsampling(2)
1665 .padding_width(1)
1666 .input_channels(3)
1667 .output_channels_tile(4)
1668 .output_channels(output_channels)
1669 .input_width(input_width)
1670 .input_height(6)
1671 .qmin(128)
1672 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1673 }
1674 }
1675 }
1676
1677 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
1678 TEST_REQUIRES_ARM_NEON_FMA;
1679 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1680 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1681 ConvHWCMicrokernelTester()
1682 .kernel_size(3)
1683 .subsampling(2)
1684 .padding_width(1)
1685 .input_channels(3)
1686 .output_channels_tile(4)
1687 .output_channels(output_channels)
1688 .input_width(input_width)
1689 .input_height(6)
1690 .qmax(128)
1691 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
1692 }
1693 }
1694 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001695#endif // XNN_ARCH_ARM64
Marat Dukhan6b7dfae2019-12-04 16:00:52 -08001696
Marat Dukhan7e4ca402020-05-15 18:50:12 -07001697#if XNN_ARCH_ARM64
Marat Dukhance7a3f82020-05-17 21:46:44 -07001698 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_eq_4) {
1699 TEST_REQUIRES_ARM_NEON_FMA;
1700 ConvHWCMicrokernelTester()
1701 .kernel_size(3)
1702 .subsampling(2)
1703 .padding_right(1)
1704 .input_channels(3)
1705 .output_channels_tile(8)
1706 .output_channels(8)
1707 .input_width(4)
1708 .input_height(3)
1709 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1710 }
1711
1712 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_div_4) {
1713 TEST_REQUIRES_ARM_NEON_FMA;
1714 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
1715 ConvHWCMicrokernelTester()
1716 .kernel_size(3)
1717 .subsampling(2)
1718 .padding_right(1)
1719 .input_channels(3)
1720 .output_channels_tile(8)
1721 .output_channels(8)
1722 .input_width(input_width)
1723 .input_height(3)
1724 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1725 }
1726 }
1727
1728 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_lt_4) {
1729 TEST_REQUIRES_ARM_NEON_FMA;
1730 for (size_t input_width = 2; input_width < 4; input_width++) {
1731 ConvHWCMicrokernelTester()
1732 .kernel_size(3)
1733 .subsampling(2)
1734 .padding_right(1)
1735 .input_channels(3)
1736 .output_channels_tile(8)
1737 .output_channels(8)
1738 .input_width(input_width)
1739 .input_height(3)
1740 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1741 }
1742 }
1743
1744 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_gt_4) {
1745 TEST_REQUIRES_ARM_NEON_FMA;
1746 for (size_t input_width = 5; input_width < 8; input_width++) {
1747 ConvHWCMicrokernelTester()
1748 .kernel_size(3)
1749 .subsampling(2)
1750 .padding_right(1)
1751 .input_channels(3)
1752 .output_channels_tile(8)
1753 .output_channels(8)
1754 .input_width(input_width)
1755 .input_height(3)
1756 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1757 }
1758 }
1759
1760 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_lt_8) {
1761 TEST_REQUIRES_ARM_NEON_FMA;
1762 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
1763 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1764 ConvHWCMicrokernelTester()
1765 .kernel_size(3)
1766 .subsampling(2)
1767 .padding_right(1)
1768 .input_channels(3)
1769 .output_channels_tile(8)
1770 .output_channels(output_channels)
1771 .input_width(input_width)
1772 .input_height(3)
1773 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1774 }
1775 }
1776 }
1777
1778 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_div_8) {
1779 TEST_REQUIRES_ARM_NEON_FMA;
1780 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
1781 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1782 ConvHWCMicrokernelTester()
1783 .kernel_size(3)
1784 .subsampling(2)
1785 .padding_right(1)
1786 .input_channels(3)
1787 .output_channels_tile(8)
1788 .output_channels(output_channels)
1789 .input_width(input_width)
1790 .input_height(3)
1791 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1792 }
1793 }
1794 }
1795
1796 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_gt_8) {
1797 TEST_REQUIRES_ARM_NEON_FMA;
1798 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
1799 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1800 ConvHWCMicrokernelTester()
1801 .kernel_size(3)
1802 .subsampling(2)
1803 .padding_right(1)
1804 .input_channels(3)
1805 .output_channels_tile(8)
1806 .output_channels(output_channels)
1807 .input_width(input_width)
1808 .input_height(3)
1809 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1810 }
1811 }
1812 }
1813
1814 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_height_lt_3) {
1815 TEST_REQUIRES_ARM_NEON_FMA;
1816 for (size_t input_height = 1; input_height < 3; input_height++) {
1817 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1818 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1819 ConvHWCMicrokernelTester()
1820 .kernel_size(3)
1821 .subsampling(2)
1822 .padding_height(1)
1823 .padding_right(1)
1824 .input_channels(3)
1825 .output_channels_tile(8)
1826 .output_channels(output_channels)
1827 .input_width(input_width)
1828 .input_height(input_height)
1829 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1830 }
1831 }
1832 }
1833 }
1834
1835 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_height_gt_3) {
1836 TEST_REQUIRES_ARM_NEON_FMA;
1837 for (size_t input_height = 4; input_height <= 9; input_height++) {
1838 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1839 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1840 ConvHWCMicrokernelTester()
1841 .kernel_size(3)
1842 .subsampling(2)
1843 .padding_right(1)
1844 .input_channels(3)
1845 .output_channels_tile(8)
1846 .output_channels(output_channels)
1847 .input_width(input_width)
1848 .input_height(input_height)
1849 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1850 }
1851 }
1852 }
1853 }
1854
1855 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, padding_top) {
1856 TEST_REQUIRES_ARM_NEON_FMA;
1857 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1858 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1859 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1860 ConvHWCMicrokernelTester()
1861 .kernel_size(3)
1862 .subsampling(2)
1863 .padding_right(1)
1864 .padding_top(padding_top)
1865 .input_channels(3)
1866 .output_channels_tile(8)
1867 .output_channels(output_channels)
1868 .input_width(input_width)
1869 .input_height(9)
1870 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1871 }
1872 }
1873 }
1874 }
1875
1876 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, padding_bottom) {
1877 TEST_REQUIRES_ARM_NEON_FMA;
1878 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1879 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1880 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1881 ConvHWCMicrokernelTester()
1882 .kernel_size(3)
1883 .subsampling(2)
1884 .padding_right(1)
1885 .padding_bottom(padding_bottom)
1886 .input_channels(3)
1887 .output_channels_tile(8)
1888 .output_channels(output_channels)
1889 .input_width(input_width)
1890 .input_height(9)
1891 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1892 }
1893 }
1894 }
1895 }
1896
1897 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_y_start) {
1898 TEST_REQUIRES_ARM_NEON_FMA;
1899 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1900 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1901 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1902 ConvHWCMicrokernelTester()
1903 .kernel_size(3)
1904 .subsampling(2)
1905 .padding_right(1)
1906 .input_channels(3)
1907 .output_channels_tile(8)
1908 .output_channels(output_channels)
1909 .input_width(input_width)
1910 .input_height(9)
1911 .output_y_start(output_y_start)
1912 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1913 }
1914 }
1915 }
1916 }
1917
1918 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_y_end) {
1919 TEST_REQUIRES_ARM_NEON_FMA;
1920 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1921 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1922 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1923 ConvHWCMicrokernelTester()
1924 .kernel_size(3)
1925 .subsampling(2)
1926 .padding_right(1)
1927 .input_channels(3)
1928 .output_channels_tile(8)
1929 .output_channels(output_channels)
1930 .input_width(input_width)
1931 .input_height(9)
1932 .output_y_end(output_y_end)
1933 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1934 }
1935 }
1936 }
1937 }
1938
1939 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, qmin) {
1940 TEST_REQUIRES_ARM_NEON_FMA;
1941 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1942 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1943 ConvHWCMicrokernelTester()
1944 .kernel_size(3)
1945 .subsampling(2)
1946 .padding_right(1)
1947 .input_channels(3)
1948 .output_channels_tile(8)
1949 .output_channels(output_channels)
1950 .input_width(input_width)
1951 .input_height(6)
1952 .qmin(128)
1953 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1954 }
1955 }
1956 }
1957
1958 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, qmax) {
1959 TEST_REQUIRES_ARM_NEON_FMA;
1960 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1961 for (size_t input_width = 2; input_width < 32; input_width += 7) {
1962 ConvHWCMicrokernelTester()
1963 .kernel_size(3)
1964 .subsampling(2)
1965 .padding_right(1)
1966 .input_channels(3)
1967 .output_channels_tile(8)
1968 .output_channels(output_channels)
1969 .input_width(input_width)
1970 .input_height(6)
1971 .qmax(128)
1972 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2);
1973 }
1974 }
1975 }
1976#endif // XNN_ARCH_ARM64
1977
1978#if XNN_ARCH_ARM64
Marat Dukhan7e4ca402020-05-15 18:50:12 -07001979 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_eq_4) {
1980 TEST_REQUIRES_ARM_NEON_FMA;
1981 ConvHWCMicrokernelTester()
1982 .kernel_size(3)
1983 .subsampling(2)
1984 .padding_right(1)
1985 .input_channels(3)
1986 .output_channels_tile(4)
1987 .output_channels(4)
1988 .input_width(5)
1989 .input_height(3)
1990 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
1991 }
1992
1993 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_div_4) {
1994 TEST_REQUIRES_ARM_NEON_FMA;
1995 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
1996 ConvHWCMicrokernelTester()
1997 .kernel_size(3)
1998 .subsampling(2)
1999 .padding_right(1)
2000 .input_channels(3)
2001 .output_channels_tile(4)
2002 .output_channels(4)
2003 .input_width(input_width)
2004 .input_height(3)
2005 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2006 }
2007 }
2008
2009 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_lt_4) {
2010 TEST_REQUIRES_ARM_NEON_FMA;
2011 for (size_t input_width = 2; input_width < 4; input_width++) {
2012 ConvHWCMicrokernelTester()
2013 .kernel_size(3)
2014 .subsampling(2)
2015 .padding_right(1)
2016 .input_channels(3)
2017 .output_channels_tile(4)
2018 .output_channels(4)
2019 .input_width(input_width)
2020 .input_height(3)
2021 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2022 }
2023 }
2024
2025 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_gt_4) {
2026 TEST_REQUIRES_ARM_NEON_FMA;
2027 for (size_t input_width = 5; input_width < 8; input_width++) {
2028 ConvHWCMicrokernelTester()
2029 .kernel_size(3)
2030 .subsampling(2)
2031 .padding_right(1)
2032 .input_channels(3)
2033 .output_channels_tile(4)
2034 .output_channels(4)
2035 .input_width(input_width)
2036 .input_height(3)
2037 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2038 }
2039 }
2040
2041 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
2042 TEST_REQUIRES_ARM_NEON_FMA;
2043 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
2044 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2045 ConvHWCMicrokernelTester()
2046 .kernel_size(3)
2047 .subsampling(2)
2048 .padding_right(1)
2049 .input_channels(3)
2050 .output_channels_tile(4)
2051 .output_channels(output_channels)
2052 .input_width(input_width)
2053 .input_height(3)
2054 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2055 }
2056 }
2057 }
2058
2059 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_div_4) {
2060 TEST_REQUIRES_ARM_NEON_FMA;
2061 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
2062 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2063 ConvHWCMicrokernelTester()
2064 .kernel_size(3)
2065 .subsampling(2)
2066 .padding_right(1)
2067 .input_channels(3)
2068 .output_channels_tile(4)
2069 .output_channels(output_channels)
2070 .input_width(input_width)
2071 .input_height(3)
2072 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2073 }
2074 }
2075 }
2076
2077 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
2078 TEST_REQUIRES_ARM_NEON_FMA;
2079 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
2080 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2081 ConvHWCMicrokernelTester()
2082 .kernel_size(3)
2083 .subsampling(2)
2084 .padding_right(1)
2085 .input_channels(3)
2086 .output_channels_tile(4)
2087 .output_channels(output_channels)
2088 .input_width(input_width)
2089 .input_height(3)
2090 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2091 }
2092 }
2093 }
2094
2095 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_height_lt_3) {
2096 TEST_REQUIRES_ARM_NEON_FMA;
2097 for (size_t input_height = 1; input_height < 3; input_height++) {
2098 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2099 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2100 ConvHWCMicrokernelTester()
2101 .kernel_size(3)
2102 .subsampling(2)
2103 .padding_right(1)
2104 .padding_height(1) // padded input height of at least 3 required
2105 .input_channels(3)
2106 .output_channels_tile(4)
2107 .output_channels(output_channels)
2108 .input_width(input_width)
2109 .input_height(input_height)
2110 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2111 }
2112 }
2113 }
2114 }
2115
2116 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_height_gt_3) {
2117 TEST_REQUIRES_ARM_NEON_FMA;
2118 for (size_t input_height = 4; input_height <= 9; input_height++) {
2119 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2120 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2121 ConvHWCMicrokernelTester()
2122 .kernel_size(3)
2123 .subsampling(2)
2124 .padding_right(1)
2125 .input_channels(3)
2126 .output_channels_tile(4)
2127 .output_channels(output_channels)
2128 .input_width(input_width)
2129 .input_height(input_height)
2130 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2131 }
2132 }
2133 }
2134 }
2135
2136 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, padding_top) {
2137 TEST_REQUIRES_ARM_NEON_FMA;
2138 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
2139 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2140 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2141 ConvHWCMicrokernelTester()
2142 .kernel_size(3)
2143 .subsampling(2)
2144 .padding_right(1)
2145 .padding_top(padding_top)
2146 .input_channels(3)
2147 .output_channels_tile(4)
2148 .output_channels(output_channels)
2149 .input_width(input_width)
2150 .input_height(9)
2151 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2152 }
2153 }
2154 }
2155 }
2156
2157 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, padding_bottom) {
2158 TEST_REQUIRES_ARM_NEON_FMA;
2159 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
2160 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2161 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2162 ConvHWCMicrokernelTester()
2163 .kernel_size(3)
2164 .subsampling(2)
2165 .padding_right(1)
2166 .padding_bottom(padding_bottom)
2167 .input_channels(3)
2168 .output_channels_tile(4)
2169 .output_channels(output_channels)
2170 .input_width(input_width)
2171 .input_height(9)
2172 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2173 }
2174 }
2175 }
2176 }
2177
2178 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_y_start) {
2179 TEST_REQUIRES_ARM_NEON_FMA;
2180 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
2181 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2182 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2183 ConvHWCMicrokernelTester()
2184 .kernel_size(3)
2185 .subsampling(2)
2186 .padding_right(1)
2187 .input_channels(3)
2188 .output_channels_tile(4)
2189 .output_channels(output_channels)
2190 .input_width(input_width)
2191 .input_height(9)
2192 .output_y_start(output_y_start)
2193 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2194 }
2195 }
2196 }
2197 }
2198
2199 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_y_end) {
2200 TEST_REQUIRES_ARM_NEON_FMA;
2201 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
2202 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2203 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2204 ConvHWCMicrokernelTester()
2205 .kernel_size(3)
2206 .subsampling(2)
2207 .padding_right(1)
2208 .input_channels(3)
2209 .output_channels_tile(4)
2210 .output_channels(output_channels)
2211 .input_width(input_width)
2212 .input_height(9)
2213 .output_y_end(output_y_end)
2214 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2215 }
2216 }
2217 }
2218 }
2219
2220 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, qmin) {
2221 TEST_REQUIRES_ARM_NEON_FMA;
2222 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2223 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2224 ConvHWCMicrokernelTester()
2225 .kernel_size(3)
2226 .subsampling(2)
2227 .padding_right(1)
2228 .input_channels(3)
2229 .output_channels_tile(4)
2230 .output_channels(output_channels)
2231 .input_width(input_width)
2232 .input_height(6)
2233 .qmin(128)
2234 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2235 }
2236 }
2237 }
2238
2239 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, qmax) {
2240 TEST_REQUIRES_ARM_NEON_FMA;
2241 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2242 for (size_t input_width = 2; input_width < 32; input_width += 7) {
2243 ConvHWCMicrokernelTester()
2244 .kernel_size(3)
2245 .subsampling(2)
2246 .padding_right(1)
2247 .input_channels(3)
2248 .output_channels_tile(4)
2249 .output_channels(output_channels)
2250 .input_width(input_width)
2251 .input_height(6)
2252 .qmax(128)
2253 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2);
2254 }
2255 }
2256 }
2257#endif // XNN_ARCH_ARM64
Marat Dukhan6b7dfae2019-12-04 16:00:52 -08002258
Marat Dukhan56b10cd2020-05-18 09:35:49 -07002259#if XNN_ARCH_ARM64
2260 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_eq_2) {
2261 TEST_REQUIRES_ARM_NEON_FMA;
2262 ConvHWCMicrokernelTester()
2263 .kernel_size(3)
2264 .subsampling(2)
2265 .padding_right(1)
2266 .input_channels(3)
2267 .output_channels_tile(8)
2268 .output_channels(8)
2269 .input_width(2)
2270 .input_height(3)
2271 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2272 }
2273
2274 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_div_2) {
2275 TEST_REQUIRES_ARM_NEON_FMA;
2276 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
2277 ConvHWCMicrokernelTester()
2278 .kernel_size(3)
2279 .subsampling(2)
2280 .padding_right(1)
2281 .input_channels(3)
2282 .output_channels_tile(8)
2283 .output_channels(8)
2284 .input_width(input_width)
2285 .input_height(3)
2286 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2287 }
2288 }
2289
2290 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_gt_2) {
2291 TEST_REQUIRES_ARM_NEON_FMA;
2292 for (size_t input_width = 3; input_width < 4; input_width++) {
2293 ConvHWCMicrokernelTester()
2294 .kernel_size(3)
2295 .subsampling(2)
2296 .padding_right(1)
2297 .input_channels(3)
2298 .output_channels_tile(8)
2299 .output_channels(8)
2300 .input_width(input_width)
2301 .input_height(3)
2302 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2303 }
2304 }
2305
2306 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_lt_8) {
2307 TEST_REQUIRES_ARM_NEON_FMA;
2308 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
2309 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2310 ConvHWCMicrokernelTester()
2311 .kernel_size(3)
2312 .subsampling(2)
2313 .padding_right(1)
2314 .input_channels(3)
2315 .output_channels_tile(8)
2316 .output_channels(output_channels)
2317 .input_width(input_width)
2318 .input_height(3)
2319 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2320 }
2321 }
2322 }
2323
2324 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_div_8) {
2325 TEST_REQUIRES_ARM_NEON_FMA;
2326 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
2327 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2328 ConvHWCMicrokernelTester()
2329 .kernel_size(3)
2330 .subsampling(2)
2331 .padding_right(1)
2332 .input_channels(3)
2333 .output_channels_tile(8)
2334 .output_channels(output_channels)
2335 .input_width(input_width)
2336 .input_height(3)
2337 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2338 }
2339 }
2340 }
2341
2342 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_gt_8) {
2343 TEST_REQUIRES_ARM_NEON_FMA;
2344 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
2345 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2346 ConvHWCMicrokernelTester()
2347 .kernel_size(3)
2348 .subsampling(2)
2349 .padding_right(1)
2350 .input_channels(3)
2351 .output_channels_tile(8)
2352 .output_channels(output_channels)
2353 .input_width(input_width)
2354 .input_height(3)
2355 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2356 }
2357 }
2358 }
2359
2360 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_height_lt_3) {
2361 TEST_REQUIRES_ARM_NEON_FMA;
2362 for (size_t input_height = 1; input_height < 3; input_height++) {
2363 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2364 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2365 ConvHWCMicrokernelTester()
2366 .kernel_size(3)
2367 .subsampling(2)
2368 .padding_height(1)
2369 .padding_right(1)
2370 .input_channels(3)
2371 .output_channels_tile(8)
2372 .output_channels(output_channels)
2373 .input_width(input_width)
2374 .input_height(input_height)
2375 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2376 }
2377 }
2378 }
2379 }
2380
2381 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_height_gt_3) {
2382 TEST_REQUIRES_ARM_NEON_FMA;
2383 for (size_t input_height = 4; input_height <= 9; input_height++) {
2384 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2385 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2386 ConvHWCMicrokernelTester()
2387 .kernel_size(3)
2388 .subsampling(2)
2389 .padding_right(1)
2390 .input_channels(3)
2391 .output_channels_tile(8)
2392 .output_channels(output_channels)
2393 .input_width(input_width)
2394 .input_height(input_height)
2395 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2396 }
2397 }
2398 }
2399 }
2400
2401 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, padding_top) {
2402 TEST_REQUIRES_ARM_NEON_FMA;
2403 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
2404 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2405 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2406 ConvHWCMicrokernelTester()
2407 .kernel_size(3)
2408 .subsampling(2)
2409 .padding_right(1)
2410 .padding_top(padding_top)
2411 .input_channels(3)
2412 .output_channels_tile(8)
2413 .output_channels(output_channels)
2414 .input_width(input_width)
2415 .input_height(9)
2416 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2417 }
2418 }
2419 }
2420 }
2421
2422 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, padding_bottom) {
2423 TEST_REQUIRES_ARM_NEON_FMA;
2424 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
2425 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2426 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2427 ConvHWCMicrokernelTester()
2428 .kernel_size(3)
2429 .subsampling(2)
2430 .padding_right(1)
2431 .padding_bottom(padding_bottom)
2432 .input_channels(3)
2433 .output_channels_tile(8)
2434 .output_channels(output_channels)
2435 .input_width(input_width)
2436 .input_height(9)
2437 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2438 }
2439 }
2440 }
2441 }
2442
2443 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_y_start) {
2444 TEST_REQUIRES_ARM_NEON_FMA;
2445 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
2446 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2447 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2448 ConvHWCMicrokernelTester()
2449 .kernel_size(3)
2450 .subsampling(2)
2451 .padding_right(1)
2452 .input_channels(3)
2453 .output_channels_tile(8)
2454 .output_channels(output_channels)
2455 .input_width(input_width)
2456 .input_height(9)
2457 .output_y_start(output_y_start)
2458 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2459 }
2460 }
2461 }
2462 }
2463
2464 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_y_end) {
2465 TEST_REQUIRES_ARM_NEON_FMA;
2466 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
2467 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2468 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2469 ConvHWCMicrokernelTester()
2470 .kernel_size(3)
2471 .subsampling(2)
2472 .padding_right(1)
2473 .input_channels(3)
2474 .output_channels_tile(8)
2475 .output_channels(output_channels)
2476 .input_width(input_width)
2477 .input_height(9)
2478 .output_y_end(output_y_end)
2479 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2480 }
2481 }
2482 }
2483 }
2484
2485 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, qmin) {
2486 TEST_REQUIRES_ARM_NEON_FMA;
2487 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2488 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2489 ConvHWCMicrokernelTester()
2490 .kernel_size(3)
2491 .subsampling(2)
2492 .padding_right(1)
2493 .input_channels(3)
2494 .output_channels_tile(8)
2495 .output_channels(output_channels)
2496 .input_width(input_width)
2497 .input_height(6)
2498 .qmin(128)
2499 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2500 }
2501 }
2502 }
2503
2504 TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, qmax) {
2505 TEST_REQUIRES_ARM_NEON_FMA;
2506 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2507 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2508 ConvHWCMicrokernelTester()
2509 .kernel_size(3)
2510 .subsampling(2)
2511 .padding_right(1)
2512 .input_channels(3)
2513 .output_channels_tile(8)
2514 .output_channels(output_channels)
2515 .input_width(input_width)
2516 .input_height(6)
2517 .qmax(128)
2518 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1);
2519 }
2520 }
2521 }
2522#endif // XNN_ARCH_ARM64
2523
2524#if XNN_ARCH_ARM64
2525 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_eq_2) {
2526 TEST_REQUIRES_ARM_NEON_FMA;
2527 ConvHWCMicrokernelTester()
2528 .kernel_size(3)
2529 .subsampling(2)
2530 .padding_right(1)
2531 .input_channels(3)
2532 .output_channels_tile(4)
2533 .output_channels(4)
2534 .input_width(2)
2535 .input_height(3)
2536 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2537 }
2538
2539 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_div_2) {
2540 TEST_REQUIRES_ARM_NEON_FMA;
2541 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
2542 ConvHWCMicrokernelTester()
2543 .kernel_size(3)
2544 .subsampling(2)
2545 .padding_right(1)
2546 .input_channels(3)
2547 .output_channels_tile(4)
2548 .output_channels(4)
2549 .input_width(input_width)
2550 .input_height(3)
2551 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2552 }
2553 }
2554
2555 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_gt_2) {
2556 TEST_REQUIRES_ARM_NEON_FMA;
2557 for (size_t input_width = 3; input_width < 4; input_width++) {
2558 ConvHWCMicrokernelTester()
2559 .kernel_size(3)
2560 .subsampling(2)
2561 .padding_right(1)
2562 .input_channels(3)
2563 .output_channels_tile(4)
2564 .output_channels(4)
2565 .input_width(input_width)
2566 .input_height(3)
2567 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2568 }
2569 }
2570
2571 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_lt_4) {
2572 TEST_REQUIRES_ARM_NEON_FMA;
2573 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
2574 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2575 ConvHWCMicrokernelTester()
2576 .kernel_size(3)
2577 .subsampling(2)
2578 .padding_right(1)
2579 .input_channels(3)
2580 .output_channels_tile(4)
2581 .output_channels(output_channels)
2582 .input_width(input_width)
2583 .input_height(3)
2584 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2585 }
2586 }
2587 }
2588
2589 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_div_4) {
2590 TEST_REQUIRES_ARM_NEON_FMA;
2591 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
2592 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2593 ConvHWCMicrokernelTester()
2594 .kernel_size(3)
2595 .subsampling(2)
2596 .padding_right(1)
2597 .input_channels(3)
2598 .output_channels_tile(4)
2599 .output_channels(output_channels)
2600 .input_width(input_width)
2601 .input_height(3)
2602 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2603 }
2604 }
2605 }
2606
2607 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_gt_4) {
2608 TEST_REQUIRES_ARM_NEON_FMA;
2609 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
2610 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2611 ConvHWCMicrokernelTester()
2612 .kernel_size(3)
2613 .subsampling(2)
2614 .padding_right(1)
2615 .input_channels(3)
2616 .output_channels_tile(4)
2617 .output_channels(output_channels)
2618 .input_width(input_width)
2619 .input_height(3)
2620 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2621 }
2622 }
2623 }
2624
2625 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_height_lt_3) {
2626 TEST_REQUIRES_ARM_NEON_FMA;
2627 for (size_t input_height = 1; input_height < 3; input_height++) {
2628 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2629 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2630 ConvHWCMicrokernelTester()
2631 .kernel_size(3)
2632 .subsampling(2)
2633 .padding_right(1)
2634 .padding_height(1) // padded input height of at least 3 required
2635 .input_channels(3)
2636 .output_channels_tile(4)
2637 .output_channels(output_channels)
2638 .input_width(input_width)
2639 .input_height(input_height)
2640 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2641 }
2642 }
2643 }
2644 }
2645
2646 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_height_gt_3) {
2647 TEST_REQUIRES_ARM_NEON_FMA;
2648 for (size_t input_height = 4; input_height <= 9; input_height++) {
2649 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2650 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2651 ConvHWCMicrokernelTester()
2652 .kernel_size(3)
2653 .subsampling(2)
2654 .padding_right(1)
2655 .input_channels(3)
2656 .output_channels_tile(4)
2657 .output_channels(output_channels)
2658 .input_width(input_width)
2659 .input_height(input_height)
2660 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2661 }
2662 }
2663 }
2664 }
2665
2666 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, padding_top) {
2667 TEST_REQUIRES_ARM_NEON_FMA;
2668 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
2669 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2670 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2671 ConvHWCMicrokernelTester()
2672 .kernel_size(3)
2673 .subsampling(2)
2674 .padding_right(1)
2675 .padding_top(padding_top)
2676 .input_channels(3)
2677 .output_channels_tile(4)
2678 .output_channels(output_channels)
2679 .input_width(input_width)
2680 .input_height(9)
2681 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2682 }
2683 }
2684 }
2685 }
2686
2687 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, padding_bottom) {
2688 TEST_REQUIRES_ARM_NEON_FMA;
2689 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
2690 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2691 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2692 ConvHWCMicrokernelTester()
2693 .kernel_size(3)
2694 .subsampling(2)
2695 .padding_right(1)
2696 .padding_bottom(padding_bottom)
2697 .input_channels(3)
2698 .output_channels_tile(4)
2699 .output_channels(output_channels)
2700 .input_width(input_width)
2701 .input_height(9)
2702 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2703 }
2704 }
2705 }
2706 }
2707
2708 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_y_start) {
2709 TEST_REQUIRES_ARM_NEON_FMA;
2710 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
2711 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2712 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2713 ConvHWCMicrokernelTester()
2714 .kernel_size(3)
2715 .subsampling(2)
2716 .padding_right(1)
2717 .input_channels(3)
2718 .output_channels_tile(4)
2719 .output_channels(output_channels)
2720 .input_width(input_width)
2721 .input_height(9)
2722 .output_y_start(output_y_start)
2723 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2724 }
2725 }
2726 }
2727 }
2728
2729 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_y_end) {
2730 TEST_REQUIRES_ARM_NEON_FMA;
2731 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
2732 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2733 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2734 ConvHWCMicrokernelTester()
2735 .kernel_size(3)
2736 .subsampling(2)
2737 .padding_right(1)
2738 .input_channels(3)
2739 .output_channels_tile(4)
2740 .output_channels(output_channels)
2741 .input_width(input_width)
2742 .input_height(9)
2743 .output_y_end(output_y_end)
2744 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2745 }
2746 }
2747 }
2748 }
2749
2750 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, qmin) {
2751 TEST_REQUIRES_ARM_NEON_FMA;
2752 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2753 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2754 ConvHWCMicrokernelTester()
2755 .kernel_size(3)
2756 .subsampling(2)
2757 .padding_right(1)
2758 .input_channels(3)
2759 .output_channels_tile(4)
2760 .output_channels(output_channels)
2761 .input_width(input_width)
2762 .input_height(6)
2763 .qmin(128)
2764 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2765 }
2766 }
2767 }
2768
2769 TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, qmax) {
2770 TEST_REQUIRES_ARM_NEON_FMA;
2771 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
2772 for (size_t input_width = 2; input_width < 16; input_width += 3) {
2773 ConvHWCMicrokernelTester()
2774 .kernel_size(3)
2775 .subsampling(2)
2776 .padding_right(1)
2777 .input_channels(3)
2778 .output_channels_tile(4)
2779 .output_channels(output_channels)
2780 .input_width(input_width)
2781 .input_height(6)
2782 .qmax(128)
2783 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1);
2784 }
2785 }
2786 }
2787#endif // XNN_ARCH_ARM64
2788
2789#if XNN_ARCH_ARM64
2790 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_eq_2) {
2791 TEST_REQUIRES_ARM_NEON_FMA;
2792 ConvHWCMicrokernelTester()
2793 .kernel_size(3)
2794 .subsampling(2)
2795 .padding_width(1)
2796 .input_channels(3)
2797 .output_channels_tile(8)
2798 .output_channels(8)
2799 .input_width(2)
2800 .input_height(3)
2801 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2802 }
2803
2804 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_div_2) {
2805 TEST_REQUIRES_ARM_NEON_FMA;
2806 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
2807 ConvHWCMicrokernelTester()
2808 .kernel_size(3)
2809 .subsampling(2)
2810 .padding_width(1)
2811 .input_channels(3)
2812 .output_channels_tile(8)
2813 .output_channels(8)
2814 .input_width(input_width)
2815 .input_height(3)
2816 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2817 }
2818 }
2819
2820 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_gt_2) {
2821 TEST_REQUIRES_ARM_NEON_FMA;
2822 for (size_t input_width = 3; input_width < 4; input_width++) {
2823 ConvHWCMicrokernelTester()
2824 .kernel_size(3)
2825 .subsampling(2)
2826 .padding_width(1)
2827 .input_channels(3)
2828 .output_channels_tile(8)
2829 .output_channels(8)
2830 .input_width(input_width)
2831 .input_height(3)
2832 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2833 }
2834 }
2835
2836 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_lt_8) {
2837 TEST_REQUIRES_ARM_NEON_FMA;
2838 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
2839 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2840 ConvHWCMicrokernelTester()
2841 .kernel_size(3)
2842 .subsampling(2)
2843 .padding_width(1)
2844 .input_channels(3)
2845 .output_channels_tile(8)
2846 .output_channels(output_channels)
2847 .input_width(input_width)
2848 .input_height(3)
2849 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2850 }
2851 }
2852 }
2853
2854 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_div_8) {
2855 TEST_REQUIRES_ARM_NEON_FMA;
2856 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
2857 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2858 ConvHWCMicrokernelTester()
2859 .kernel_size(3)
2860 .subsampling(2)
2861 .padding_width(1)
2862 .input_channels(3)
2863 .output_channels_tile(8)
2864 .output_channels(output_channels)
2865 .input_width(input_width)
2866 .input_height(3)
2867 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2868 }
2869 }
2870 }
2871
2872 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_gt_8) {
2873 TEST_REQUIRES_ARM_NEON_FMA;
2874 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
2875 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2876 ConvHWCMicrokernelTester()
2877 .kernel_size(3)
2878 .subsampling(2)
2879 .padding_width(1)
2880 .input_channels(3)
2881 .output_channels_tile(8)
2882 .output_channels(output_channels)
2883 .input_width(input_width)
2884 .input_height(3)
2885 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2886 }
2887 }
2888 }
2889
2890 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_height_lt_3) {
2891 TEST_REQUIRES_ARM_NEON_FMA;
2892 for (size_t input_height = 1; input_height < 3; input_height++) {
2893 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2894 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2895 ConvHWCMicrokernelTester()
2896 .kernel_size(3)
2897 .subsampling(2)
2898 .padding_height(1)
2899 .padding_width(1)
2900 .input_channels(3)
2901 .output_channels_tile(8)
2902 .output_channels(output_channels)
2903 .input_width(input_width)
2904 .input_height(input_height)
2905 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2906 }
2907 }
2908 }
2909 }
2910
2911 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_height_gt_3) {
2912 TEST_REQUIRES_ARM_NEON_FMA;
2913 for (size_t input_height = 4; input_height <= 9; input_height++) {
2914 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2915 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2916 ConvHWCMicrokernelTester()
2917 .kernel_size(3)
2918 .subsampling(2)
2919 .padding_width(1)
2920 .input_channels(3)
2921 .output_channels_tile(8)
2922 .output_channels(output_channels)
2923 .input_width(input_width)
2924 .input_height(input_height)
2925 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2926 }
2927 }
2928 }
2929 }
2930
2931 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, padding_top) {
2932 TEST_REQUIRES_ARM_NEON_FMA;
2933 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
2934 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2935 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2936 ConvHWCMicrokernelTester()
2937 .kernel_size(3)
2938 .subsampling(2)
2939 .padding_width(1)
2940 .padding_top(padding_top)
2941 .input_channels(3)
2942 .output_channels_tile(8)
2943 .output_channels(output_channels)
2944 .input_width(input_width)
2945 .input_height(9)
2946 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2947 }
2948 }
2949 }
2950 }
2951
2952 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, padding_bottom) {
2953 TEST_REQUIRES_ARM_NEON_FMA;
2954 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
2955 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2956 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2957 ConvHWCMicrokernelTester()
2958 .kernel_size(3)
2959 .subsampling(2)
2960 .padding_width(1)
2961 .padding_bottom(padding_bottom)
2962 .input_channels(3)
2963 .output_channels_tile(8)
2964 .output_channels(output_channels)
2965 .input_width(input_width)
2966 .input_height(9)
2967 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2968 }
2969 }
2970 }
2971 }
2972
2973 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_y_start) {
2974 TEST_REQUIRES_ARM_NEON_FMA;
2975 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
2976 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2977 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2978 ConvHWCMicrokernelTester()
2979 .kernel_size(3)
2980 .subsampling(2)
2981 .padding_width(1)
2982 .input_channels(3)
2983 .output_channels_tile(8)
2984 .output_channels(output_channels)
2985 .input_width(input_width)
2986 .input_height(9)
2987 .output_y_start(output_y_start)
2988 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
2989 }
2990 }
2991 }
2992 }
2993
2994 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_y_end) {
2995 TEST_REQUIRES_ARM_NEON_FMA;
2996 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
2997 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
2998 for (size_t input_width = 1; input_width < 16; input_width += 3) {
2999 ConvHWCMicrokernelTester()
3000 .kernel_size(3)
3001 .subsampling(2)
3002 .padding_width(1)
3003 .input_channels(3)
3004 .output_channels_tile(8)
3005 .output_channels(output_channels)
3006 .input_width(input_width)
3007 .input_height(9)
3008 .output_y_end(output_y_end)
3009 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
3010 }
3011 }
3012 }
3013 }
3014
3015 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, qmin) {
3016 TEST_REQUIRES_ARM_NEON_FMA;
3017 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3018 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3019 ConvHWCMicrokernelTester()
3020 .kernel_size(3)
3021 .subsampling(2)
3022 .padding_width(1)
3023 .input_channels(3)
3024 .output_channels_tile(8)
3025 .output_channels(output_channels)
3026 .input_width(input_width)
3027 .input_height(6)
3028 .qmin(128)
3029 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
3030 }
3031 }
3032 }
3033
3034 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, qmax) {
3035 TEST_REQUIRES_ARM_NEON_FMA;
3036 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3037 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3038 ConvHWCMicrokernelTester()
3039 .kernel_size(3)
3040 .subsampling(2)
3041 .padding_width(1)
3042 .input_channels(3)
3043 .output_channels_tile(8)
3044 .output_channels(output_channels)
3045 .input_width(input_width)
3046 .input_height(6)
3047 .qmax(128)
3048 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1);
3049 }
3050 }
3051 }
3052#endif // XNN_ARCH_ARM64
3053
3054#if XNN_ARCH_ARM64
3055 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_eq_2) {
3056 TEST_REQUIRES_ARM_NEON_FMA;
3057 ConvHWCMicrokernelTester()
3058 .kernel_size(3)
3059 .subsampling(2)
3060 .padding_width(1)
3061 .input_channels(3)
3062 .output_channels_tile(4)
3063 .output_channels(4)
3064 .input_width(2)
3065 .input_height(3)
3066 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3067 }
3068
3069 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_div_2) {
3070 TEST_REQUIRES_ARM_NEON_FMA;
3071 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
3072 ConvHWCMicrokernelTester()
3073 .kernel_size(3)
3074 .subsampling(2)
3075 .padding_width(1)
3076 .input_channels(3)
3077 .output_channels_tile(4)
3078 .output_channels(4)
3079 .input_width(input_width)
3080 .input_height(3)
3081 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3082 }
3083 }
3084
3085 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_gt_2) {
3086 TEST_REQUIRES_ARM_NEON_FMA;
3087 for (size_t input_width = 3; input_width < 4; input_width++) {
3088 ConvHWCMicrokernelTester()
3089 .kernel_size(3)
3090 .subsampling(2)
3091 .padding_width(1)
3092 .input_channels(3)
3093 .output_channels_tile(4)
3094 .output_channels(4)
3095 .input_width(input_width)
3096 .input_height(3)
3097 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3098 }
3099 }
3100
3101 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_lt_4) {
3102 TEST_REQUIRES_ARM_NEON_FMA;
3103 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
3104 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3105 ConvHWCMicrokernelTester()
3106 .kernel_size(3)
3107 .subsampling(2)
3108 .padding_width(1)
3109 .input_channels(3)
3110 .output_channels_tile(4)
3111 .output_channels(output_channels)
3112 .input_width(input_width)
3113 .input_height(3)
3114 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3115 }
3116 }
3117 }
3118
3119 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_div_4) {
3120 TEST_REQUIRES_ARM_NEON_FMA;
3121 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
3122 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3123 ConvHWCMicrokernelTester()
3124 .kernel_size(3)
3125 .subsampling(2)
3126 .padding_width(1)
3127 .input_channels(3)
3128 .output_channels_tile(4)
3129 .output_channels(output_channels)
3130 .input_width(input_width)
3131 .input_height(3)
3132 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3133 }
3134 }
3135 }
3136
3137 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_gt_4) {
3138 TEST_REQUIRES_ARM_NEON_FMA;
3139 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
3140 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3141 ConvHWCMicrokernelTester()
3142 .kernel_size(3)
3143 .subsampling(2)
3144 .padding_width(1)
3145 .input_channels(3)
3146 .output_channels_tile(4)
3147 .output_channels(output_channels)
3148 .input_width(input_width)
3149 .input_height(3)
3150 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3151 }
3152 }
3153 }
3154
3155 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_height_lt_3) {
3156 TEST_REQUIRES_ARM_NEON_FMA;
3157 for (size_t input_height = 1; input_height < 3; input_height++) {
3158 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3159 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3160 ConvHWCMicrokernelTester()
3161 .kernel_size(3)
3162 .subsampling(2)
3163 .padding_width(1)
3164 .padding_height(1) // padded input height of at least 3 required
3165 .input_channels(3)
3166 .output_channels_tile(4)
3167 .output_channels(output_channels)
3168 .input_width(input_width)
3169 .input_height(input_height)
3170 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3171 }
3172 }
3173 }
3174 }
3175
3176 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_height_gt_3) {
3177 TEST_REQUIRES_ARM_NEON_FMA;
3178 for (size_t input_height = 4; input_height <= 9; input_height++) {
3179 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3180 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3181 ConvHWCMicrokernelTester()
3182 .kernel_size(3)
3183 .subsampling(2)
3184 .padding_width(1)
3185 .input_channels(3)
3186 .output_channels_tile(4)
3187 .output_channels(output_channels)
3188 .input_width(input_width)
3189 .input_height(input_height)
3190 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3191 }
3192 }
3193 }
3194 }
3195
3196 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, padding_top) {
3197 TEST_REQUIRES_ARM_NEON_FMA;
3198 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
3199 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3200 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3201 ConvHWCMicrokernelTester()
3202 .kernel_size(3)
3203 .subsampling(2)
3204 .padding_width(1)
3205 .padding_top(padding_top)
3206 .input_channels(3)
3207 .output_channels_tile(4)
3208 .output_channels(output_channels)
3209 .input_width(input_width)
3210 .input_height(9)
3211 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3212 }
3213 }
3214 }
3215 }
3216
3217 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, padding_bottom) {
3218 TEST_REQUIRES_ARM_NEON_FMA;
3219 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
3220 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3221 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3222 ConvHWCMicrokernelTester()
3223 .kernel_size(3)
3224 .subsampling(2)
3225 .padding_width(1)
3226 .padding_bottom(padding_bottom)
3227 .input_channels(3)
3228 .output_channels_tile(4)
3229 .output_channels(output_channels)
3230 .input_width(input_width)
3231 .input_height(9)
3232 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3233 }
3234 }
3235 }
3236 }
3237
3238 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_y_start) {
3239 TEST_REQUIRES_ARM_NEON_FMA;
3240 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
3241 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3242 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3243 ConvHWCMicrokernelTester()
3244 .kernel_size(3)
3245 .subsampling(2)
3246 .padding_width(1)
3247 .input_channels(3)
3248 .output_channels_tile(4)
3249 .output_channels(output_channels)
3250 .input_width(input_width)
3251 .input_height(9)
3252 .output_y_start(output_y_start)
3253 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3254 }
3255 }
3256 }
3257 }
3258
3259 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_y_end) {
3260 TEST_REQUIRES_ARM_NEON_FMA;
3261 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
3262 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3263 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3264 ConvHWCMicrokernelTester()
3265 .kernel_size(3)
3266 .subsampling(2)
3267 .padding_width(1)
3268 .input_channels(3)
3269 .output_channels_tile(4)
3270 .output_channels(output_channels)
3271 .input_width(input_width)
3272 .input_height(9)
3273 .output_y_end(output_y_end)
3274 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3275 }
3276 }
3277 }
3278 }
3279
3280 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, qmin) {
3281 TEST_REQUIRES_ARM_NEON_FMA;
3282 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3283 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3284 ConvHWCMicrokernelTester()
3285 .kernel_size(3)
3286 .subsampling(2)
3287 .padding_width(1)
3288 .input_channels(3)
3289 .output_channels_tile(4)
3290 .output_channels(output_channels)
3291 .input_width(input_width)
3292 .input_height(6)
3293 .qmin(128)
3294 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3295 }
3296 }
3297 }
3298
3299 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, qmax) {
3300 TEST_REQUIRES_ARM_NEON_FMA;
3301 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3302 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3303 ConvHWCMicrokernelTester()
3304 .kernel_size(3)
3305 .subsampling(2)
3306 .padding_width(1)
3307 .input_channels(3)
3308 .output_channels_tile(4)
3309 .output_channels(output_channels)
3310 .input_width(input_width)
3311 .input_height(6)
3312 .qmax(128)
3313 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1);
3314 }
3315 }
3316 }
3317#endif // XNN_ARCH_ARM64
3318
3319#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3320 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_eq_2) {
3321 TEST_REQUIRES_ARM_NEON;
3322 ConvHWCMicrokernelTester()
3323 .kernel_size(3)
3324 .subsampling(2)
3325 .padding_width(1)
3326 .input_channels(3)
3327 .output_channels_tile(8)
3328 .output_channels(8)
3329 .input_width(2)
3330 .input_height(3)
3331 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3332 }
3333
3334 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_div_2) {
3335 TEST_REQUIRES_ARM_NEON;
3336 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
3337 ConvHWCMicrokernelTester()
3338 .kernel_size(3)
3339 .subsampling(2)
3340 .padding_width(1)
3341 .input_channels(3)
3342 .output_channels_tile(8)
3343 .output_channels(8)
3344 .input_width(input_width)
3345 .input_height(3)
3346 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3347 }
3348 }
3349
3350 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_gt_2) {
3351 TEST_REQUIRES_ARM_NEON;
3352 for (size_t input_width = 3; input_width < 4; input_width++) {
3353 ConvHWCMicrokernelTester()
3354 .kernel_size(3)
3355 .subsampling(2)
3356 .padding_width(1)
3357 .input_channels(3)
3358 .output_channels_tile(8)
3359 .output_channels(8)
3360 .input_width(input_width)
3361 .input_height(3)
3362 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3363 }
3364 }
3365
3366 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_lt_8) {
3367 TEST_REQUIRES_ARM_NEON;
3368 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
3369 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3370 ConvHWCMicrokernelTester()
3371 .kernel_size(3)
3372 .subsampling(2)
3373 .padding_width(1)
3374 .input_channels(3)
3375 .output_channels_tile(8)
3376 .output_channels(output_channels)
3377 .input_width(input_width)
3378 .input_height(3)
3379 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3380 }
3381 }
3382 }
3383
3384 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_div_8) {
3385 TEST_REQUIRES_ARM_NEON;
3386 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
3387 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3388 ConvHWCMicrokernelTester()
3389 .kernel_size(3)
3390 .subsampling(2)
3391 .padding_width(1)
3392 .input_channels(3)
3393 .output_channels_tile(8)
3394 .output_channels(output_channels)
3395 .input_width(input_width)
3396 .input_height(3)
3397 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3398 }
3399 }
3400 }
3401
3402 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_gt_8) {
3403 TEST_REQUIRES_ARM_NEON;
3404 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
3405 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3406 ConvHWCMicrokernelTester()
3407 .kernel_size(3)
3408 .subsampling(2)
3409 .padding_width(1)
3410 .input_channels(3)
3411 .output_channels_tile(8)
3412 .output_channels(output_channels)
3413 .input_width(input_width)
3414 .input_height(3)
3415 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3416 }
3417 }
3418 }
3419
3420 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_height_lt_3) {
3421 TEST_REQUIRES_ARM_NEON;
3422 for (size_t input_height = 1; input_height < 3; input_height++) {
3423 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3424 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3425 ConvHWCMicrokernelTester()
3426 .kernel_size(3)
3427 .subsampling(2)
3428 .padding_height(1)
3429 .padding_width(1)
3430 .input_channels(3)
3431 .output_channels_tile(8)
3432 .output_channels(output_channels)
3433 .input_width(input_width)
3434 .input_height(input_height)
3435 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3436 }
3437 }
3438 }
3439 }
3440
3441 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_height_gt_3) {
3442 TEST_REQUIRES_ARM_NEON;
3443 for (size_t input_height = 4; input_height <= 9; input_height++) {
3444 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3445 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3446 ConvHWCMicrokernelTester()
3447 .kernel_size(3)
3448 .subsampling(2)
3449 .padding_width(1)
3450 .input_channels(3)
3451 .output_channels_tile(8)
3452 .output_channels(output_channels)
3453 .input_width(input_width)
3454 .input_height(input_height)
3455 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3456 }
3457 }
3458 }
3459 }
3460
3461 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, padding_top) {
3462 TEST_REQUIRES_ARM_NEON;
3463 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
3464 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3465 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3466 ConvHWCMicrokernelTester()
3467 .kernel_size(3)
3468 .subsampling(2)
3469 .padding_width(1)
3470 .padding_top(padding_top)
3471 .input_channels(3)
3472 .output_channels_tile(8)
3473 .output_channels(output_channels)
3474 .input_width(input_width)
3475 .input_height(9)
3476 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3477 }
3478 }
3479 }
3480 }
3481
3482 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, padding_bottom) {
3483 TEST_REQUIRES_ARM_NEON;
3484 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
3485 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3486 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3487 ConvHWCMicrokernelTester()
3488 .kernel_size(3)
3489 .subsampling(2)
3490 .padding_width(1)
3491 .padding_bottom(padding_bottom)
3492 .input_channels(3)
3493 .output_channels_tile(8)
3494 .output_channels(output_channels)
3495 .input_width(input_width)
3496 .input_height(9)
3497 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3498 }
3499 }
3500 }
3501 }
3502
3503 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_y_start) {
3504 TEST_REQUIRES_ARM_NEON;
3505 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
3506 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3507 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3508 ConvHWCMicrokernelTester()
3509 .kernel_size(3)
3510 .subsampling(2)
3511 .padding_width(1)
3512 .input_channels(3)
3513 .output_channels_tile(8)
3514 .output_channels(output_channels)
3515 .input_width(input_width)
3516 .input_height(9)
3517 .output_y_start(output_y_start)
3518 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3519 }
3520 }
3521 }
3522 }
3523
3524 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_y_end) {
3525 TEST_REQUIRES_ARM_NEON;
3526 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
3527 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3528 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3529 ConvHWCMicrokernelTester()
3530 .kernel_size(3)
3531 .subsampling(2)
3532 .padding_width(1)
3533 .input_channels(3)
3534 .output_channels_tile(8)
3535 .output_channels(output_channels)
3536 .input_width(input_width)
3537 .input_height(9)
3538 .output_y_end(output_y_end)
3539 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3540 }
3541 }
3542 }
3543 }
3544
3545 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, qmin) {
3546 TEST_REQUIRES_ARM_NEON;
3547 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3548 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3549 ConvHWCMicrokernelTester()
3550 .kernel_size(3)
3551 .subsampling(2)
3552 .padding_width(1)
3553 .input_channels(3)
3554 .output_channels_tile(8)
3555 .output_channels(output_channels)
3556 .input_width(input_width)
3557 .input_height(6)
3558 .qmin(128)
3559 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3560 }
3561 }
3562 }
3563
3564 TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, qmax) {
3565 TEST_REQUIRES_ARM_NEON;
3566 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3567 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3568 ConvHWCMicrokernelTester()
3569 .kernel_size(3)
3570 .subsampling(2)
3571 .padding_width(1)
3572 .input_channels(3)
3573 .output_channels_tile(8)
3574 .output_channels(output_channels)
3575 .input_width(input_width)
3576 .input_height(6)
3577 .qmax(128)
3578 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1);
3579 }
3580 }
3581 }
3582#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3583
3584#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3585 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_eq_2) {
3586 TEST_REQUIRES_ARM_NEON;
3587 ConvHWCMicrokernelTester()
3588 .kernel_size(3)
3589 .subsampling(2)
3590 .padding_width(1)
3591 .input_channels(3)
3592 .output_channels_tile(4)
3593 .output_channels(4)
3594 .input_width(2)
3595 .input_height(3)
3596 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3597 }
3598
3599 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_div_2) {
3600 TEST_REQUIRES_ARM_NEON;
3601 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
3602 ConvHWCMicrokernelTester()
3603 .kernel_size(3)
3604 .subsampling(2)
3605 .padding_width(1)
3606 .input_channels(3)
3607 .output_channels_tile(4)
3608 .output_channels(4)
3609 .input_width(input_width)
3610 .input_height(3)
3611 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3612 }
3613 }
3614
3615 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_gt_2) {
3616 TEST_REQUIRES_ARM_NEON;
3617 for (size_t input_width = 3; input_width < 4; input_width++) {
3618 ConvHWCMicrokernelTester()
3619 .kernel_size(3)
3620 .subsampling(2)
3621 .padding_width(1)
3622 .input_channels(3)
3623 .output_channels_tile(4)
3624 .output_channels(4)
3625 .input_width(input_width)
3626 .input_height(3)
3627 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3628 }
3629 }
3630
3631 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_lt_4) {
3632 TEST_REQUIRES_ARM_NEON;
3633 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
3634 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3635 ConvHWCMicrokernelTester()
3636 .kernel_size(3)
3637 .subsampling(2)
3638 .padding_width(1)
3639 .input_channels(3)
3640 .output_channels_tile(4)
3641 .output_channels(output_channels)
3642 .input_width(input_width)
3643 .input_height(3)
3644 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3645 }
3646 }
3647 }
3648
3649 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_div_4) {
3650 TEST_REQUIRES_ARM_NEON;
3651 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
3652 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3653 ConvHWCMicrokernelTester()
3654 .kernel_size(3)
3655 .subsampling(2)
3656 .padding_width(1)
3657 .input_channels(3)
3658 .output_channels_tile(4)
3659 .output_channels(output_channels)
3660 .input_width(input_width)
3661 .input_height(3)
3662 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3663 }
3664 }
3665 }
3666
3667 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_gt_4) {
3668 TEST_REQUIRES_ARM_NEON;
3669 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
3670 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3671 ConvHWCMicrokernelTester()
3672 .kernel_size(3)
3673 .subsampling(2)
3674 .padding_width(1)
3675 .input_channels(3)
3676 .output_channels_tile(4)
3677 .output_channels(output_channels)
3678 .input_width(input_width)
3679 .input_height(3)
3680 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3681 }
3682 }
3683 }
3684
3685 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_height_lt_3) {
3686 TEST_REQUIRES_ARM_NEON;
3687 for (size_t input_height = 1; input_height < 3; input_height++) {
3688 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3689 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3690 ConvHWCMicrokernelTester()
3691 .kernel_size(3)
3692 .subsampling(2)
3693 .padding_width(1)
3694 .padding_height(1) // padded input height of at least 3 required
3695 .input_channels(3)
3696 .output_channels_tile(4)
3697 .output_channels(output_channels)
3698 .input_width(input_width)
3699 .input_height(input_height)
3700 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3701 }
3702 }
3703 }
3704 }
3705
3706 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_height_gt_3) {
3707 TEST_REQUIRES_ARM_NEON;
3708 for (size_t input_height = 4; input_height <= 9; input_height++) {
3709 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3710 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3711 ConvHWCMicrokernelTester()
3712 .kernel_size(3)
3713 .subsampling(2)
3714 .padding_width(1)
3715 .input_channels(3)
3716 .output_channels_tile(4)
3717 .output_channels(output_channels)
3718 .input_width(input_width)
3719 .input_height(input_height)
3720 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3721 }
3722 }
3723 }
3724 }
3725
3726 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, padding_top) {
3727 TEST_REQUIRES_ARM_NEON;
3728 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
3729 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3730 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3731 ConvHWCMicrokernelTester()
3732 .kernel_size(3)
3733 .subsampling(2)
3734 .padding_width(1)
3735 .padding_top(padding_top)
3736 .input_channels(3)
3737 .output_channels_tile(4)
3738 .output_channels(output_channels)
3739 .input_width(input_width)
3740 .input_height(9)
3741 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3742 }
3743 }
3744 }
3745 }
3746
3747 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, padding_bottom) {
3748 TEST_REQUIRES_ARM_NEON;
3749 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
3750 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
3751 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3752 ConvHWCMicrokernelTester()
3753 .kernel_size(3)
3754 .subsampling(2)
3755 .padding_width(1)
3756 .padding_bottom(padding_bottom)
3757 .input_channels(3)
3758 .output_channels_tile(4)
3759 .output_channels(output_channels)
3760 .input_width(input_width)
3761 .input_height(9)
3762 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3763 }
3764 }
3765 }
3766 }
3767
3768 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_y_start) {
3769 TEST_REQUIRES_ARM_NEON;
3770 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
3771 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3772 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3773 ConvHWCMicrokernelTester()
3774 .kernel_size(3)
3775 .subsampling(2)
3776 .padding_width(1)
3777 .input_channels(3)
3778 .output_channels_tile(4)
3779 .output_channels(output_channels)
3780 .input_width(input_width)
3781 .input_height(9)
3782 .output_y_start(output_y_start)
3783 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3784 }
3785 }
3786 }
3787 }
3788
3789 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_y_end) {
3790 TEST_REQUIRES_ARM_NEON;
3791 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
3792 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3793 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3794 ConvHWCMicrokernelTester()
3795 .kernel_size(3)
3796 .subsampling(2)
3797 .padding_width(1)
3798 .input_channels(3)
3799 .output_channels_tile(4)
3800 .output_channels(output_channels)
3801 .input_width(input_width)
3802 .input_height(9)
3803 .output_y_end(output_y_end)
3804 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3805 }
3806 }
3807 }
3808 }
3809
3810 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, qmin) {
3811 TEST_REQUIRES_ARM_NEON;
3812 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3813 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3814 ConvHWCMicrokernelTester()
3815 .kernel_size(3)
3816 .subsampling(2)
3817 .padding_width(1)
3818 .input_channels(3)
3819 .output_channels_tile(4)
3820 .output_channels(output_channels)
3821 .input_width(input_width)
3822 .input_height(6)
3823 .qmin(128)
3824 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3825 }
3826 }
3827 }
3828
3829 TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, qmax) {
3830 TEST_REQUIRES_ARM_NEON;
3831 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3832 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3833 ConvHWCMicrokernelTester()
3834 .kernel_size(3)
3835 .subsampling(2)
3836 .padding_width(1)
3837 .input_channels(3)
3838 .output_channels_tile(4)
3839 .output_channels(output_channels)
3840 .input_width(input_width)
3841 .input_height(6)
3842 .qmax(128)
3843 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1);
3844 }
3845 }
3846 }
3847#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3848
Marat Dukhan6b7dfae2019-12-04 16:00:52 -08003849TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_2) {
3850 TEST_REQUIRES_ARM_NEON_FMA;
3851 ConvHWCMicrokernelTester()
3852 .kernel_size(3)
3853 .subsampling(2)
3854 .padding_width(1)
3855 .input_channels(3)
3856 .output_channels_tile(4)
3857 .output_channels(4)
3858 .input_width(2)
3859 .input_height(3)
3860 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3861}
3862
3863TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_div_2) {
3864 TEST_REQUIRES_ARM_NEON_FMA;
3865 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
3866 ConvHWCMicrokernelTester()
3867 .kernel_size(3)
3868 .subsampling(2)
3869 .padding_width(1)
3870 .input_channels(3)
3871 .output_channels_tile(4)
3872 .output_channels(4)
3873 .input_width(input_width)
3874 .input_height(3)
3875 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3876 }
3877}
3878
3879TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_lt_2) {
3880 TEST_REQUIRES_ARM_NEON_FMA;
3881 for (size_t input_width = 1; input_width < 2; input_width++) {
3882 ConvHWCMicrokernelTester()
3883 .kernel_size(3)
3884 .subsampling(2)
3885 .padding_width(1)
3886 .input_channels(3)
3887 .output_channels_tile(4)
3888 .output_channels(4)
3889 .input_width(input_width)
3890 .input_height(3)
3891 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3892 }
3893}
3894
3895TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_2) {
3896 TEST_REQUIRES_ARM_NEON_FMA;
3897 for (size_t input_width = 3; input_width < 4; input_width++) {
3898 ConvHWCMicrokernelTester()
3899 .kernel_size(3)
3900 .subsampling(2)
3901 .padding_width(1)
3902 .input_channels(3)
3903 .output_channels_tile(4)
3904 .output_channels(4)
3905 .input_width(input_width)
3906 .input_height(3)
3907 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3908 }
3909}
3910
3911TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
3912 TEST_REQUIRES_ARM_NEON_FMA;
3913 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
3914 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3915 ConvHWCMicrokernelTester()
3916 .kernel_size(3)
3917 .subsampling(2)
3918 .padding_width(1)
3919 .input_channels(3)
3920 .output_channels_tile(4)
3921 .output_channels(output_channels)
3922 .input_width(input_width)
3923 .input_height(3)
3924 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3925 }
3926 }
3927}
3928
3929TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
3930 TEST_REQUIRES_ARM_NEON_FMA;
3931 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
3932 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3933 ConvHWCMicrokernelTester()
3934 .kernel_size(3)
3935 .subsampling(2)
3936 .padding_width(1)
3937 .input_channels(3)
3938 .output_channels_tile(4)
3939 .output_channels(output_channels)
3940 .input_width(input_width)
3941 .input_height(3)
3942 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3943 }
3944 }
3945}
3946
3947TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
3948 TEST_REQUIRES_ARM_NEON_FMA;
3949 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
3950 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3951 ConvHWCMicrokernelTester()
3952 .kernel_size(3)
3953 .subsampling(2)
3954 .padding_width(1)
3955 .input_channels(3)
3956 .output_channels_tile(4)
3957 .output_channels(output_channels)
3958 .input_width(input_width)
3959 .input_height(3)
3960 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3961 }
3962 }
3963}
3964
3965TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
3966 TEST_REQUIRES_ARM_NEON_FMA;
3967 for (size_t input_height = 1; input_height < 3; input_height++) {
3968 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3969 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3970 ConvHWCMicrokernelTester()
3971 .kernel_size(3)
3972 .subsampling(2)
3973 .padding(1)
3974 .input_channels(3) // padded input height of at least 3 required
3975 .output_channels_tile(4)
3976 .output_channels(output_channels)
3977 .input_width(input_width)
3978 .input_height(input_height)
3979 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
3980 }
3981 }
3982 }
3983}
3984
3985TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
3986 TEST_REQUIRES_ARM_NEON_FMA;
3987 for (size_t input_height = 4; input_height <= 9; input_height++) {
3988 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
3989 for (size_t input_width = 1; input_width < 16; input_width += 3) {
3990 ConvHWCMicrokernelTester()
3991 .kernel_size(3)
3992 .subsampling(2)
3993 .padding_width(1)
3994 .input_channels(3)
3995 .output_channels_tile(4)
3996 .output_channels(output_channels)
3997 .input_width(input_width)
3998 .input_height(input_height)
3999 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4000 }
4001 }
4002 }
4003}
4004
4005TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
4006 TEST_REQUIRES_ARM_NEON_FMA;
4007 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
4008 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
4009 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4010 ConvHWCMicrokernelTester()
4011 .kernel_size(3)
4012 .subsampling(2)
4013 .padding_width(1)
4014 .padding_top(padding_top)
4015 .input_channels(3)
4016 .output_channels_tile(4)
4017 .output_channels(output_channels)
4018 .input_width(input_width)
4019 .input_height(9)
4020 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4021 }
4022 }
4023 }
4024}
4025
4026TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
4027 TEST_REQUIRES_ARM_NEON_FMA;
4028 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
4029 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
4030 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4031 ConvHWCMicrokernelTester()
4032 .kernel_size(3)
4033 .subsampling(2)
4034 .padding_width(1)
4035 .padding_bottom(padding_bottom)
4036 .input_channels(3)
4037 .output_channels_tile(4)
4038 .output_channels(output_channels)
4039 .input_width(input_width)
4040 .input_height(9)
4041 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4042 }
4043 }
4044 }
4045}
4046
4047TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
4048 TEST_REQUIRES_ARM_NEON_FMA;
4049 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
4050 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4051 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4052 ConvHWCMicrokernelTester()
4053 .kernel_size(3)
4054 .subsampling(2)
4055 .padding_width(1)
4056 .input_channels(3)
4057 .output_channels_tile(4)
4058 .output_channels(output_channels)
4059 .input_width(input_width)
4060 .input_height(9)
4061 .output_y_start(output_y_start)
4062 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4063 }
4064 }
4065 }
4066}
4067
4068TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
4069 TEST_REQUIRES_ARM_NEON_FMA;
4070 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
4071 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4072 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4073 ConvHWCMicrokernelTester()
4074 .kernel_size(3)
4075 .subsampling(2)
4076 .padding_width(1)
4077 .input_channels(3)
4078 .output_channels_tile(4)
4079 .output_channels(output_channels)
4080 .input_width(input_width)
4081 .input_height(9)
4082 .output_y_end(output_y_end)
4083 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4084 }
4085 }
4086 }
4087}
4088
4089TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmin) {
4090 TEST_REQUIRES_ARM_NEON_FMA;
4091 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4092 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4093 ConvHWCMicrokernelTester()
4094 .kernel_size(3)
4095 .subsampling(2)
4096 .padding_width(1)
4097 .input_channels(3)
4098 .output_channels_tile(4)
4099 .output_channels(output_channels)
4100 .input_width(input_width)
4101 .input_height(6)
4102 .qmin(128)
4103 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4104 }
4105 }
4106}
4107
4108TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmax) {
4109 TEST_REQUIRES_ARM_NEON_FMA;
4110 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4111 for (size_t input_width = 1; input_width < 16; input_width += 3) {
4112 ConvHWCMicrokernelTester()
4113 .kernel_size(3)
4114 .subsampling(2)
4115 .padding_width(1)
4116 .input_channels(3)
4117 .output_channels_tile(4)
4118 .output_channels(output_channels)
4119 .input_width(input_width)
4120 .input_height(6)
4121 .qmax(128)
4122 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
4123 }
4124 }
4125}
Marat Dukhan441e2212019-12-04 18:30:49 -08004126
4127
4128TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_eq_2) {
4129 TEST_REQUIRES_ARM_NEON_FMA;
4130 ConvHWCMicrokernelTester()
4131 .kernel_size(3)
4132 .subsampling(2)
4133 .padding_right(1)
4134 .input_channels(3)
4135 .output_channels_tile(4)
4136 .output_channels(4)
4137 .input_width(2)
4138 .input_height(3)
4139 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4140}
4141
4142TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_div_2) {
4143 TEST_REQUIRES_ARM_NEON_FMA;
4144 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
4145 ConvHWCMicrokernelTester()
4146 .kernel_size(3)
4147 .subsampling(2)
4148 .padding_right(1)
4149 .input_channels(3)
4150 .output_channels_tile(4)
4151 .output_channels(4)
4152 .input_width(input_width)
4153 .input_height(3)
4154 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4155 }
4156}
4157
4158TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_gt_2) {
4159 TEST_REQUIRES_ARM_NEON_FMA;
4160 for (size_t input_width = 3; input_width < 4; input_width++) {
4161 ConvHWCMicrokernelTester()
4162 .kernel_size(3)
4163 .subsampling(2)
4164 .padding_right(1)
4165 .input_channels(3)
4166 .output_channels_tile(4)
4167 .output_channels(4)
4168 .input_width(input_width)
4169 .input_height(3)
4170 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4171 }
4172}
4173
4174TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_lt_4) {
4175 TEST_REQUIRES_ARM_NEON_FMA;
4176 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
4177 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4178 ConvHWCMicrokernelTester()
4179 .kernel_size(3)
4180 .subsampling(2)
4181 .padding_right(1)
4182 .input_channels(3)
4183 .output_channels_tile(4)
4184 .output_channels(output_channels)
4185 .input_width(input_width)
4186 .input_height(3)
4187 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4188 }
4189 }
4190}
4191
4192TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_div_4) {
4193 TEST_REQUIRES_ARM_NEON_FMA;
4194 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
4195 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4196 ConvHWCMicrokernelTester()
4197 .kernel_size(3)
4198 .subsampling(2)
4199 .padding_right(1)
4200 .input_channels(3)
4201 .output_channels_tile(4)
4202 .output_channels(output_channels)
4203 .input_width(input_width)
4204 .input_height(3)
4205 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4206 }
4207 }
4208}
4209
4210TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_gt_4) {
4211 TEST_REQUIRES_ARM_NEON_FMA;
4212 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
4213 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4214 ConvHWCMicrokernelTester()
4215 .kernel_size(3)
4216 .subsampling(2)
4217 .padding_right(1)
4218 .input_channels(3)
4219 .output_channels_tile(4)
4220 .output_channels(output_channels)
4221 .input_width(input_width)
4222 .input_height(3)
4223 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4224 }
4225 }
4226}
4227
4228TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_lt_3) {
4229 TEST_REQUIRES_ARM_NEON_FMA;
4230 for (size_t input_height = 1; input_height < 3; input_height++) {
4231 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4232 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4233 ConvHWCMicrokernelTester()
4234 .kernel_size(3)
4235 .subsampling(2)
4236 .padding_right(1)
Marat Dukhan7e4ca402020-05-15 18:50:12 -07004237 .padding_height(1) // padded input height of at least 3 required
4238 .input_channels(3)
Marat Dukhan441e2212019-12-04 18:30:49 -08004239 .output_channels_tile(4)
4240 .output_channels(output_channels)
4241 .input_width(input_width)
4242 .input_height(input_height)
4243 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4244 }
4245 }
4246 }
4247}
4248
4249TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_gt_3) {
4250 TEST_REQUIRES_ARM_NEON_FMA;
4251 for (size_t input_height = 4; input_height <= 9; input_height++) {
4252 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4253 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4254 ConvHWCMicrokernelTester()
4255 .kernel_size(3)
4256 .subsampling(2)
4257 .padding_right(1)
4258 .input_channels(3)
4259 .output_channels_tile(4)
4260 .output_channels(output_channels)
4261 .input_width(input_width)
4262 .input_height(input_height)
4263 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4264 }
4265 }
4266 }
4267}
4268
4269TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_top) {
4270 TEST_REQUIRES_ARM_NEON_FMA;
4271 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
4272 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
4273 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4274 ConvHWCMicrokernelTester()
4275 .kernel_size(3)
4276 .subsampling(2)
4277 .padding_right(1)
4278 .padding_top(padding_top)
4279 .input_channels(3)
4280 .output_channels_tile(4)
4281 .output_channels(output_channels)
4282 .input_width(input_width)
4283 .input_height(9)
4284 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4285 }
4286 }
4287 }
4288}
4289
4290TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_bottom) {
4291 TEST_REQUIRES_ARM_NEON_FMA;
4292 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
4293 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
4294 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4295 ConvHWCMicrokernelTester()
4296 .kernel_size(3)
4297 .subsampling(2)
4298 .padding_right(1)
4299 .padding_bottom(padding_bottom)
4300 .input_channels(3)
4301 .output_channels_tile(4)
4302 .output_channels(output_channels)
4303 .input_width(input_width)
4304 .input_height(9)
4305 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4306 }
4307 }
4308 }
4309}
4310
4311TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_start) {
4312 TEST_REQUIRES_ARM_NEON_FMA;
4313 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
4314 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4315 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4316 ConvHWCMicrokernelTester()
4317 .kernel_size(3)
4318 .subsampling(2)
4319 .padding_right(1)
4320 .input_channels(3)
4321 .output_channels_tile(4)
4322 .output_channels(output_channels)
4323 .input_width(input_width)
4324 .input_height(9)
4325 .output_y_start(output_y_start)
4326 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4327 }
4328 }
4329 }
4330}
4331
4332TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_end) {
4333 TEST_REQUIRES_ARM_NEON_FMA;
4334 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
4335 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4336 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4337 ConvHWCMicrokernelTester()
4338 .kernel_size(3)
4339 .subsampling(2)
4340 .padding_right(1)
4341 .input_channels(3)
4342 .output_channels_tile(4)
4343 .output_channels(output_channels)
4344 .input_width(input_width)
4345 .input_height(9)
4346 .output_y_end(output_y_end)
4347 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4348 }
4349 }
4350 }
4351}
4352
4353TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmin) {
4354 TEST_REQUIRES_ARM_NEON_FMA;
4355 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4356 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4357 ConvHWCMicrokernelTester()
4358 .kernel_size(3)
4359 .subsampling(2)
4360 .padding_right(1)
4361 .input_channels(3)
4362 .output_channels_tile(4)
4363 .output_channels(output_channels)
4364 .input_width(input_width)
4365 .input_height(6)
4366 .qmin(128)
4367 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4368 }
4369 }
4370}
4371
4372TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmax) {
4373 TEST_REQUIRES_ARM_NEON_FMA;
4374 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
4375 for (size_t input_width = 2; input_width < 16; input_width += 3) {
4376 ConvHWCMicrokernelTester()
4377 .kernel_size(3)
4378 .subsampling(2)
4379 .padding_right(1)
4380 .input_channels(3)
4381 .output_channels_tile(4)
4382 .output_channels(output_channels)
4383 .input_width(input_width)
4384 .input_height(6)
4385 .qmax(128)
4386 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
4387 }
4388 }
4389}