blob: b3f6db39a159f652411953d351081cca9b077e2e [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/conv.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include "conv-hwc-microkernel-tester.h"
13
14
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070016 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_eq_4) {
17 TEST_REQUIRES_ARM_NEON_FMA;
18 ConvHWCMicrokernelTester()
19 .kernel_size(3)
20 .subsampling(2)
21 .padding_width(1)
22 .input_channels(3)
23 .output_channels_tile(8)
24 .output_channels(8)
25 .input_width(4)
26 .input_height(3)
27 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
28 }
29
30 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_div_4) {
31 TEST_REQUIRES_ARM_NEON_FMA;
32 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
33 ConvHWCMicrokernelTester()
34 .kernel_size(3)
35 .subsampling(2)
36 .padding_width(1)
37 .input_channels(3)
38 .output_channels_tile(8)
39 .output_channels(8)
40 .input_width(input_width)
41 .input_height(3)
42 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
43 }
44 }
45
46 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_lt_4) {
47 TEST_REQUIRES_ARM_NEON_FMA;
48 for (size_t input_width = 1; input_width < 4; input_width++) {
49 ConvHWCMicrokernelTester()
50 .kernel_size(3)
51 .subsampling(2)
52 .padding_width(1)
53 .input_channels(3)
54 .output_channels_tile(8)
55 .output_channels(8)
56 .input_width(input_width)
57 .input_height(3)
58 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
59 }
60 }
61
62 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_gt_4) {
63 TEST_REQUIRES_ARM_NEON_FMA;
64 for (size_t input_width = 5; input_width < 8; input_width++) {
65 ConvHWCMicrokernelTester()
66 .kernel_size(3)
67 .subsampling(2)
68 .padding_width(1)
69 .input_channels(3)
70 .output_channels_tile(8)
71 .output_channels(8)
72 .input_width(input_width)
73 .input_height(3)
74 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
75 }
76 }
77
78 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_lt_8) {
79 TEST_REQUIRES_ARM_NEON_FMA;
80 for (size_t output_channels = 1; output_channels < 8; output_channels++) {
81 for (size_t input_width = 1; input_width < 32; input_width += 7) {
82 ConvHWCMicrokernelTester()
83 .kernel_size(3)
84 .subsampling(2)
85 .padding_width(1)
86 .input_channels(3)
87 .output_channels_tile(8)
88 .output_channels(output_channels)
89 .input_width(input_width)
90 .input_height(3)
91 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
92 }
93 }
94 }
95
96 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_div_8) {
97 TEST_REQUIRES_ARM_NEON_FMA;
98 for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) {
99 for (size_t input_width = 1; input_width < 32; input_width += 7) {
100 ConvHWCMicrokernelTester()
101 .kernel_size(3)
102 .subsampling(2)
103 .padding_width(1)
104 .input_channels(3)
105 .output_channels_tile(8)
106 .output_channels(output_channels)
107 .input_width(input_width)
108 .input_height(3)
109 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
110 }
111 }
112 }
113
114 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_gt_8) {
115 TEST_REQUIRES_ARM_NEON_FMA;
116 for (size_t output_channels = 9; output_channels < 16; output_channels++) {
117 for (size_t input_width = 1; input_width < 32; input_width += 7) {
118 ConvHWCMicrokernelTester()
119 .kernel_size(3)
120 .subsampling(2)
121 .padding_width(1)
122 .input_channels(3)
123 .output_channels_tile(8)
124 .output_channels(output_channels)
125 .input_width(input_width)
126 .input_height(3)
127 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
128 }
129 }
130 }
131
132 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_lt_3) {
133 TEST_REQUIRES_ARM_NEON_FMA;
134 for (size_t input_height = 1; input_height < 3; input_height++) {
135 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
136 for (size_t input_width = 1; input_width < 32; input_width += 7) {
137 ConvHWCMicrokernelTester()
138 .kernel_size(3)
139 .subsampling(2)
140 .padding(1) // padded input height of at least 3 required
141 .input_channels(3)
142 .output_channels_tile(8)
143 .output_channels(output_channels)
144 .input_width(input_width)
145 .input_height(input_height)
146 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
147 }
148 }
149 }
150 }
151
152 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_gt_3) {
153 TEST_REQUIRES_ARM_NEON_FMA;
154 for (size_t input_height = 4; input_height <= 9; input_height++) {
155 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
156 for (size_t input_width = 1; input_width < 32; input_width += 7) {
157 ConvHWCMicrokernelTester()
158 .kernel_size(3)
159 .subsampling(2)
160 .padding_width(1)
161 .input_channels(3)
162 .output_channels_tile(8)
163 .output_channels(output_channels)
164 .input_width(input_width)
165 .input_height(input_height)
166 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
167 }
168 }
169 }
170 }
171
172 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_top) {
173 TEST_REQUIRES_ARM_NEON_FMA;
174 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
175 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
176 for (size_t input_width = 1; input_width < 32; input_width += 7) {
177 ConvHWCMicrokernelTester()
178 .kernel_size(3)
179 .subsampling(2)
180 .padding_width(1)
181 .padding_top(padding_top)
182 .input_channels(3)
183 .output_channels_tile(8)
184 .output_channels(output_channels)
185 .input_width(input_width)
186 .input_height(9)
187 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
188 }
189 }
190 }
191 }
192
193 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_bottom) {
194 TEST_REQUIRES_ARM_NEON_FMA;
195 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
196 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
197 for (size_t input_width = 1; input_width < 32; input_width += 7) {
198 ConvHWCMicrokernelTester()
199 .kernel_size(3)
200 .subsampling(2)
201 .padding_width(1)
202 .padding_bottom(padding_bottom)
203 .input_channels(3)
204 .output_channels_tile(8)
205 .output_channels(output_channels)
206 .input_width(input_width)
207 .input_height(9)
208 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
209 }
210 }
211 }
212 }
213
214 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_start) {
215 TEST_REQUIRES_ARM_NEON_FMA;
216 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
217 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
218 for (size_t input_width = 1; input_width < 32; input_width += 7) {
219 ConvHWCMicrokernelTester()
220 .kernel_size(3)
221 .subsampling(2)
222 .padding_width(1)
223 .input_channels(3)
224 .output_channels_tile(8)
225 .output_channels(output_channels)
226 .input_width(input_width)
227 .input_height(9)
228 .output_y_start(output_y_start)
229 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
230 }
231 }
232 }
233 }
234
235 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_end) {
236 TEST_REQUIRES_ARM_NEON_FMA;
237 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
238 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
239 for (size_t input_width = 1; input_width < 32; input_width += 7) {
240 ConvHWCMicrokernelTester()
241 .kernel_size(3)
242 .subsampling(2)
243 .padding_width(1)
244 .input_channels(3)
245 .output_channels_tile(8)
246 .output_channels(output_channels)
247 .input_width(input_width)
248 .input_height(9)
249 .output_y_end(output_y_end)
250 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
251 }
252 }
253 }
254 }
255
256 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmin) {
257 TEST_REQUIRES_ARM_NEON_FMA;
258 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
259 for (size_t input_width = 1; input_width < 32; input_width += 7) {
260 ConvHWCMicrokernelTester()
261 .kernel_size(3)
262 .subsampling(2)
263 .padding_width(1)
264 .input_channels(3)
265 .output_channels_tile(8)
266 .output_channels(output_channels)
267 .input_width(input_width)
268 .input_height(6)
269 .qmin(128)
270 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
271 }
272 }
273 }
274
275 TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmax) {
276 TEST_REQUIRES_ARM_NEON_FMA;
277 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
278 for (size_t input_width = 1; input_width < 32; input_width += 7) {
279 ConvHWCMicrokernelTester()
280 .kernel_size(3)
281 .subsampling(2)
282 .padding_width(1)
283 .input_channels(3)
284 .output_channels_tile(8)
285 .output_channels(output_channels)
286 .input_width(input_width)
287 .input_height(6)
288 .qmax(128)
289 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2);
290 }
291 }
292 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700293#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700294
295
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700296#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700297 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
298 TEST_REQUIRES_ARM_NEON_FMA;
299 ConvHWCMicrokernelTester()
300 .kernel_size(3)
301 .subsampling(2)
302 .padding_width(1)
303 .input_channels(3)
304 .output_channels_tile(4)
305 .output_channels(4)
306 .input_width(4)
307 .input_height(3)
308 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
309 }
310
311 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
312 TEST_REQUIRES_ARM_NEON_FMA;
313 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
314 ConvHWCMicrokernelTester()
315 .kernel_size(3)
316 .subsampling(2)
317 .padding_width(1)
318 .input_channels(3)
319 .output_channels_tile(4)
320 .output_channels(4)
321 .input_width(input_width)
322 .input_height(3)
323 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
324 }
325 }
326
327 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
328 TEST_REQUIRES_ARM_NEON_FMA;
329 for (size_t input_width = 1; input_width < 4; input_width++) {
330 ConvHWCMicrokernelTester()
331 .kernel_size(3)
332 .subsampling(2)
333 .padding_width(1)
334 .input_channels(3)
335 .output_channels_tile(4)
336 .output_channels(4)
337 .input_width(input_width)
338 .input_height(3)
339 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
340 }
341 }
342
343 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
344 TEST_REQUIRES_ARM_NEON_FMA;
345 for (size_t input_width = 5; input_width < 8; input_width++) {
346 ConvHWCMicrokernelTester()
347 .kernel_size(3)
348 .subsampling(2)
349 .padding_width(1)
350 .input_channels(3)
351 .output_channels_tile(4)
352 .output_channels(4)
353 .input_width(input_width)
354 .input_height(3)
355 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
356 }
357 }
358
359 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
360 TEST_REQUIRES_ARM_NEON_FMA;
361 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
362 for (size_t input_width = 1; input_width < 32; input_width += 7) {
363 ConvHWCMicrokernelTester()
364 .kernel_size(3)
365 .subsampling(2)
366 .padding_width(1)
367 .input_channels(3)
368 .output_channels_tile(4)
369 .output_channels(output_channels)
370 .input_width(input_width)
371 .input_height(3)
372 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
373 }
374 }
375 }
376
377 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
378 TEST_REQUIRES_ARM_NEON_FMA;
379 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
380 for (size_t input_width = 1; input_width < 32; input_width += 7) {
381 ConvHWCMicrokernelTester()
382 .kernel_size(3)
383 .subsampling(2)
384 .padding_width(1)
385 .input_channels(3)
386 .output_channels_tile(4)
387 .output_channels(output_channels)
388 .input_width(input_width)
389 .input_height(3)
390 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
391 }
392 }
393 }
394
395 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
396 TEST_REQUIRES_ARM_NEON_FMA;
397 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
398 for (size_t input_width = 1; input_width < 32; input_width += 7) {
399 ConvHWCMicrokernelTester()
400 .kernel_size(3)
401 .subsampling(2)
402 .padding_width(1)
403 .input_channels(3)
404 .output_channels_tile(4)
405 .output_channels(output_channels)
406 .input_width(input_width)
407 .input_height(3)
408 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
409 }
410 }
411 }
412
413 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
414 TEST_REQUIRES_ARM_NEON_FMA;
415 for (size_t input_height = 1; input_height < 3; input_height++) {
416 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
417 for (size_t input_width = 1; input_width < 32; input_width += 7) {
418 ConvHWCMicrokernelTester()
419 .kernel_size(3)
420 .subsampling(2)
421 .padding(1)
422 .input_channels(3) // padded input height of at least 3 required
423 .output_channels_tile(4)
424 .output_channels(output_channels)
425 .input_width(input_width)
426 .input_height(input_height)
427 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
428 }
429 }
430 }
431 }
432
433 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
434 TEST_REQUIRES_ARM_NEON_FMA;
435 for (size_t input_height = 4; input_height <= 9; input_height++) {
436 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
437 for (size_t input_width = 1; input_width < 32; input_width += 7) {
438 ConvHWCMicrokernelTester()
439 .kernel_size(3)
440 .subsampling(2)
441 .padding_width(1)
442 .input_channels(3)
443 .output_channels_tile(4)
444 .output_channels(output_channels)
445 .input_width(input_width)
446 .input_height(input_height)
447 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
448 }
449 }
450 }
451 }
452
453 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
454 TEST_REQUIRES_ARM_NEON_FMA;
455 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
456 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
457 for (size_t input_width = 1; input_width < 32; input_width += 7) {
458 ConvHWCMicrokernelTester()
459 .kernel_size(3)
460 .subsampling(2)
461 .padding_width(1)
462 .padding_top(padding_top)
463 .input_channels(3)
464 .output_channels_tile(4)
465 .output_channels(output_channels)
466 .input_width(input_width)
467 .input_height(9)
468 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
469 }
470 }
471 }
472 }
473
474 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
475 TEST_REQUIRES_ARM_NEON_FMA;
476 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
477 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
478 for (size_t input_width = 1; input_width < 32; input_width += 7) {
479 ConvHWCMicrokernelTester()
480 .kernel_size(3)
481 .subsampling(2)
482 .padding_width(1)
483 .padding_bottom(padding_bottom)
484 .input_channels(3)
485 .output_channels_tile(4)
486 .output_channels(output_channels)
487 .input_width(input_width)
488 .input_height(9)
489 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
490 }
491 }
492 }
493 }
494
495 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
496 TEST_REQUIRES_ARM_NEON_FMA;
497 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
498 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
499 for (size_t input_width = 1; input_width < 32; input_width += 7) {
500 ConvHWCMicrokernelTester()
501 .kernel_size(3)
502 .subsampling(2)
503 .padding_width(1)
504 .input_channels(3)
505 .output_channels_tile(4)
506 .output_channels(output_channels)
507 .input_width(input_width)
508 .input_height(9)
509 .output_y_start(output_y_start)
510 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
511 }
512 }
513 }
514 }
515
516 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
517 TEST_REQUIRES_ARM_NEON_FMA;
518 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
519 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
520 for (size_t input_width = 1; input_width < 32; input_width += 7) {
521 ConvHWCMicrokernelTester()
522 .kernel_size(3)
523 .subsampling(2)
524 .padding_width(1)
525 .input_channels(3)
526 .output_channels_tile(4)
527 .output_channels(output_channels)
528 .input_width(input_width)
529 .input_height(9)
530 .output_y_end(output_y_end)
531 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
532 }
533 }
534 }
535 }
536
537 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
538 TEST_REQUIRES_ARM_NEON_FMA;
539 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
540 for (size_t input_width = 1; input_width < 32; input_width += 7) {
541 ConvHWCMicrokernelTester()
542 .kernel_size(3)
543 .subsampling(2)
544 .padding_width(1)
545 .input_channels(3)
546 .output_channels_tile(4)
547 .output_channels(output_channels)
548 .input_width(input_width)
549 .input_height(6)
550 .qmin(128)
551 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
552 }
553 }
554 }
555
556 TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
557 TEST_REQUIRES_ARM_NEON_FMA;
558 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
559 for (size_t input_width = 1; input_width < 32; input_width += 7) {
560 ConvHWCMicrokernelTester()
561 .kernel_size(3)
562 .subsampling(2)
563 .padding_width(1)
564 .input_channels(3)
565 .output_channels_tile(4)
566 .output_channels(output_channels)
567 .input_width(input_width)
568 .input_height(6)
569 .qmax(128)
570 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2);
571 }
572 }
573 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700574#endif // XNN_ARCH_ARM64
Marat Dukhan6b7dfae2019-12-04 16:00:52 -0800575
576
577TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_2) {
578 TEST_REQUIRES_ARM_NEON_FMA;
579 ConvHWCMicrokernelTester()
580 .kernel_size(3)
581 .subsampling(2)
582 .padding_width(1)
583 .input_channels(3)
584 .output_channels_tile(4)
585 .output_channels(4)
586 .input_width(2)
587 .input_height(3)
588 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
589}
590
591TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_div_2) {
592 TEST_REQUIRES_ARM_NEON_FMA;
593 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
594 ConvHWCMicrokernelTester()
595 .kernel_size(3)
596 .subsampling(2)
597 .padding_width(1)
598 .input_channels(3)
599 .output_channels_tile(4)
600 .output_channels(4)
601 .input_width(input_width)
602 .input_height(3)
603 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
604 }
605}
606
607TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_lt_2) {
608 TEST_REQUIRES_ARM_NEON_FMA;
609 for (size_t input_width = 1; input_width < 2; input_width++) {
610 ConvHWCMicrokernelTester()
611 .kernel_size(3)
612 .subsampling(2)
613 .padding_width(1)
614 .input_channels(3)
615 .output_channels_tile(4)
616 .output_channels(4)
617 .input_width(input_width)
618 .input_height(3)
619 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
620 }
621}
622
623TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_2) {
624 TEST_REQUIRES_ARM_NEON_FMA;
625 for (size_t input_width = 3; input_width < 4; input_width++) {
626 ConvHWCMicrokernelTester()
627 .kernel_size(3)
628 .subsampling(2)
629 .padding_width(1)
630 .input_channels(3)
631 .output_channels_tile(4)
632 .output_channels(4)
633 .input_width(input_width)
634 .input_height(3)
635 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
636 }
637}
638
639TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
640 TEST_REQUIRES_ARM_NEON_FMA;
641 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
642 for (size_t input_width = 1; input_width < 16; input_width += 3) {
643 ConvHWCMicrokernelTester()
644 .kernel_size(3)
645 .subsampling(2)
646 .padding_width(1)
647 .input_channels(3)
648 .output_channels_tile(4)
649 .output_channels(output_channels)
650 .input_width(input_width)
651 .input_height(3)
652 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
653 }
654 }
655}
656
657TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
658 TEST_REQUIRES_ARM_NEON_FMA;
659 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
660 for (size_t input_width = 1; input_width < 16; input_width += 3) {
661 ConvHWCMicrokernelTester()
662 .kernel_size(3)
663 .subsampling(2)
664 .padding_width(1)
665 .input_channels(3)
666 .output_channels_tile(4)
667 .output_channels(output_channels)
668 .input_width(input_width)
669 .input_height(3)
670 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
671 }
672 }
673}
674
675TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
676 TEST_REQUIRES_ARM_NEON_FMA;
677 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
678 for (size_t input_width = 1; input_width < 16; input_width += 3) {
679 ConvHWCMicrokernelTester()
680 .kernel_size(3)
681 .subsampling(2)
682 .padding_width(1)
683 .input_channels(3)
684 .output_channels_tile(4)
685 .output_channels(output_channels)
686 .input_width(input_width)
687 .input_height(3)
688 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
689 }
690 }
691}
692
693TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
694 TEST_REQUIRES_ARM_NEON_FMA;
695 for (size_t input_height = 1; input_height < 3; input_height++) {
696 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
697 for (size_t input_width = 1; input_width < 16; input_width += 3) {
698 ConvHWCMicrokernelTester()
699 .kernel_size(3)
700 .subsampling(2)
701 .padding(1)
702 .input_channels(3) // padded input height of at least 3 required
703 .output_channels_tile(4)
704 .output_channels(output_channels)
705 .input_width(input_width)
706 .input_height(input_height)
707 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
708 }
709 }
710 }
711}
712
713TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
714 TEST_REQUIRES_ARM_NEON_FMA;
715 for (size_t input_height = 4; input_height <= 9; input_height++) {
716 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
717 for (size_t input_width = 1; input_width < 16; input_width += 3) {
718 ConvHWCMicrokernelTester()
719 .kernel_size(3)
720 .subsampling(2)
721 .padding_width(1)
722 .input_channels(3)
723 .output_channels_tile(4)
724 .output_channels(output_channels)
725 .input_width(input_width)
726 .input_height(input_height)
727 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
728 }
729 }
730 }
731}
732
733TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
734 TEST_REQUIRES_ARM_NEON_FMA;
735 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
736 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
737 for (size_t input_width = 1; input_width < 16; input_width += 3) {
738 ConvHWCMicrokernelTester()
739 .kernel_size(3)
740 .subsampling(2)
741 .padding_width(1)
742 .padding_top(padding_top)
743 .input_channels(3)
744 .output_channels_tile(4)
745 .output_channels(output_channels)
746 .input_width(input_width)
747 .input_height(9)
748 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
749 }
750 }
751 }
752}
753
754TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
755 TEST_REQUIRES_ARM_NEON_FMA;
756 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
757 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
758 for (size_t input_width = 1; input_width < 16; input_width += 3) {
759 ConvHWCMicrokernelTester()
760 .kernel_size(3)
761 .subsampling(2)
762 .padding_width(1)
763 .padding_bottom(padding_bottom)
764 .input_channels(3)
765 .output_channels_tile(4)
766 .output_channels(output_channels)
767 .input_width(input_width)
768 .input_height(9)
769 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
770 }
771 }
772 }
773}
774
775TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
776 TEST_REQUIRES_ARM_NEON_FMA;
777 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
778 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
779 for (size_t input_width = 1; input_width < 16; input_width += 3) {
780 ConvHWCMicrokernelTester()
781 .kernel_size(3)
782 .subsampling(2)
783 .padding_width(1)
784 .input_channels(3)
785 .output_channels_tile(4)
786 .output_channels(output_channels)
787 .input_width(input_width)
788 .input_height(9)
789 .output_y_start(output_y_start)
790 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
791 }
792 }
793 }
794}
795
796TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
797 TEST_REQUIRES_ARM_NEON_FMA;
798 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
799 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
800 for (size_t input_width = 1; input_width < 16; input_width += 3) {
801 ConvHWCMicrokernelTester()
802 .kernel_size(3)
803 .subsampling(2)
804 .padding_width(1)
805 .input_channels(3)
806 .output_channels_tile(4)
807 .output_channels(output_channels)
808 .input_width(input_width)
809 .input_height(9)
810 .output_y_end(output_y_end)
811 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
812 }
813 }
814 }
815}
816
817TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmin) {
818 TEST_REQUIRES_ARM_NEON_FMA;
819 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
820 for (size_t input_width = 1; input_width < 16; input_width += 3) {
821 ConvHWCMicrokernelTester()
822 .kernel_size(3)
823 .subsampling(2)
824 .padding_width(1)
825 .input_channels(3)
826 .output_channels_tile(4)
827 .output_channels(output_channels)
828 .input_width(input_width)
829 .input_height(6)
830 .qmin(128)
831 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
832 }
833 }
834}
835
836TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmax) {
837 TEST_REQUIRES_ARM_NEON_FMA;
838 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
839 for (size_t input_width = 1; input_width < 16; input_width += 3) {
840 ConvHWCMicrokernelTester()
841 .kernel_size(3)
842 .subsampling(2)
843 .padding_width(1)
844 .input_channels(3)
845 .output_channels_tile(4)
846 .output_channels(output_channels)
847 .input_width(input_width)
848 .input_height(6)
849 .qmax(128)
850 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1);
851 }
852 }
853}
Marat Dukhan441e2212019-12-04 18:30:49 -0800854
855
856TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_eq_2) {
857 TEST_REQUIRES_ARM_NEON_FMA;
858 ConvHWCMicrokernelTester()
859 .kernel_size(3)
860 .subsampling(2)
861 .padding_right(1)
862 .input_channels(3)
863 .output_channels_tile(4)
864 .output_channels(4)
865 .input_width(2)
866 .input_height(3)
867 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
868}
869
870TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_div_2) {
871 TEST_REQUIRES_ARM_NEON_FMA;
872 for (size_t input_width = 4; input_width <= 16; input_width += 6) {
873 ConvHWCMicrokernelTester()
874 .kernel_size(3)
875 .subsampling(2)
876 .padding_right(1)
877 .input_channels(3)
878 .output_channels_tile(4)
879 .output_channels(4)
880 .input_width(input_width)
881 .input_height(3)
882 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
883 }
884}
885
886TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_gt_2) {
887 TEST_REQUIRES_ARM_NEON_FMA;
888 for (size_t input_width = 3; input_width < 4; input_width++) {
889 ConvHWCMicrokernelTester()
890 .kernel_size(3)
891 .subsampling(2)
892 .padding_right(1)
893 .input_channels(3)
894 .output_channels_tile(4)
895 .output_channels(4)
896 .input_width(input_width)
897 .input_height(3)
898 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
899 }
900}
901
902TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_lt_4) {
903 TEST_REQUIRES_ARM_NEON_FMA;
904 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
905 for (size_t input_width = 2; input_width < 16; input_width += 3) {
906 ConvHWCMicrokernelTester()
907 .kernel_size(3)
908 .subsampling(2)
909 .padding_right(1)
910 .input_channels(3)
911 .output_channels_tile(4)
912 .output_channels(output_channels)
913 .input_width(input_width)
914 .input_height(3)
915 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
916 }
917 }
918}
919
920TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_div_4) {
921 TEST_REQUIRES_ARM_NEON_FMA;
922 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
923 for (size_t input_width = 2; input_width < 16; input_width += 3) {
924 ConvHWCMicrokernelTester()
925 .kernel_size(3)
926 .subsampling(2)
927 .padding_right(1)
928 .input_channels(3)
929 .output_channels_tile(4)
930 .output_channels(output_channels)
931 .input_width(input_width)
932 .input_height(3)
933 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
934 }
935 }
936}
937
938TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_gt_4) {
939 TEST_REQUIRES_ARM_NEON_FMA;
940 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
941 for (size_t input_width = 2; input_width < 16; input_width += 3) {
942 ConvHWCMicrokernelTester()
943 .kernel_size(3)
944 .subsampling(2)
945 .padding_right(1)
946 .input_channels(3)
947 .output_channels_tile(4)
948 .output_channels(output_channels)
949 .input_width(input_width)
950 .input_height(3)
951 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
952 }
953 }
954}
955
956TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_lt_3) {
957 TEST_REQUIRES_ARM_NEON_FMA;
958 for (size_t input_height = 1; input_height < 3; input_height++) {
959 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
960 for (size_t input_width = 2; input_width < 16; input_width += 3) {
961 ConvHWCMicrokernelTester()
962 .kernel_size(3)
963 .subsampling(2)
964 .padding_right(1)
965 .padding_height(1)
966 .input_channels(3) // padded input height of at least 3 required
967 .output_channels_tile(4)
968 .output_channels(output_channels)
969 .input_width(input_width)
970 .input_height(input_height)
971 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
972 }
973 }
974 }
975}
976
977TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_gt_3) {
978 TEST_REQUIRES_ARM_NEON_FMA;
979 for (size_t input_height = 4; input_height <= 9; input_height++) {
980 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
981 for (size_t input_width = 2; input_width < 16; input_width += 3) {
982 ConvHWCMicrokernelTester()
983 .kernel_size(3)
984 .subsampling(2)
985 .padding_right(1)
986 .input_channels(3)
987 .output_channels_tile(4)
988 .output_channels(output_channels)
989 .input_width(input_width)
990 .input_height(input_height)
991 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
992 }
993 }
994 }
995}
996
997TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_top) {
998 TEST_REQUIRES_ARM_NEON_FMA;
999 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1000 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1001 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1002 ConvHWCMicrokernelTester()
1003 .kernel_size(3)
1004 .subsampling(2)
1005 .padding_right(1)
1006 .padding_top(padding_top)
1007 .input_channels(3)
1008 .output_channels_tile(4)
1009 .output_channels(output_channels)
1010 .input_width(input_width)
1011 .input_height(9)
1012 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1013 }
1014 }
1015 }
1016}
1017
1018TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_bottom) {
1019 TEST_REQUIRES_ARM_NEON_FMA;
1020 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1021 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1022 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1023 ConvHWCMicrokernelTester()
1024 .kernel_size(3)
1025 .subsampling(2)
1026 .padding_right(1)
1027 .padding_bottom(padding_bottom)
1028 .input_channels(3)
1029 .output_channels_tile(4)
1030 .output_channels(output_channels)
1031 .input_width(input_width)
1032 .input_height(9)
1033 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1034 }
1035 }
1036 }
1037}
1038
1039TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_start) {
1040 TEST_REQUIRES_ARM_NEON_FMA;
1041 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1042 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1043 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1044 ConvHWCMicrokernelTester()
1045 .kernel_size(3)
1046 .subsampling(2)
1047 .padding_right(1)
1048 .input_channels(3)
1049 .output_channels_tile(4)
1050 .output_channels(output_channels)
1051 .input_width(input_width)
1052 .input_height(9)
1053 .output_y_start(output_y_start)
1054 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1055 }
1056 }
1057 }
1058}
1059
1060TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_end) {
1061 TEST_REQUIRES_ARM_NEON_FMA;
1062 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1063 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1064 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1065 ConvHWCMicrokernelTester()
1066 .kernel_size(3)
1067 .subsampling(2)
1068 .padding_right(1)
1069 .input_channels(3)
1070 .output_channels_tile(4)
1071 .output_channels(output_channels)
1072 .input_width(input_width)
1073 .input_height(9)
1074 .output_y_end(output_y_end)
1075 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1076 }
1077 }
1078 }
1079}
1080
1081TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmin) {
1082 TEST_REQUIRES_ARM_NEON_FMA;
1083 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1084 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1085 ConvHWCMicrokernelTester()
1086 .kernel_size(3)
1087 .subsampling(2)
1088 .padding_right(1)
1089 .input_channels(3)
1090 .output_channels_tile(4)
1091 .output_channels(output_channels)
1092 .input_width(input_width)
1093 .input_height(6)
1094 .qmin(128)
1095 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1096 }
1097 }
1098}
1099
1100TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmax) {
1101 TEST_REQUIRES_ARM_NEON_FMA;
1102 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1103 for (size_t input_width = 2; input_width < 16; input_width += 3) {
1104 ConvHWCMicrokernelTester()
1105 .kernel_size(3)
1106 .subsampling(2)
1107 .padding_right(1)
1108 .input_channels(3)
1109 .output_channels_tile(4)
1110 .output_channels(output_channels)
1111 .input_width(input_width)
1112 .input_height(6)
1113 .qmax(128)
1114 .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1);
1115 }
1116 }
1117}