blob: 7227ac30be986b3f0cf2bc9fb2c961a872f11277 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/conv.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include "conv-hwc2spchw-microkernel-tester.h"
13
14
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070016 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
17 TEST_REQUIRES_ARM_NEON_FMA;
18 ConvHWC2SpCHWMicrokernelTester()
19 .kernel_size(3)
20 .subsampling(2)
21 .padding_width(1)
22 .input_channels(3)
23 .output_channels_tile(4)
24 .output_channels(4)
25 .input_width(4)
26 .input_height(3)
27 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
28 }
29
30 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
31 TEST_REQUIRES_ARM_NEON_FMA;
32 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
33 ConvHWC2SpCHWMicrokernelTester()
34 .kernel_size(3)
35 .subsampling(2)
36 .padding_width(1)
37 .input_channels(3)
38 .output_channels_tile(4)
39 .output_channels(4)
40 .input_width(input_width)
41 .input_height(3)
42 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
43 }
44 }
45
46 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
47 TEST_REQUIRES_ARM_NEON_FMA;
48 for (size_t input_width = 1; input_width < 4; input_width++) {
49 ConvHWC2SpCHWMicrokernelTester()
50 .kernel_size(3)
51 .subsampling(2)
52 .padding_width(1)
53 .input_channels(3)
54 .output_channels_tile(4)
55 .output_channels(4)
56 .input_width(input_width)
57 .input_height(3)
58 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
59 }
60 }
61
62 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
63 TEST_REQUIRES_ARM_NEON_FMA;
64 for (size_t input_width = 5; input_width < 8; input_width++) {
65 ConvHWC2SpCHWMicrokernelTester()
66 .kernel_size(3)
67 .subsampling(2)
68 .padding_width(1)
69 .input_channels(3)
70 .output_channels_tile(4)
71 .output_channels(4)
72 .input_width(input_width)
73 .input_height(3)
74 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
75 }
76 }
77
78 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
79 TEST_REQUIRES_ARM_NEON_FMA;
80 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
81 for (size_t input_width = 1; input_width < 32; input_width += 7) {
82 ConvHWC2SpCHWMicrokernelTester()
83 .kernel_size(3)
84 .subsampling(2)
85 .padding_width(1)
86 .input_channels(3)
87 .output_channels_tile(4)
88 .output_channels(output_channels)
89 .input_width(input_width)
90 .input_height(3)
91 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
92 }
93 }
94 }
95
96 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
97 TEST_REQUIRES_ARM_NEON_FMA;
98 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
99 for (size_t input_width = 1; input_width < 32; input_width += 7) {
100 ConvHWC2SpCHWMicrokernelTester()
101 .kernel_size(3)
102 .subsampling(2)
103 .padding_width(1)
104 .input_channels(3)
105 .output_channels_tile(4)
106 .output_channels(output_channels)
107 .input_width(input_width)
108 .input_height(3)
109 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
110 }
111 }
112 }
113
114 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
115 TEST_REQUIRES_ARM_NEON_FMA;
116 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
117 for (size_t input_width = 1; input_width < 32; input_width += 7) {
118 ConvHWC2SpCHWMicrokernelTester()
119 .kernel_size(3)
120 .subsampling(2)
121 .padding_width(1)
122 .input_channels(3)
123 .output_channels_tile(4)
124 .output_channels(output_channels)
125 .input_width(input_width)
126 .input_height(3)
127 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
128 }
129 }
130 }
131
132 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
133 TEST_REQUIRES_ARM_NEON_FMA;
134 for (size_t input_height = 1; input_height < 3; input_height++) {
135 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
136 for (size_t input_width = 1; input_width < 32; input_width += 7) {
137 ConvHWC2SpCHWMicrokernelTester()
138 .kernel_size(3)
139 .subsampling(2)
140 .padding(1)
141 .input_channels(3) // padded input height of at least 3 required
142 .output_channels_tile(4)
143 .output_channels(output_channels)
144 .input_width(input_width)
145 .input_height(input_height)
146 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
147 }
148 }
149 }
150 }
151
152 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
153 TEST_REQUIRES_ARM_NEON_FMA;
154 for (size_t input_height = 4; input_height <= 9; input_height++) {
155 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
156 for (size_t input_width = 1; input_width < 32; input_width += 7) {
157 ConvHWC2SpCHWMicrokernelTester()
158 .kernel_size(3)
159 .subsampling(2)
160 .padding_width(1)
161 .input_channels(3)
162 .output_channels_tile(4)
163 .output_channels(output_channels)
164 .input_width(input_width)
165 .input_height(input_height)
166 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
167 }
168 }
169 }
170 }
171
172 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
173 TEST_REQUIRES_ARM_NEON_FMA;
174 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
175 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
176 for (size_t input_width = 1; input_width < 32; input_width += 7) {
177 ConvHWC2SpCHWMicrokernelTester()
178 .kernel_size(3)
179 .subsampling(2)
180 .padding_width(1)
181 .padding_top(padding_top)
182 .input_channels(3)
183 .output_channels_tile(4)
184 .output_channels(output_channels)
185 .input_width(input_width)
186 .input_height(9)
187 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
188 }
189 }
190 }
191 }
192
193 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
194 TEST_REQUIRES_ARM_NEON_FMA;
195 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
196 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
197 for (size_t input_width = 1; input_width < 32; input_width += 7) {
198 ConvHWC2SpCHWMicrokernelTester()
199 .kernel_size(3)
200 .subsampling(2)
201 .padding_width(1)
202 .padding_bottom(padding_bottom)
203 .input_channels(3)
204 .output_channels_tile(4)
205 .output_channels(output_channels)
206 .input_width(input_width)
207 .input_height(9)
208 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
209 }
210 }
211 }
212 }
213
214 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
215 TEST_REQUIRES_ARM_NEON_FMA;
216 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
217 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
218 for (size_t input_width = 1; input_width < 32; input_width += 7) {
219 ConvHWC2SpCHWMicrokernelTester()
220 .kernel_size(3)
221 .subsampling(2)
222 .padding_width(1)
223 .input_channels(3)
224 .output_channels_tile(4)
225 .output_channels(output_channels)
226 .input_width(input_width)
227 .input_height(9)
228 .output_y_start(output_y_start)
229 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
230 }
231 }
232 }
233 }
234
235 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
236 TEST_REQUIRES_ARM_NEON_FMA;
237 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
238 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
239 for (size_t input_width = 1; input_width < 32; input_width += 7) {
240 ConvHWC2SpCHWMicrokernelTester()
241 .kernel_size(3)
242 .subsampling(2)
243 .padding_width(1)
244 .input_channels(3)
245 .output_channels_tile(4)
246 .output_channels(output_channels)
247 .input_width(input_width)
248 .input_height(9)
249 .output_y_end(output_y_end)
250 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
251 }
252 }
253 }
254 }
255
256 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
257 TEST_REQUIRES_ARM_NEON_FMA;
258 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
259 for (size_t input_width = 1; input_width < 32; input_width += 7) {
260 ConvHWC2SpCHWMicrokernelTester()
261 .kernel_size(3)
262 .subsampling(2)
263 .padding_width(1)
264 .input_channels(3)
265 .output_channels_tile(4)
266 .output_channels(output_channels)
267 .input_width(input_width)
268 .input_height(6)
269 .qmin(128)
270 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
271 }
272 }
273 }
274
275 TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
276 TEST_REQUIRES_ARM_NEON_FMA;
277 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
278 for (size_t input_width = 1; input_width < 32; input_width += 7) {
279 ConvHWC2SpCHWMicrokernelTester()
280 .kernel_size(3)
281 .subsampling(2)
282 .padding_width(1)
283 .input_channels(3)
284 .output_channels_tile(4)
285 .output_channels(output_channels)
286 .input_width(input_width)
287 .input_height(6)
288 .qmax(128)
289 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
290 }
291 }
292 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700293#endif // XNN_ARCH_ARM64
Erich Elsen563df5f2019-10-23 08:02:21 -0700294
295TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
296 ConvHWC2SpCHWMicrokernelTester()
297 .kernel_size(3)
298 .subsampling(2)
299 .padding_width(1)
300 .input_channels(3)
301 .output_channels_tile(4)
302 .output_channels(4)
303 .input_width(4)
304 .input_height(3)
305 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
306}
307
308
309TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
310 for (size_t input_width = 2; input_width < 33; input_width++) {
311 ConvHWC2SpCHWMicrokernelTester()
312 .kernel_size(3)
313 .subsampling(2)
314 .padding_width(1)
315 .input_channels(3)
316 .output_channels_tile(4)
317 .output_channels(4)
318 .input_width(input_width)
319 .input_height(3)
320 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
321 }
322}
323
324TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
325 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
326 for (size_t input_width = 1; input_width < 32; input_width += 7) {
327 ConvHWC2SpCHWMicrokernelTester()
328 .kernel_size(3)
329 .subsampling(2)
330 .padding_width(1)
331 .input_channels(3)
332 .output_channels_tile(4)
333 .output_channels(output_channels)
334 .input_width(input_width)
335 .input_height(3)
336 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
337 }
338 }
339}
340
341TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
342 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
343 for (size_t input_width = 1; input_width < 32; input_width += 7) {
344 ConvHWC2SpCHWMicrokernelTester()
345 .kernel_size(3)
346 .subsampling(2)
347 .padding_width(1)
348 .input_channels(3)
349 .output_channels_tile(4)
350 .output_channels(output_channels)
351 .input_width(input_width)
352 .input_height(3)
353 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
354 }
355 }
356}
357
358TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
359 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
360 for (size_t input_width = 1; input_width < 32; input_width += 7) {
361 ConvHWC2SpCHWMicrokernelTester()
362 .kernel_size(3)
363 .subsampling(2)
364 .padding_width(1)
365 .input_channels(3)
366 .output_channels_tile(4)
367 .output_channels(output_channels)
368 .input_width(input_width)
369 .input_height(3)
370 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
371 }
372 }
373}
374
375TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
376 for (size_t input_height = 1; input_height < 3; input_height++) {
377 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
378 for (size_t input_width = 1; input_width < 32; input_width += 7) {
379 ConvHWC2SpCHWMicrokernelTester()
380 .kernel_size(3)
381 .subsampling(2)
382 .padding(1)
383 .input_channels(3) // padded input height of at least 3 required
384 .output_channels_tile(4)
385 .output_channels(output_channels)
386 .input_width(input_width)
387 .input_height(input_height)
388 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
389 }
390 }
391 }
392}
393
394TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
395 for (size_t input_height = 4; input_height <= 9; input_height++) {
396 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
397 for (size_t input_width = 1; input_width < 32; input_width += 7) {
398 ConvHWC2SpCHWMicrokernelTester()
399 .kernel_size(3)
400 .subsampling(2)
401 .padding_width(1)
402 .input_channels(3)
403 .output_channels_tile(4)
404 .output_channels(output_channels)
405 .input_width(input_width)
406 .input_height(input_height)
407 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
408 }
409 }
410 }
411}
412
413TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
414 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
415 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
416 for (size_t input_width = 1; input_width < 32; input_width += 7) {
417 ConvHWC2SpCHWMicrokernelTester()
418 .kernel_size(3)
419 .subsampling(2)
420 .padding_width(1)
421 .padding_top(padding_top)
422 .input_channels(3)
423 .output_channels_tile(4)
424 .output_channels(output_channels)
425 .input_width(input_width)
426 .input_height(9)
427 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
428 }
429 }
430 }
431}
432
433TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
434 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
435 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
436 for (size_t input_width = 1; input_width < 32; input_width += 7) {
437 ConvHWC2SpCHWMicrokernelTester()
438 .kernel_size(3)
439 .subsampling(2)
440 .padding_width(1)
441 .padding_bottom(padding_bottom)
442 .input_channels(3)
443 .output_channels_tile(4)
444 .output_channels(output_channels)
445 .input_width(input_width)
446 .input_height(9)
447 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
448 }
449 }
450 }
451}
452
453TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
454 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
455 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
456 for (size_t input_width = 1; input_width < 32; input_width += 7) {
457 ConvHWC2SpCHWMicrokernelTester()
458 .kernel_size(3)
459 .subsampling(2)
460 .padding_width(1)
461 .input_channels(3)
462 .output_channels_tile(4)
463 .output_channels(output_channels)
464 .input_width(input_width)
465 .input_height(9)
466 .output_y_start(output_y_start)
467 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
468 }
469 }
470 }
471}
472
473TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
474 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
475 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
476 for (size_t input_width = 1; input_width < 32; input_width += 7) {
477 ConvHWC2SpCHWMicrokernelTester()
478 .kernel_size(3)
479 .subsampling(2)
480 .padding_width(1)
481 .input_channels(3)
482 .output_channels_tile(4)
483 .output_channels(output_channels)
484 .input_width(input_width)
485 .input_height(9)
486 .output_y_end(output_y_end)
487 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
488 }
489 }
490 }
491}
492
493TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
494 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
495 for (size_t input_width = 1; input_width < 32; input_width += 7) {
496 ConvHWC2SpCHWMicrokernelTester()
497 .kernel_size(3)
498 .subsampling(2)
499 .padding_width(1)
500 .input_channels(3)
501 .output_channels_tile(4)
502 .output_channels(output_channels)
503 .input_width(input_width)
504 .input_height(6)
505 .qmin(128)
506 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
507 }
508 }
509}
510
511TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
512 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
513 for (size_t input_width = 1; input_width < 32; input_width += 7) {
514 ConvHWC2SpCHWMicrokernelTester()
515 .kernel_size(3)
516 .subsampling(2)
517 .padding_width(1)
518 .input_channels(3)
519 .output_channels_tile(4)
520 .output_channels(output_channels)
521 .input_width(input_width)
522 .input_height(6)
523 .qmax(128)
524 .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
525 }
526 }
527}