blob: 83c7e0dc8b3bd0cddca41377c4b8551896961e44 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
10
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/conv.h>
Marat Dukhan1f29b802020-05-15 23:46:39 -070012#include "conv-hwc2chw-microkernel-tester.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
14
Marat Dukhanc7634882020-12-07 15:11:12 -080015#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_eq_4) {
17 TEST_REQUIRES_ARM_NEON;
18 ConvHWC2CHWMicrokernelTester()
19 .kernel_size(3)
20 .subsampling(2)
21 .padding_width(1)
22 .input_channels(3)
23 .output_channels_tile(4)
24 .output_channels(4)
25 .input_width(4)
26 .input_height(3)
27 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
28 }
29
30 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_div_4) {
31 TEST_REQUIRES_ARM_NEON;
32 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
33 ConvHWC2CHWMicrokernelTester()
34 .kernel_size(3)
35 .subsampling(2)
36 .padding_width(1)
37 .input_channels(3)
38 .output_channels_tile(4)
39 .output_channels(4)
40 .input_width(input_width)
41 .input_height(3)
42 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
43 }
44 }
45
46 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_lt_4) {
47 TEST_REQUIRES_ARM_NEON;
48 for (size_t input_width = 1; input_width < 4; input_width++) {
49 ConvHWC2CHWMicrokernelTester()
50 .kernel_size(3)
51 .subsampling(2)
52 .padding_width(1)
53 .input_channels(3)
54 .output_channels_tile(4)
55 .output_channels(4)
56 .input_width(input_width)
57 .input_height(3)
58 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
59 }
60 }
61
62 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_gt_4) {
63 TEST_REQUIRES_ARM_NEON;
64 for (size_t input_width = 5; input_width < 8; input_width++) {
65 ConvHWC2CHWMicrokernelTester()
66 .kernel_size(3)
67 .subsampling(2)
68 .padding_width(1)
69 .input_channels(3)
70 .output_channels_tile(4)
71 .output_channels(4)
72 .input_width(input_width)
73 .input_height(3)
74 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
75 }
76 }
77
78 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_lt_4) {
79 TEST_REQUIRES_ARM_NEON;
80 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
81 for (size_t input_width = 1; input_width < 32; input_width += 7) {
82 ConvHWC2CHWMicrokernelTester()
83 .kernel_size(3)
84 .subsampling(2)
85 .padding_width(1)
86 .input_channels(3)
87 .output_channels_tile(4)
88 .output_channels(output_channels)
89 .input_width(input_width)
90 .input_height(3)
91 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
92 }
93 }
94 }
95
96 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_div_4) {
97 TEST_REQUIRES_ARM_NEON;
98 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
99 for (size_t input_width = 1; input_width < 32; input_width += 7) {
100 ConvHWC2CHWMicrokernelTester()
101 .kernel_size(3)
102 .subsampling(2)
103 .padding_width(1)
104 .input_channels(3)
105 .output_channels_tile(4)
106 .output_channels(output_channels)
107 .input_width(input_width)
108 .input_height(3)
109 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
110 }
111 }
112 }
113
114 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_gt_4) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
117 for (size_t input_width = 1; input_width < 32; input_width += 7) {
118 ConvHWC2CHWMicrokernelTester()
119 .kernel_size(3)
120 .subsampling(2)
121 .padding_width(1)
122 .input_channels(3)
123 .output_channels_tile(4)
124 .output_channels(output_channels)
125 .input_width(input_width)
126 .input_height(3)
127 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
128 }
129 }
130 }
131
132 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_lt_3) {
133 TEST_REQUIRES_ARM_NEON;
134 for (size_t input_height = 1; input_height < 3; input_height++) {
135 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
136 for (size_t input_width = 1; input_width < 32; input_width += 7) {
137 ConvHWC2CHWMicrokernelTester()
138 .kernel_size(3)
139 .subsampling(2)
140 .padding(1)
141 .input_channels(3) // padded input height of at least 3 required
142 .output_channels_tile(4)
143 .output_channels(output_channels)
144 .input_width(input_width)
145 .input_height(input_height)
146 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
147 }
148 }
149 }
150 }
151
152 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_gt_3) {
153 TEST_REQUIRES_ARM_NEON;
154 for (size_t input_height = 4; input_height <= 9; input_height++) {
155 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
156 for (size_t input_width = 1; input_width < 32; input_width += 7) {
157 ConvHWC2CHWMicrokernelTester()
158 .kernel_size(3)
159 .subsampling(2)
160 .padding_width(1)
161 .input_channels(3)
162 .output_channels_tile(4)
163 .output_channels(output_channels)
164 .input_width(input_width)
165 .input_height(input_height)
166 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
167 }
168 }
169 }
170 }
171
172 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_top) {
173 TEST_REQUIRES_ARM_NEON;
174 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
175 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
176 for (size_t input_width = 1; input_width < 32; input_width += 7) {
177 ConvHWC2CHWMicrokernelTester()
178 .kernel_size(3)
179 .subsampling(2)
180 .padding_width(1)
181 .padding_top(padding_top)
182 .input_channels(3)
183 .output_channels_tile(4)
184 .output_channels(output_channels)
185 .input_width(input_width)
186 .input_height(9)
187 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
188 }
189 }
190 }
191 }
192
193 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_bottom) {
194 TEST_REQUIRES_ARM_NEON;
195 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
196 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
197 for (size_t input_width = 1; input_width < 32; input_width += 7) {
198 ConvHWC2CHWMicrokernelTester()
199 .kernel_size(3)
200 .subsampling(2)
201 .padding_width(1)
202 .padding_bottom(padding_bottom)
203 .input_channels(3)
204 .output_channels_tile(4)
205 .output_channels(output_channels)
206 .input_width(input_width)
207 .input_height(9)
208 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
209 }
210 }
211 }
212 }
213
214 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_start) {
215 TEST_REQUIRES_ARM_NEON;
216 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
217 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
218 for (size_t input_width = 1; input_width < 32; input_width += 7) {
219 ConvHWC2CHWMicrokernelTester()
220 .kernel_size(3)
221 .subsampling(2)
222 .padding_width(1)
223 .input_channels(3)
224 .output_channels_tile(4)
225 .output_channels(output_channels)
226 .input_width(input_width)
227 .input_height(9)
228 .output_y_start(output_y_start)
229 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
230 }
231 }
232 }
233 }
234
235 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_end) {
236 TEST_REQUIRES_ARM_NEON;
237 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
238 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
239 for (size_t input_width = 1; input_width < 32; input_width += 7) {
240 ConvHWC2CHWMicrokernelTester()
241 .kernel_size(3)
242 .subsampling(2)
243 .padding_width(1)
244 .input_channels(3)
245 .output_channels_tile(4)
246 .output_channels(output_channels)
247 .input_width(input_width)
248 .input_height(9)
249 .output_y_end(output_y_end)
250 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
251 }
252 }
253 }
254 }
255
256 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmin) {
257 TEST_REQUIRES_ARM_NEON;
258 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
259 for (size_t input_width = 1; input_width < 32; input_width += 7) {
260 ConvHWC2CHWMicrokernelTester()
261 .kernel_size(3)
262 .subsampling(2)
263 .padding_width(1)
264 .input_channels(3)
265 .output_channels_tile(4)
266 .output_channels(output_channels)
267 .input_width(input_width)
268 .input_height(6)
269 .qmin(128)
270 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
271 }
272 }
273 }
274
275 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmax) {
276 TEST_REQUIRES_ARM_NEON;
277 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
278 for (size_t input_width = 1; input_width < 32; input_width += 7) {
279 ConvHWC2CHWMicrokernelTester()
280 .kernel_size(3)
281 .subsampling(2)
282 .padding_width(1)
283 .input_channels(3)
284 .output_channels_tile(4)
285 .output_channels(output_channels)
286 .input_width(input_width)
287 .input_height(6)
288 .qmax(128)
289 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
290 }
291 }
292 }
293#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
294
295
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700296#if XNN_ARCH_ARM64
Marat Dukhan1f29b802020-05-15 23:46:39 -0700297 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700298 TEST_REQUIRES_ARM_NEON_FMA;
Marat Dukhan1f29b802020-05-15 23:46:39 -0700299 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700300 .kernel_size(3)
301 .subsampling(2)
302 .padding_width(1)
303 .input_channels(3)
304 .output_channels_tile(4)
305 .output_channels(4)
306 .input_width(4)
307 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700308 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700309 }
310
Marat Dukhan1f29b802020-05-15 23:46:39 -0700311 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700312 TEST_REQUIRES_ARM_NEON_FMA;
313 for (size_t input_width = 8; input_width <= 32; input_width += 12) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700314 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700315 .kernel_size(3)
316 .subsampling(2)
317 .padding_width(1)
318 .input_channels(3)
319 .output_channels_tile(4)
320 .output_channels(4)
321 .input_width(input_width)
322 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700323 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700324 }
325 }
326
Marat Dukhan1f29b802020-05-15 23:46:39 -0700327 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700328 TEST_REQUIRES_ARM_NEON_FMA;
329 for (size_t input_width = 1; input_width < 4; input_width++) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700330 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700331 .kernel_size(3)
332 .subsampling(2)
333 .padding_width(1)
334 .input_channels(3)
335 .output_channels_tile(4)
336 .output_channels(4)
337 .input_width(input_width)
338 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700339 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700340 }
341 }
342
Marat Dukhan1f29b802020-05-15 23:46:39 -0700343 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700344 TEST_REQUIRES_ARM_NEON_FMA;
345 for (size_t input_width = 5; input_width < 8; input_width++) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700346 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700347 .kernel_size(3)
348 .subsampling(2)
349 .padding_width(1)
350 .input_channels(3)
351 .output_channels_tile(4)
352 .output_channels(4)
353 .input_width(input_width)
354 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700355 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700356 }
357 }
358
Marat Dukhan1f29b802020-05-15 23:46:39 -0700359 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700360 TEST_REQUIRES_ARM_NEON_FMA;
361 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
362 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700363 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700364 .kernel_size(3)
365 .subsampling(2)
366 .padding_width(1)
367 .input_channels(3)
368 .output_channels_tile(4)
369 .output_channels(output_channels)
370 .input_width(input_width)
371 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700372 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700373 }
374 }
375 }
376
Marat Dukhan1f29b802020-05-15 23:46:39 -0700377 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700378 TEST_REQUIRES_ARM_NEON_FMA;
379 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
380 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700381 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700382 .kernel_size(3)
383 .subsampling(2)
384 .padding_width(1)
385 .input_channels(3)
386 .output_channels_tile(4)
387 .output_channels(output_channels)
388 .input_width(input_width)
389 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700390 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700391 }
392 }
393 }
394
Marat Dukhan1f29b802020-05-15 23:46:39 -0700395 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700396 TEST_REQUIRES_ARM_NEON_FMA;
397 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
398 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700399 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700400 .kernel_size(3)
401 .subsampling(2)
402 .padding_width(1)
403 .input_channels(3)
404 .output_channels_tile(4)
405 .output_channels(output_channels)
406 .input_width(input_width)
407 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700408 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700409 }
410 }
411 }
412
Marat Dukhan1f29b802020-05-15 23:46:39 -0700413 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700414 TEST_REQUIRES_ARM_NEON_FMA;
415 for (size_t input_height = 1; input_height < 3; input_height++) {
416 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
417 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700418 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700419 .kernel_size(3)
420 .subsampling(2)
421 .padding(1)
422 .input_channels(3) // padded input height of at least 3 required
423 .output_channels_tile(4)
424 .output_channels(output_channels)
425 .input_width(input_width)
426 .input_height(input_height)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700427 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700428 }
429 }
430 }
431 }
432
Marat Dukhan1f29b802020-05-15 23:46:39 -0700433 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700434 TEST_REQUIRES_ARM_NEON_FMA;
435 for (size_t input_height = 4; input_height <= 9; input_height++) {
436 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
437 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700438 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700439 .kernel_size(3)
440 .subsampling(2)
441 .padding_width(1)
442 .input_channels(3)
443 .output_channels_tile(4)
444 .output_channels(output_channels)
445 .input_width(input_width)
446 .input_height(input_height)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700447 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700448 }
449 }
450 }
451 }
452
Marat Dukhan1f29b802020-05-15 23:46:39 -0700453 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700454 TEST_REQUIRES_ARM_NEON_FMA;
455 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
456 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
457 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700458 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700459 .kernel_size(3)
460 .subsampling(2)
461 .padding_width(1)
462 .padding_top(padding_top)
463 .input_channels(3)
464 .output_channels_tile(4)
465 .output_channels(output_channels)
466 .input_width(input_width)
467 .input_height(9)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700468 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700469 }
470 }
471 }
472 }
473
Marat Dukhan1f29b802020-05-15 23:46:39 -0700474 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700475 TEST_REQUIRES_ARM_NEON_FMA;
476 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
477 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
478 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700479 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700480 .kernel_size(3)
481 .subsampling(2)
482 .padding_width(1)
483 .padding_bottom(padding_bottom)
484 .input_channels(3)
485 .output_channels_tile(4)
486 .output_channels(output_channels)
487 .input_width(input_width)
488 .input_height(9)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700489 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700490 }
491 }
492 }
493 }
494
Marat Dukhan1f29b802020-05-15 23:46:39 -0700495 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700496 TEST_REQUIRES_ARM_NEON_FMA;
497 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
498 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
499 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700500 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700501 .kernel_size(3)
502 .subsampling(2)
503 .padding_width(1)
504 .input_channels(3)
505 .output_channels_tile(4)
506 .output_channels(output_channels)
507 .input_width(input_width)
508 .input_height(9)
509 .output_y_start(output_y_start)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700510 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700511 }
512 }
513 }
514 }
515
Marat Dukhan1f29b802020-05-15 23:46:39 -0700516 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700517 TEST_REQUIRES_ARM_NEON_FMA;
518 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
519 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
520 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700521 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700522 .kernel_size(3)
523 .subsampling(2)
524 .padding_width(1)
525 .input_channels(3)
526 .output_channels_tile(4)
527 .output_channels(output_channels)
528 .input_width(input_width)
529 .input_height(9)
530 .output_y_end(output_y_end)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700531 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700532 }
533 }
534 }
535 }
536
Marat Dukhan1f29b802020-05-15 23:46:39 -0700537 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700538 TEST_REQUIRES_ARM_NEON_FMA;
539 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
540 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700541 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700542 .kernel_size(3)
543 .subsampling(2)
544 .padding_width(1)
545 .input_channels(3)
546 .output_channels_tile(4)
547 .output_channels(output_channels)
548 .input_width(input_width)
549 .input_height(6)
550 .qmin(128)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700551 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700552 }
553 }
554 }
555
Marat Dukhan1f29b802020-05-15 23:46:39 -0700556 TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700557 TEST_REQUIRES_ARM_NEON_FMA;
558 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
559 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700560 ConvHWC2CHWMicrokernelTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700561 .kernel_size(3)
562 .subsampling(2)
563 .padding_width(1)
564 .input_channels(3)
565 .output_channels_tile(4)
566 .output_channels(output_channels)
567 .input_width(input_width)
568 .input_height(6)
569 .qmax(128)
Marat Dukhan1f29b802020-05-15 23:46:39 -0700570 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700571 }
572 }
573 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700574#endif // XNN_ARCH_ARM64
Erich Elsen563df5f2019-10-23 08:02:21 -0700575
Erich Elsenb1233402020-06-08 15:53:15 -0700576#if XNN_ARCH_X86 || XNN_ARCH_X86_64
577TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_eq_1) {
578 TEST_REQUIRES_X86_SSE;
579 ConvHWC2CHWMicrokernelTester()
580 .kernel_size(3)
581 .subsampling(2)
582 .padding_width(1)
583 .input_channels(3)
584 .output_channels_tile(4)
585 .output_channels(4)
586 .input_width(4)
587 .input_height(3)
588 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
589}
590
591
592TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_gt_1) {
593 TEST_REQUIRES_X86_SSE;
594 for (size_t input_width = 2; input_width < 33; input_width++) {
595 ConvHWC2CHWMicrokernelTester()
596 .kernel_size(3)
597 .subsampling(2)
598 .padding_width(1)
599 .input_channels(3)
600 .output_channels_tile(4)
601 .output_channels(4)
602 .input_width(input_width)
603 .input_height(3)
604 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
605 }
606}
607
608TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_lt_4) {
609 TEST_REQUIRES_X86_SSE;
610 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
611 for (size_t input_width = 1; input_width < 32; input_width += 7) {
612 ConvHWC2CHWMicrokernelTester()
613 .kernel_size(3)
614 .subsampling(2)
615 .padding_width(1)
616 .input_channels(3)
617 .output_channels_tile(4)
618 .output_channels(output_channels)
619 .input_width(input_width)
620 .input_height(3)
621 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
622 }
623 }
624}
625
626TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_div_4) {
627 TEST_REQUIRES_X86_SSE;
628 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
629 for (size_t input_width = 1; input_width < 32; input_width += 7) {
630 ConvHWC2CHWMicrokernelTester()
631 .kernel_size(3)
632 .subsampling(2)
633 .padding_width(1)
634 .input_channels(3)
635 .output_channels_tile(4)
636 .output_channels(output_channels)
637 .input_width(input_width)
638 .input_height(3)
639 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
640 }
641 }
642}
643
644TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_gt_4) {
645 TEST_REQUIRES_X86_SSE;
646 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
647 for (size_t input_width = 1; input_width < 32; input_width += 7) {
648 ConvHWC2CHWMicrokernelTester()
649 .kernel_size(3)
650 .subsampling(2)
651 .padding_width(1)
652 .input_channels(3)
653 .output_channels_tile(4)
654 .output_channels(output_channels)
655 .input_width(input_width)
656 .input_height(3)
657 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
658 }
659 }
660}
661
662TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_lt_3) {
663 TEST_REQUIRES_X86_SSE;
664 for (size_t input_height = 1; input_height < 3; input_height++) {
665 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
666 for (size_t input_width = 1; input_width < 32; input_width += 7) {
667 ConvHWC2CHWMicrokernelTester()
668 .kernel_size(3)
669 .subsampling(2)
670 .padding(1)
671 .input_channels(3) // padded input height of at least 3 required
672 .output_channels_tile(4)
673 .output_channels(output_channels)
674 .input_width(input_width)
675 .input_height(input_height)
676 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
677 }
678 }
679 }
680}
681
682TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_gt_3) {
683 TEST_REQUIRES_X86_SSE;
684 for (size_t input_height = 4; input_height <= 9; input_height++) {
685 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
686 for (size_t input_width = 1; input_width < 32; input_width += 7) {
687 ConvHWC2CHWMicrokernelTester()
688 .kernel_size(3)
689 .subsampling(2)
690 .padding_width(1)
691 .input_channels(3)
692 .output_channels_tile(4)
693 .output_channels(output_channels)
694 .input_width(input_width)
695 .input_height(input_height)
696 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
697 }
698 }
699 }
700}
701
702TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_top) {
703 TEST_REQUIRES_X86_SSE;
704 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
705 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
706 for (size_t input_width = 1; input_width < 32; input_width += 7) {
707 ConvHWC2CHWMicrokernelTester()
708 .kernel_size(3)
709 .subsampling(2)
710 .padding_width(1)
711 .padding_top(padding_top)
712 .input_channels(3)
713 .output_channels_tile(4)
714 .output_channels(output_channels)
715 .input_width(input_width)
716 .input_height(9)
717 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
718 }
719 }
720 }
721}
722
723TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_bottom) {
724 TEST_REQUIRES_X86_SSE;
725 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
726 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
727 for (size_t input_width = 1; input_width < 32; input_width += 7) {
728 ConvHWC2CHWMicrokernelTester()
729 .kernel_size(3)
730 .subsampling(2)
731 .padding_width(1)
732 .padding_bottom(padding_bottom)
733 .input_channels(3)
734 .output_channels_tile(4)
735 .output_channels(output_channels)
736 .input_width(input_width)
737 .input_height(9)
738 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
739 }
740 }
741 }
742}
743
744TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_start) {
745 TEST_REQUIRES_X86_SSE;
746 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
747 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
748 for (size_t input_width = 1; input_width < 32; input_width += 7) {
749 ConvHWC2CHWMicrokernelTester()
750 .kernel_size(3)
751 .subsampling(2)
752 .padding_width(1)
753 .input_channels(3)
754 .output_channels_tile(4)
755 .output_channels(output_channels)
756 .input_width(input_width)
757 .input_height(9)
758 .output_y_start(output_y_start)
759 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
760 }
761 }
762 }
763}
764
765TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_end) {
766 TEST_REQUIRES_X86_SSE;
767 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
768 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
769 for (size_t input_width = 1; input_width < 32; input_width += 7) {
770 ConvHWC2CHWMicrokernelTester()
771 .kernel_size(3)
772 .subsampling(2)
773 .padding_width(1)
774 .input_channels(3)
775 .output_channels_tile(4)
776 .output_channels(output_channels)
777 .input_width(input_width)
778 .input_height(9)
779 .output_y_end(output_y_end)
780 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
781 }
782 }
783 }
784}
785
786TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmin) {
787 TEST_REQUIRES_X86_SSE;
788 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
789 for (size_t input_width = 1; input_width < 32; input_width += 7) {
790 ConvHWC2CHWMicrokernelTester()
791 .kernel_size(3)
792 .subsampling(2)
793 .padding_width(1)
794 .input_channels(3)
795 .output_channels_tile(4)
796 .output_channels(output_channels)
797 .input_width(input_width)
798 .input_height(6)
799 .qmin(128)
800 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
801 }
802 }
803}
804
805TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmax) {
806 TEST_REQUIRES_X86_SSE;
807 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
808 for (size_t input_width = 1; input_width < 32; input_width += 7) {
809 ConvHWC2CHWMicrokernelTester()
810 .kernel_size(3)
811 .subsampling(2)
812 .padding_width(1)
813 .input_channels(3)
814 .output_channels_tile(4)
815 .output_channels(output_channels)
816 .input_width(input_width)
817 .input_height(6)
818 .qmax(128)
819 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
820 }
821 }
822}
823#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
824
825#if XNN_ARCH_X86 || XNN_ARCH_X86_64
826TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_eq_1) {
827 TEST_REQUIRES_X86_SSE;
828 ConvHWC2CHWMicrokernelTester()
829 .kernel_size(3)
830 .subsampling(2)
831 .padding_width(1)
832 .input_channels(3)
833 .output_channels_tile(4)
834 .output_channels(4)
835 .input_width(4)
836 .input_height(3)
837 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
838}
839
840
841TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_gt_1) {
842 TEST_REQUIRES_X86_SSE;
843 for (size_t input_width = 2; input_width < 33; input_width++) {
844 ConvHWC2CHWMicrokernelTester()
845 .kernel_size(3)
846 .subsampling(2)
847 .padding_width(1)
848 .input_channels(3)
849 .output_channels_tile(4)
850 .output_channels(4)
851 .input_width(input_width)
852 .input_height(3)
853 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
854 }
855}
856
857TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_lt_4) {
858 TEST_REQUIRES_X86_SSE;
859 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
860 for (size_t input_width = 1; input_width < 32; input_width += 7) {
861 ConvHWC2CHWMicrokernelTester()
862 .kernel_size(3)
863 .subsampling(2)
864 .padding_width(1)
865 .input_channels(3)
866 .output_channels_tile(4)
867 .output_channels(output_channels)
868 .input_width(input_width)
869 .input_height(3)
870 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
871 }
872 }
873}
874
875TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_div_4) {
876 TEST_REQUIRES_X86_SSE;
877 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
878 for (size_t input_width = 1; input_width < 32; input_width += 7) {
879 ConvHWC2CHWMicrokernelTester()
880 .kernel_size(3)
881 .subsampling(2)
882 .padding_width(1)
883 .input_channels(3)
884 .output_channels_tile(4)
885 .output_channels(output_channels)
886 .input_width(input_width)
887 .input_height(3)
888 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
889 }
890 }
891}
892
893TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_gt_4) {
894 TEST_REQUIRES_X86_SSE;
895 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
896 for (size_t input_width = 1; input_width < 32; input_width += 7) {
897 ConvHWC2CHWMicrokernelTester()
898 .kernel_size(3)
899 .subsampling(2)
900 .padding_width(1)
901 .input_channels(3)
902 .output_channels_tile(4)
903 .output_channels(output_channels)
904 .input_width(input_width)
905 .input_height(3)
906 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
907 }
908 }
909}
910
911TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_lt_3) {
912 TEST_REQUIRES_X86_SSE;
913 for (size_t input_height = 1; input_height < 3; input_height++) {
914 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
915 for (size_t input_width = 1; input_width < 32; input_width += 7) {
916 ConvHWC2CHWMicrokernelTester()
917 .kernel_size(3)
918 .subsampling(2)
919 .padding(1)
920 .input_channels(3) // padded input height of at least 3 required
921 .output_channels_tile(4)
922 .output_channels(output_channels)
923 .input_width(input_width)
924 .input_height(input_height)
925 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
926 }
927 }
928 }
929}
930
931TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_gt_3) {
932 TEST_REQUIRES_X86_SSE;
933 for (size_t input_height = 4; input_height <= 9; input_height++) {
934 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
935 for (size_t input_width = 1; input_width < 32; input_width += 7) {
936 ConvHWC2CHWMicrokernelTester()
937 .kernel_size(3)
938 .subsampling(2)
939 .padding_width(1)
940 .input_channels(3)
941 .output_channels_tile(4)
942 .output_channels(output_channels)
943 .input_width(input_width)
944 .input_height(input_height)
945 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
946 }
947 }
948 }
949}
950
951TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_top) {
952 TEST_REQUIRES_X86_SSE;
953 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
954 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
955 for (size_t input_width = 1; input_width < 32; input_width += 7) {
956 ConvHWC2CHWMicrokernelTester()
957 .kernel_size(3)
958 .subsampling(2)
959 .padding_width(1)
960 .padding_top(padding_top)
961 .input_channels(3)
962 .output_channels_tile(4)
963 .output_channels(output_channels)
964 .input_width(input_width)
965 .input_height(9)
966 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
967 }
968 }
969 }
970}
971
972TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_bottom) {
973 TEST_REQUIRES_X86_SSE;
974 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
975 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
976 for (size_t input_width = 1; input_width < 32; input_width += 7) {
977 ConvHWC2CHWMicrokernelTester()
978 .kernel_size(3)
979 .subsampling(2)
980 .padding_width(1)
981 .padding_bottom(padding_bottom)
982 .input_channels(3)
983 .output_channels_tile(4)
984 .output_channels(output_channels)
985 .input_width(input_width)
986 .input_height(9)
987 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
988 }
989 }
990 }
991}
992
993TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_start) {
994 TEST_REQUIRES_X86_SSE;
995 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
996 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
997 for (size_t input_width = 1; input_width < 32; input_width += 7) {
998 ConvHWC2CHWMicrokernelTester()
999 .kernel_size(3)
1000 .subsampling(2)
1001 .padding_width(1)
1002 .input_channels(3)
1003 .output_channels_tile(4)
1004 .output_channels(output_channels)
1005 .input_width(input_width)
1006 .input_height(9)
1007 .output_y_start(output_y_start)
1008 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
1009 }
1010 }
1011 }
1012}
1013
1014TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_end) {
1015 TEST_REQUIRES_X86_SSE;
1016 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1017 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1018 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1019 ConvHWC2CHWMicrokernelTester()
1020 .kernel_size(3)
1021 .subsampling(2)
1022 .padding_width(1)
1023 .input_channels(3)
1024 .output_channels_tile(4)
1025 .output_channels(output_channels)
1026 .input_width(input_width)
1027 .input_height(9)
1028 .output_y_end(output_y_end)
1029 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
1030 }
1031 }
1032 }
1033}
1034
1035TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmin) {
1036 TEST_REQUIRES_X86_SSE;
1037 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1038 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1039 ConvHWC2CHWMicrokernelTester()
1040 .kernel_size(3)
1041 .subsampling(2)
1042 .padding_width(1)
1043 .input_channels(3)
1044 .output_channels_tile(4)
1045 .output_channels(output_channels)
1046 .input_width(input_width)
1047 .input_height(6)
1048 .qmin(128)
1049 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
1050 }
1051 }
1052}
1053
1054TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmax) {
1055 TEST_REQUIRES_X86_SSE;
1056 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1057 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1058 ConvHWC2CHWMicrokernelTester()
1059 .kernel_size(3)
1060 .subsampling(2)
1061 .padding_width(1)
1062 .input_channels(3)
1063 .output_channels_tile(4)
1064 .output_channels(output_channels)
1065 .input_width(input_width)
1066 .input_height(6)
1067 .qmax(128)
1068 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
1069 }
1070 }
1071}
1072#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1073
Frank Barchard22136062020-11-24 18:44:46 -08001074#if XNN_ARCH_WASMSIMD
1075TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_eq_1) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001076 ConvHWC2CHWMicrokernelTester()
1077 .kernel_size(3)
1078 .subsampling(2)
1079 .padding_width(1)
1080 .input_channels(3)
1081 .output_channels_tile(4)
1082 .output_channels(4)
1083 .input_width(4)
1084 .input_height(3)
Frank Barchard22136062020-11-24 18:44:46 -08001085 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001086}
1087
1088
Frank Barchard22136062020-11-24 18:44:46 -08001089TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_gt_1) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001090 for (size_t input_width = 2; input_width < 33; input_width++) {
1091 ConvHWC2CHWMicrokernelTester()
1092 .kernel_size(3)
1093 .subsampling(2)
1094 .padding_width(1)
1095 .input_channels(3)
1096 .output_channels_tile(4)
1097 .output_channels(4)
1098 .input_width(input_width)
1099 .input_height(3)
Frank Barchard22136062020-11-24 18:44:46 -08001100 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001101 }
1102}
1103
Frank Barchard22136062020-11-24 18:44:46 -08001104TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_lt_4) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001105 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
1106 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1107 ConvHWC2CHWMicrokernelTester()
1108 .kernel_size(3)
1109 .subsampling(2)
1110 .padding_width(1)
1111 .input_channels(3)
1112 .output_channels_tile(4)
1113 .output_channels(output_channels)
1114 .input_width(input_width)
1115 .input_height(3)
Frank Barchard22136062020-11-24 18:44:46 -08001116 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001117 }
1118 }
1119}
1120
Frank Barchard22136062020-11-24 18:44:46 -08001121TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_div_4) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001122 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
1123 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1124 ConvHWC2CHWMicrokernelTester()
1125 .kernel_size(3)
1126 .subsampling(2)
1127 .padding_width(1)
1128 .input_channels(3)
1129 .output_channels_tile(4)
1130 .output_channels(output_channels)
1131 .input_width(input_width)
1132 .input_height(3)
Frank Barchard22136062020-11-24 18:44:46 -08001133 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001134 }
1135 }
1136}
1137
Frank Barchard22136062020-11-24 18:44:46 -08001138TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_gt_4) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001139 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
1140 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1141 ConvHWC2CHWMicrokernelTester()
1142 .kernel_size(3)
1143 .subsampling(2)
1144 .padding_width(1)
1145 .input_channels(3)
1146 .output_channels_tile(4)
1147 .output_channels(output_channels)
1148 .input_width(input_width)
1149 .input_height(3)
Frank Barchard22136062020-11-24 18:44:46 -08001150 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001151 }
1152 }
1153}
1154
Frank Barchard22136062020-11-24 18:44:46 -08001155TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_lt_3) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001156 for (size_t input_height = 1; input_height < 3; input_height++) {
1157 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1158 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1159 ConvHWC2CHWMicrokernelTester()
1160 .kernel_size(3)
1161 .subsampling(2)
1162 .padding(1)
1163 .input_channels(3) // padded input height of at least 3 required
1164 .output_channels_tile(4)
1165 .output_channels(output_channels)
1166 .input_width(input_width)
1167 .input_height(input_height)
Frank Barchard22136062020-11-24 18:44:46 -08001168 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001169 }
1170 }
1171 }
1172}
1173
Frank Barchard22136062020-11-24 18:44:46 -08001174TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_gt_3) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001175 for (size_t input_height = 4; input_height <= 9; input_height++) {
1176 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1177 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1178 ConvHWC2CHWMicrokernelTester()
1179 .kernel_size(3)
1180 .subsampling(2)
1181 .padding_width(1)
1182 .input_channels(3)
1183 .output_channels_tile(4)
1184 .output_channels(output_channels)
1185 .input_width(input_width)
1186 .input_height(input_height)
Frank Barchard22136062020-11-24 18:44:46 -08001187 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001188 }
1189 }
1190 }
1191}
1192
Frank Barchard22136062020-11-24 18:44:46 -08001193TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_top) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001194 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1195 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1196 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1197 ConvHWC2CHWMicrokernelTester()
1198 .kernel_size(3)
1199 .subsampling(2)
1200 .padding_width(1)
1201 .padding_top(padding_top)
1202 .input_channels(3)
1203 .output_channels_tile(4)
1204 .output_channels(output_channels)
1205 .input_width(input_width)
1206 .input_height(9)
Frank Barchard22136062020-11-24 18:44:46 -08001207 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001208 }
1209 }
1210 }
1211}
1212
Frank Barchard22136062020-11-24 18:44:46 -08001213TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_bottom) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001214 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1215 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1216 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1217 ConvHWC2CHWMicrokernelTester()
1218 .kernel_size(3)
1219 .subsampling(2)
1220 .padding_width(1)
1221 .padding_bottom(padding_bottom)
1222 .input_channels(3)
1223 .output_channels_tile(4)
1224 .output_channels(output_channels)
1225 .input_width(input_width)
1226 .input_height(9)
Frank Barchard22136062020-11-24 18:44:46 -08001227 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001228 }
1229 }
1230 }
1231}
1232
Frank Barchard22136062020-11-24 18:44:46 -08001233TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_start) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001234 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1235 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1236 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1237 ConvHWC2CHWMicrokernelTester()
1238 .kernel_size(3)
1239 .subsampling(2)
1240 .padding_width(1)
1241 .input_channels(3)
1242 .output_channels_tile(4)
1243 .output_channels(output_channels)
1244 .input_width(input_width)
1245 .input_height(9)
1246 .output_y_start(output_y_start)
Frank Barchard22136062020-11-24 18:44:46 -08001247 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001248 }
1249 }
1250 }
1251}
1252
Frank Barchard22136062020-11-24 18:44:46 -08001253TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_end) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001254 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1255 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1256 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1257 ConvHWC2CHWMicrokernelTester()
1258 .kernel_size(3)
1259 .subsampling(2)
1260 .padding_width(1)
1261 .input_channels(3)
1262 .output_channels_tile(4)
1263 .output_channels(output_channels)
1264 .input_width(input_width)
1265 .input_height(9)
1266 .output_y_end(output_y_end)
Frank Barchard22136062020-11-24 18:44:46 -08001267 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001268 }
1269 }
1270 }
1271}
1272
Frank Barchard22136062020-11-24 18:44:46 -08001273TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmin) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001274 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1275 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1276 ConvHWC2CHWMicrokernelTester()
1277 .kernel_size(3)
1278 .subsampling(2)
1279 .padding_width(1)
1280 .input_channels(3)
1281 .output_channels_tile(4)
1282 .output_channels(output_channels)
1283 .input_width(input_width)
1284 .input_height(6)
1285 .qmin(128)
Frank Barchard22136062020-11-24 18:44:46 -08001286 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001287 }
1288 }
1289}
1290
Frank Barchard22136062020-11-24 18:44:46 -08001291TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmax) {
Erich Elsen0a1970e2020-06-10 09:24:59 -07001292 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1293 for (size_t input_width = 1; input_width < 32; input_width += 7) {
1294 ConvHWC2CHWMicrokernelTester()
1295 .kernel_size(3)
1296 .subsampling(2)
1297 .padding_width(1)
1298 .input_channels(3)
1299 .output_channels_tile(4)
1300 .output_channels(output_channels)
1301 .input_width(input_width)
1302 .input_height(6)
1303 .qmax(128)
Frank Barchard22136062020-11-24 18:44:46 -08001304 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen0a1970e2020-06-10 09:24:59 -07001305 }
1306 }
1307}
Frank Barchard22136062020-11-24 18:44:46 -08001308#endif // XNN_ARCH_WASMSIMD
Erich Elsen0a1970e2020-06-10 09:24:59 -07001309
Marat Dukhan1f29b802020-05-15 23:46:39 -07001310TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
1311 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001312 .kernel_size(3)
1313 .subsampling(2)
1314 .padding_width(1)
1315 .input_channels(3)
1316 .output_channels_tile(4)
1317 .output_channels(4)
1318 .input_width(4)
1319 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001320 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001321}
1322
1323
Marat Dukhan1f29b802020-05-15 23:46:39 -07001324TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001325 for (size_t input_width = 2; input_width < 33; input_width++) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001326 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001327 .kernel_size(3)
1328 .subsampling(2)
1329 .padding_width(1)
1330 .input_channels(3)
1331 .output_channels_tile(4)
1332 .output_channels(4)
1333 .input_width(input_width)
1334 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001335 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001336 }
1337}
1338
Marat Dukhan1f29b802020-05-15 23:46:39 -07001339TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001340 for (size_t output_channels = 1; output_channels < 4; output_channels++) {
1341 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001342 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001343 .kernel_size(3)
1344 .subsampling(2)
1345 .padding_width(1)
1346 .input_channels(3)
1347 .output_channels_tile(4)
1348 .output_channels(output_channels)
1349 .input_width(input_width)
1350 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001351 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001352 }
1353 }
1354}
1355
Marat Dukhan1f29b802020-05-15 23:46:39 -07001356TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001357 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
1358 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001359 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001360 .kernel_size(3)
1361 .subsampling(2)
1362 .padding_width(1)
1363 .input_channels(3)
1364 .output_channels_tile(4)
1365 .output_channels(output_channels)
1366 .input_width(input_width)
1367 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001368 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001369 }
1370 }
1371}
1372
Marat Dukhan1f29b802020-05-15 23:46:39 -07001373TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001374 for (size_t output_channels = 5; output_channels < 8; output_channels++) {
1375 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001376 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001377 .kernel_size(3)
1378 .subsampling(2)
1379 .padding_width(1)
1380 .input_channels(3)
1381 .output_channels_tile(4)
1382 .output_channels(output_channels)
1383 .input_width(input_width)
1384 .input_height(3)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001385 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001386 }
1387 }
1388}
1389
Marat Dukhan1f29b802020-05-15 23:46:39 -07001390TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001391 for (size_t input_height = 1; input_height < 3; input_height++) {
1392 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1393 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001394 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001395 .kernel_size(3)
1396 .subsampling(2)
1397 .padding(1)
1398 .input_channels(3) // padded input height of at least 3 required
1399 .output_channels_tile(4)
1400 .output_channels(output_channels)
1401 .input_width(input_width)
1402 .input_height(input_height)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001403 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001404 }
1405 }
1406 }
1407}
1408
Marat Dukhan1f29b802020-05-15 23:46:39 -07001409TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001410 for (size_t input_height = 4; input_height <= 9; input_height++) {
1411 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1412 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001413 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001414 .kernel_size(3)
1415 .subsampling(2)
1416 .padding_width(1)
1417 .input_channels(3)
1418 .output_channels_tile(4)
1419 .output_channels(output_channels)
1420 .input_width(input_width)
1421 .input_height(input_height)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001422 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001423 }
1424 }
1425 }
1426}
1427
Marat Dukhan1f29b802020-05-15 23:46:39 -07001428TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001429 for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
1430 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1431 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001432 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001433 .kernel_size(3)
1434 .subsampling(2)
1435 .padding_width(1)
1436 .padding_top(padding_top)
1437 .input_channels(3)
1438 .output_channels_tile(4)
1439 .output_channels(output_channels)
1440 .input_width(input_width)
1441 .input_height(9)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001442 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001443 }
1444 }
1445 }
1446}
1447
Marat Dukhan1f29b802020-05-15 23:46:39 -07001448TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001449 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
1450 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
1451 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001452 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001453 .kernel_size(3)
1454 .subsampling(2)
1455 .padding_width(1)
1456 .padding_bottom(padding_bottom)
1457 .input_channels(3)
1458 .output_channels_tile(4)
1459 .output_channels(output_channels)
1460 .input_width(input_width)
1461 .input_height(9)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001462 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001463 }
1464 }
1465 }
1466}
1467
Marat Dukhan1f29b802020-05-15 23:46:39 -07001468TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001469 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
1470 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1471 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001472 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001473 .kernel_size(3)
1474 .subsampling(2)
1475 .padding_width(1)
1476 .input_channels(3)
1477 .output_channels_tile(4)
1478 .output_channels(output_channels)
1479 .input_width(input_width)
1480 .input_height(9)
1481 .output_y_start(output_y_start)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001482 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001483 }
1484 }
1485 }
1486}
1487
Marat Dukhan1f29b802020-05-15 23:46:39 -07001488TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001489 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
1490 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1491 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001492 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001493 .kernel_size(3)
1494 .subsampling(2)
1495 .padding_width(1)
1496 .input_channels(3)
1497 .output_channels_tile(4)
1498 .output_channels(output_channels)
1499 .input_width(input_width)
1500 .input_height(9)
1501 .output_y_end(output_y_end)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001502 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001503 }
1504 }
1505 }
1506}
1507
Marat Dukhan1f29b802020-05-15 23:46:39 -07001508TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001509 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1510 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001511 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001512 .kernel_size(3)
1513 .subsampling(2)
1514 .padding_width(1)
1515 .input_channels(3)
1516 .output_channels_tile(4)
1517 .output_channels(output_channels)
1518 .input_width(input_width)
1519 .input_height(6)
1520 .qmin(128)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001521 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001522 }
1523 }
1524}
1525
Marat Dukhan1f29b802020-05-15 23:46:39 -07001526TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
Erich Elsen563df5f2019-10-23 08:02:21 -07001527 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
1528 for (size_t input_width = 1; input_width < 32; input_width += 7) {
Marat Dukhan1f29b802020-05-15 23:46:39 -07001529 ConvHWC2CHWMicrokernelTester()
Erich Elsen563df5f2019-10-23 08:02:21 -07001530 .kernel_size(3)
1531 .subsampling(2)
1532 .padding_width(1)
1533 .input_channels(3)
1534 .output_channels_tile(4)
1535 .output_channels(output_channels)
1536 .input_width(input_width)
1537 .input_height(6)
1538 .qmax(128)
Marat Dukhan1f29b802020-05-15 23:46:39 -07001539 .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
Erich Elsen563df5f2019-10-23 08:02:21 -07001540 }
1541 }
1542}