blob: 572b24b36eb9f62798583781f070a433f6cb9b39 [file] [log] [blame]
Frank Barchard5a599a62020-06-04 20:12:44 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8//
9// Auto-generated file. Do not edit!
10// Specification: test/f16-dwconv-minmax.yaml
11// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
23#if XNN_ARCH_ARM64
24 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_eq_8) {
25 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(25)
29 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -080030 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070031 }
32
33 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8) {
34 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(25)
39 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -080040 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070041 }
42 }
43
44 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmin) {
45 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(25)
50 .channels(channels)
51 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -080052 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070053 }
54 }
55
56 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmax) {
57 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(25)
62 .channels(channels)
63 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -080064 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070065 }
66 }
67
68 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_lt_8) {
69 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(25)
74 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -080075 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070076 }
77 }
78
79 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8) {
80 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(25)
85 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -080086 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070087 }
88 }
89
90 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmin) {
91 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(25)
96 .channels(channels)
97 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -080098 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -070099 }
100 }
101
102 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmax) {
103 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(25)
108 .channels(channels)
109 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800110 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700111 }
112 }
113
114 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel) {
115 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(25)
120 .channels(channels)
121 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -0800122 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700123 }
124 }
125
126 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 25; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(25)
133 .channels(channels)
134 .width(3)
135 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -0800136 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700137 }
138 }
139 }
140
141 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(25)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -0800150 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700151 }
152 }
153
154 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(25)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800163 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700164 }
165 }
166
167 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(25)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800176 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700177 }
178 }
179
180 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, input_offset) {
181 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
182 for (uint32_t channels = 16; channels < 128; channels += 24) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(25)
186 .channels(channels)
187 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -0800188 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700189 }
190 }
191
192 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, zero) {
193 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
194 for (uint32_t mz = 0; mz < 25; mz++) {
195 for (uint32_t channels = 16; channels < 128; channels += 24) {
196 DWConvMicrokernelTester()
197 .cr(8)
198 .kr(25)
199 .channels(channels)
200 .input_offset(176)
201 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -0800202 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700203 }
204 }
205 }
206#endif // XNN_ARCH_ARM64
207
208
209#if XNN_ARCH_ARM64
210 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_eq_8) {
211 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
212 DWConvMicrokernelTester()
213 .cr(8)
214 .kr(25)
215 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -0800216 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700217 }
218
219 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8) {
220 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
221 for (uint32_t channels = 16; channels < 128; channels += 24) {
222 DWConvMicrokernelTester()
223 .cr(8)
224 .kr(25)
225 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800226 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700227 }
228 }
229
230 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
231 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
232 for (uint32_t channels = 16; channels < 128; channels += 24) {
233 DWConvMicrokernelTester()
234 .cr(8)
235 .kr(25)
236 .channels(channels)
237 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700239 }
240 }
241
242 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
243 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
244 for (uint32_t channels = 16; channels < 128; channels += 24) {
245 DWConvMicrokernelTester()
246 .cr(8)
247 .kr(25)
248 .channels(channels)
249 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800250 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700251 }
252 }
253
254 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_lt_8) {
255 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
256 for (uint32_t channels = 1; channels < 8; channels++) {
257 DWConvMicrokernelTester()
258 .cr(8)
259 .kr(25)
260 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800261 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700262 }
263 }
264
265 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8) {
266 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
267 for (uint32_t channels = 9; channels < 16; channels++) {
268 DWConvMicrokernelTester()
269 .cr(8)
270 .kr(25)
271 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800272 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700273 }
274 }
275
276 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
277 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
278 for (uint32_t channels = 9; channels < 16; channels++) {
279 DWConvMicrokernelTester()
280 .cr(8)
281 .kr(25)
282 .channels(channels)
283 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800284 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700285 }
286 }
287
288 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
289 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
290 for (uint32_t channels = 9; channels < 16; channels++) {
291 DWConvMicrokernelTester()
292 .cr(8)
293 .kr(25)
294 .channels(channels)
295 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800296 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700297 }
298 }
299
300 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel) {
301 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
302 for (size_t channels = 1; channels <= 40; channels += 7) {
303 DWConvMicrokernelTester()
304 .cr(8)
305 .kr(25)
306 .channels(channels)
307 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -0800308 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700309 }
310 }
311
312 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
313 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
314 for (size_t channels = 1; channels <= 40; channels += 7) {
315 for (size_t step = 2; step <= 25; step++) {
316 DWConvMicrokernelTester()
317 .cr(8)
318 .kr(25)
319 .channels(channels)
320 .width(3)
321 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -0800322 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700323 }
324 }
325 }
326
327 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
328 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
329 for (size_t channels = 1; channels <= 40; channels += 7) {
330 DWConvMicrokernelTester()
331 .cr(8)
332 .kr(25)
333 .channels(8)
334 .width(5)
335 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -0800336 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700337 }
338 }
339
340 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
341 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
342 for (size_t channels = 1; channels <= 40; channels += 7) {
343 DWConvMicrokernelTester()
344 .cr(8)
345 .kr(25)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800349 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700350 }
351 }
352
353 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
354 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
355 for (size_t channels = 1; channels <= 40; channels += 7) {
356 DWConvMicrokernelTester()
357 .cr(8)
358 .kr(25)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800362 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700363 }
364 }
365
366 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, input_offset) {
367 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
368 for (uint32_t channels = 16; channels < 128; channels += 24) {
369 DWConvMicrokernelTester()
370 .cr(8)
371 .kr(25)
372 .channels(channels)
373 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -0800374 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700375 }
376 }
377
378 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, zero) {
379 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
380 for (uint32_t mz = 0; mz < 25; mz++) {
381 for (uint32_t channels = 16; channels < 128; channels += 24) {
382 DWConvMicrokernelTester()
383 .cr(8)
384 .kr(25)
385 .channels(channels)
386 .input_offset(176)
387 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -0800388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700389 }
390 }
391 }
392#endif // XNN_ARCH_ARM64
393
394
395#if XNN_ARCH_ARM64
396 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_eq_16) {
397 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
398 DWConvMicrokernelTester()
399 .cr(16)
400 .kr(25)
401 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -0800402 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700403 }
404
405 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16) {
406 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
407 for (uint32_t channels = 32; channels < 256; channels += 48) {
408 DWConvMicrokernelTester()
409 .cr(16)
410 .kr(25)
411 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800412 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700413 }
414 }
415
416 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmin) {
417 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
418 for (uint32_t channels = 32; channels < 256; channels += 48) {
419 DWConvMicrokernelTester()
420 .cr(16)
421 .kr(25)
422 .channels(channels)
423 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800424 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700425 }
426 }
427
428 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmax) {
429 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
430 for (uint32_t channels = 32; channels < 256; channels += 48) {
431 DWConvMicrokernelTester()
432 .cr(16)
433 .kr(25)
434 .channels(channels)
435 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800436 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700437 }
438 }
439
440 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_lt_16) {
441 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
442 for (uint32_t channels = 1; channels < 16; channels++) {
443 DWConvMicrokernelTester()
444 .cr(16)
445 .kr(25)
446 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800447 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700448 }
449 }
450
451 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16) {
452 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
453 for (uint32_t channels = 17; channels < 32; channels++) {
454 DWConvMicrokernelTester()
455 .cr(16)
456 .kr(25)
457 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800458 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700459 }
460 }
461
462 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmin) {
463 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
464 for (uint32_t channels = 17; channels < 32; channels++) {
465 DWConvMicrokernelTester()
466 .cr(16)
467 .kr(25)
468 .channels(channels)
469 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800470 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700471 }
472 }
473
474 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmax) {
475 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
476 for (uint32_t channels = 17; channels < 32; channels++) {
477 DWConvMicrokernelTester()
478 .cr(16)
479 .kr(25)
480 .channels(channels)
481 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800482 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700483 }
484 }
485
486 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel) {
487 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
488 for (size_t channels = 1; channels <= 80; channels += 15) {
489 DWConvMicrokernelTester()
490 .cr(16)
491 .kr(25)
492 .channels(channels)
493 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -0800494 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700495 }
496 }
497
498 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_step) {
499 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
500 for (size_t channels = 1; channels <= 80; channels += 15) {
501 for (size_t step = 2; step <= 25; step++) {
502 DWConvMicrokernelTester()
503 .cr(16)
504 .kr(25)
505 .channels(channels)
506 .width(3)
507 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -0800508 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700509 }
510 }
511 }
512
513 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_output_stride) {
514 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
515 for (size_t channels = 1; channels <= 80; channels += 15) {
516 DWConvMicrokernelTester()
517 .cr(16)
518 .kr(25)
519 .channels(16)
520 .width(5)
521 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -0800522 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700523 }
524 }
525
526 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmin) {
527 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
528 for (size_t channels = 1; channels <= 80; channels += 15) {
529 DWConvMicrokernelTester()
530 .cr(16)
531 .kr(25)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800535 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700536 }
537 }
538
539 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmax) {
540 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
541 for (size_t channels = 1; channels <= 80; channels += 15) {
542 DWConvMicrokernelTester()
543 .cr(16)
544 .kr(25)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800548 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700549 }
550 }
551
552 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, input_offset) {
553 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
554 for (uint32_t channels = 32; channels < 256; channels += 48) {
555 DWConvMicrokernelTester()
556 .cr(16)
557 .kr(25)
558 .channels(channels)
559 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -0800560 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700561 }
562 }
563
564 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, zero) {
565 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
566 for (uint32_t mz = 0; mz < 25; mz++) {
567 for (uint32_t channels = 32; channels < 256; channels += 48) {
568 DWConvMicrokernelTester()
569 .cr(16)
570 .kr(25)
571 .channels(channels)
572 .input_offset(304)
573 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -0800574 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700575 }
576 }
577 }
578#endif // XNN_ARCH_ARM64
579
580
581#if XNN_ARCH_ARM64
582 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_eq_16) {
583 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
584 DWConvMicrokernelTester()
585 .cr(16)
586 .kr(25)
587 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -0800588 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700589 }
590
591 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16) {
592 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
593 for (uint32_t channels = 32; channels < 256; channels += 48) {
594 DWConvMicrokernelTester()
595 .cr(16)
596 .kr(25)
597 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800598 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700599 }
600 }
601
602 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
603 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
604 for (uint32_t channels = 32; channels < 256; channels += 48) {
605 DWConvMicrokernelTester()
606 .cr(16)
607 .kr(25)
608 .channels(channels)
609 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800610 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700611 }
612 }
613
614 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
615 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
616 for (uint32_t channels = 32; channels < 256; channels += 48) {
617 DWConvMicrokernelTester()
618 .cr(16)
619 .kr(25)
620 .channels(channels)
621 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800622 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700623 }
624 }
625
626 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_lt_16) {
627 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
628 for (uint32_t channels = 1; channels < 16; channels++) {
629 DWConvMicrokernelTester()
630 .cr(16)
631 .kr(25)
632 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800633 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700634 }
635 }
636
637 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16) {
638 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
639 for (uint32_t channels = 17; channels < 32; channels++) {
640 DWConvMicrokernelTester()
641 .cr(16)
642 .kr(25)
643 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800644 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700645 }
646 }
647
648 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
649 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
650 for (uint32_t channels = 17; channels < 32; channels++) {
651 DWConvMicrokernelTester()
652 .cr(16)
653 .kr(25)
654 .channels(channels)
655 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800656 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700657 }
658 }
659
660 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
661 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
662 for (uint32_t channels = 17; channels < 32; channels++) {
663 DWConvMicrokernelTester()
664 .cr(16)
665 .kr(25)
666 .channels(channels)
667 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800668 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700669 }
670 }
671
672 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel) {
673 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
674 for (size_t channels = 1; channels <= 80; channels += 15) {
675 DWConvMicrokernelTester()
676 .cr(16)
677 .kr(25)
678 .channels(channels)
679 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -0800680 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700681 }
682 }
683
684 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
685 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
686 for (size_t channels = 1; channels <= 80; channels += 15) {
687 for (size_t step = 2; step <= 25; step++) {
688 DWConvMicrokernelTester()
689 .cr(16)
690 .kr(25)
691 .channels(channels)
692 .width(3)
693 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -0800694 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700695 }
696 }
697 }
698
699 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
700 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
701 for (size_t channels = 1; channels <= 80; channels += 15) {
702 DWConvMicrokernelTester()
703 .cr(16)
704 .kr(25)
705 .channels(16)
706 .width(5)
707 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -0800708 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700709 }
710 }
711
712 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
713 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
714 for (size_t channels = 1; channels <= 80; channels += 15) {
715 DWConvMicrokernelTester()
716 .cr(16)
717 .kr(25)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800721 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700722 }
723 }
724
725 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
726 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
727 for (size_t channels = 1; channels <= 80; channels += 15) {
728 DWConvMicrokernelTester()
729 .cr(16)
730 .kr(25)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800734 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700735 }
736 }
737
738 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, input_offset) {
739 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
740 for (uint32_t channels = 32; channels < 256; channels += 48) {
741 DWConvMicrokernelTester()
742 .cr(16)
743 .kr(25)
744 .channels(channels)
745 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -0800746 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700747 }
748 }
749
750 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, zero) {
751 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
752 for (uint32_t mz = 0; mz < 25; mz++) {
753 for (uint32_t channels = 32; channels < 256; channels += 48) {
754 DWConvMicrokernelTester()
755 .cr(16)
756 .kr(25)
757 .channels(channels)
758 .input_offset(304)
759 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -0800760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -0700761 }
762 }
763 }
764#endif // XNN_ARCH_ARM64
765
766
767#if XNN_ARCH_ARM64
Frank Barchardc9f9d672021-10-18 12:51:59 -0700768 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_eq_32) {
769 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
770 DWConvMicrokernelTester()
771 .cr(32)
772 .kr(25)
773 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -0800774 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700775 }
776
777 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32) {
778 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
779 for (uint32_t channels = 64; channels < 512; channels += 96) {
780 DWConvMicrokernelTester()
781 .cr(32)
782 .kr(25)
783 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800784 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700785 }
786 }
787
788 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmin) {
789 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
790 for (uint32_t channels = 64; channels < 512; channels += 96) {
791 DWConvMicrokernelTester()
792 .cr(32)
793 .kr(25)
794 .channels(channels)
795 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800796 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700797 }
798 }
799
800 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmax) {
801 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
802 for (uint32_t channels = 64; channels < 512; channels += 96) {
803 DWConvMicrokernelTester()
804 .cr(32)
805 .kr(25)
806 .channels(channels)
807 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800808 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700809 }
810 }
811
812 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_lt_32) {
813 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
814 for (uint32_t channels = 1; channels < 32; channels++) {
815 DWConvMicrokernelTester()
816 .cr(32)
817 .kr(25)
818 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800819 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700820 }
821 }
822
823 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32) {
824 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
825 for (uint32_t channels = 33; channels < 64; channels++) {
826 DWConvMicrokernelTester()
827 .cr(32)
828 .kr(25)
829 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800830 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700831 }
832 }
833
834 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmin) {
835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
836 for (uint32_t channels = 33; channels < 64; channels++) {
837 DWConvMicrokernelTester()
838 .cr(32)
839 .kr(25)
840 .channels(channels)
841 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800842 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700843 }
844 }
845
846 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmax) {
847 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
848 for (uint32_t channels = 33; channels < 64; channels++) {
849 DWConvMicrokernelTester()
850 .cr(32)
851 .kr(25)
852 .channels(channels)
853 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800854 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700855 }
856 }
857
858 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel) {
859 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
860 for (size_t channels = 1; channels <= 160; channels += 31) {
861 DWConvMicrokernelTester()
862 .cr(32)
863 .kr(25)
864 .channels(channels)
865 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -0800866 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700867 }
868 }
869
870 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_step) {
871 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
872 for (size_t channels = 1; channels <= 160; channels += 31) {
873 for (size_t step = 2; step <= 25; step++) {
874 DWConvMicrokernelTester()
875 .cr(32)
876 .kr(25)
877 .channels(channels)
878 .width(3)
879 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -0800880 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700881 }
882 }
883 }
884
885 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_output_stride) {
886 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
887 for (size_t channels = 1; channels <= 160; channels += 31) {
888 DWConvMicrokernelTester()
889 .cr(32)
890 .kr(25)
891 .channels(32)
892 .width(5)
893 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -0800894 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700895 }
896 }
897
898 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmin) {
899 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
900 for (size_t channels = 1; channels <= 160; channels += 31) {
901 DWConvMicrokernelTester()
902 .cr(32)
903 .kr(25)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800907 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700908 }
909 }
910
911 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmax) {
912 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
913 for (size_t channels = 1; channels <= 160; channels += 31) {
914 DWConvMicrokernelTester()
915 .cr(32)
916 .kr(25)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800920 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700921 }
922 }
923
924 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, input_offset) {
925 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
926 for (uint32_t channels = 64; channels < 512; channels += 96) {
927 DWConvMicrokernelTester()
928 .cr(32)
929 .kr(25)
930 .channels(channels)
931 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -0800932 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700933 }
934 }
935
936 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, zero) {
937 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
938 for (uint32_t mz = 0; mz < 25; mz++) {
939 for (uint32_t channels = 64; channels < 512; channels += 96) {
940 DWConvMicrokernelTester()
941 .cr(32)
942 .kr(25)
943 .channels(channels)
944 .input_offset(592)
945 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -0800946 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700947 }
948 }
949 }
950#endif // XNN_ARCH_ARM64
951
952
953#if XNN_ARCH_ARM64
954 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_eq_32) {
955 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
956 DWConvMicrokernelTester()
957 .cr(32)
958 .kr(25)
959 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -0800960 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700961 }
962
963 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32) {
964 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
965 for (uint32_t channels = 64; channels < 512; channels += 96) {
966 DWConvMicrokernelTester()
967 .cr(32)
968 .kr(25)
969 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -0800970 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700971 }
972 }
973
974 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
975 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
976 for (uint32_t channels = 64; channels < 512; channels += 96) {
977 DWConvMicrokernelTester()
978 .cr(32)
979 .kr(25)
980 .channels(channels)
981 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800982 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700983 }
984 }
985
986 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
987 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
988 for (uint32_t channels = 64; channels < 512; channels += 96) {
989 DWConvMicrokernelTester()
990 .cr(32)
991 .kr(25)
992 .channels(channels)
993 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -0800994 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -0700995 }
996 }
997
998 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_lt_32) {
999 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1000 for (uint32_t channels = 1; channels < 32; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(32)
1003 .kr(25)
1004 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001005 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001006 }
1007 }
1008
1009 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32) {
1010 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1011 for (uint32_t channels = 33; channels < 64; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(32)
1014 .kr(25)
1015 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001016 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001017 }
1018 }
1019
1020 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
1021 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1022 for (uint32_t channels = 33; channels < 64; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(32)
1025 .kr(25)
1026 .channels(channels)
1027 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001028 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001029 }
1030 }
1031
1032 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
1033 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1034 for (uint32_t channels = 33; channels < 64; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(32)
1037 .kr(25)
1038 .channels(channels)
1039 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001040 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001041 }
1042 }
1043
1044 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel) {
1045 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1046 for (size_t channels = 1; channels <= 160; channels += 31) {
1047 DWConvMicrokernelTester()
1048 .cr(32)
1049 .kr(25)
1050 .channels(channels)
1051 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001052 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001053 }
1054 }
1055
1056 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
1057 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1058 for (size_t channels = 1; channels <= 160; channels += 31) {
1059 for (size_t step = 2; step <= 25; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(32)
1062 .kr(25)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001066 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001067 }
1068 }
1069 }
1070
1071 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
1072 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1073 for (size_t channels = 1; channels <= 160; channels += 31) {
1074 DWConvMicrokernelTester()
1075 .cr(32)
1076 .kr(25)
1077 .channels(32)
1078 .width(5)
1079 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -08001080 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001081 }
1082 }
1083
1084 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
1085 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1086 for (size_t channels = 1; channels <= 160; channels += 31) {
1087 DWConvMicrokernelTester()
1088 .cr(32)
1089 .kr(25)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001093 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001094 }
1095 }
1096
1097 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
1098 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1099 for (size_t channels = 1; channels <= 160; channels += 31) {
1100 DWConvMicrokernelTester()
1101 .cr(32)
1102 .kr(25)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001106 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001107 }
1108 }
1109
1110 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, input_offset) {
1111 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1112 for (uint32_t channels = 64; channels < 512; channels += 96) {
1113 DWConvMicrokernelTester()
1114 .cr(32)
1115 .kr(25)
1116 .channels(channels)
1117 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -08001118 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001119 }
1120 }
1121
1122 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, zero) {
1123 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1124 for (uint32_t mz = 0; mz < 25; mz++) {
1125 for (uint32_t channels = 64; channels < 512; channels += 96) {
1126 DWConvMicrokernelTester()
1127 .cr(32)
1128 .kr(25)
1129 .channels(channels)
1130 .input_offset(592)
1131 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08001132 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001133 }
1134 }
1135 }
1136#endif // XNN_ARCH_ARM64
1137
1138
1139#if XNN_ARCH_ARM64
Frank Barchard5a599a62020-06-04 20:12:44 -07001140 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_eq_8) {
1141 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1142 DWConvMicrokernelTester()
1143 .cr(8)
1144 .kr(9)
1145 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -08001146 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001147 }
1148
1149 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8) {
1150 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1151 for (uint32_t channels = 16; channels < 128; channels += 24) {
1152 DWConvMicrokernelTester()
1153 .cr(8)
1154 .kr(9)
1155 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001156 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001157 }
1158 }
1159
1160 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmin) {
1161 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1162 for (uint32_t channels = 16; channels < 128; channels += 24) {
1163 DWConvMicrokernelTester()
1164 .cr(8)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001168 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001169 }
1170 }
1171
1172 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmax) {
1173 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1174 for (uint32_t channels = 16; channels < 128; channels += 24) {
1175 DWConvMicrokernelTester()
1176 .cr(8)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001180 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001181 }
1182 }
1183
1184 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_lt_8) {
1185 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1186 for (uint32_t channels = 1; channels < 8; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(8)
1189 .kr(9)
1190 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001191 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001192 }
1193 }
1194
1195 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8) {
1196 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1197 for (uint32_t channels = 9; channels < 16; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(8)
1200 .kr(9)
1201 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001202 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001203 }
1204 }
1205
1206 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmin) {
1207 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1208 for (uint32_t channels = 9; channels < 16; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(8)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001214 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001215 }
1216 }
1217
1218 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmax) {
1219 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1220 for (uint32_t channels = 9; channels < 16; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(8)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001226 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001227 }
1228 }
1229
1230 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel) {
1231 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1232 for (size_t channels = 1; channels <= 40; channels += 7) {
1233 DWConvMicrokernelTester()
1234 .cr(8)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001239 }
1240 }
1241
1242 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_step) {
1243 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1244 for (size_t channels = 1; channels <= 40; channels += 7) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(8)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001252 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001253 }
1254 }
1255 }
1256
1257 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_output_stride) {
1258 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1259 for (size_t channels = 1; channels <= 40; channels += 7) {
1260 DWConvMicrokernelTester()
1261 .cr(8)
1262 .kr(9)
1263 .channels(8)
1264 .width(5)
1265 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -08001266 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001267 }
1268 }
1269
1270 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmin) {
1271 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1272 for (size_t channels = 1; channels <= 40; channels += 7) {
1273 DWConvMicrokernelTester()
1274 .cr(8)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001279 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001280 }
1281 }
1282
1283 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmax) {
1284 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1285 for (size_t channels = 1; channels <= 40; channels += 7) {
1286 DWConvMicrokernelTester()
1287 .cr(8)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001292 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001293 }
1294 }
1295
1296 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, input_offset) {
1297 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1298 for (uint32_t channels = 16; channels < 128; channels += 24) {
1299 DWConvMicrokernelTester()
1300 .cr(8)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -08001304 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001305 }
1306 }
1307
1308 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, zero) {
1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 16; channels < 128; channels += 24) {
1312 DWConvMicrokernelTester()
1313 .cr(8)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(176)
1317 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08001318 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001319 }
1320 }
1321 }
1322#endif // XNN_ARCH_ARM64
1323
1324
1325#if XNN_ARCH_ARM64
1326 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_eq_8) {
1327 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1328 DWConvMicrokernelTester()
1329 .cr(8)
1330 .kr(9)
1331 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -08001332 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001333 }
1334
1335 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8) {
1336 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1337 for (uint32_t channels = 16; channels < 128; channels += 24) {
1338 DWConvMicrokernelTester()
1339 .cr(8)
1340 .kr(9)
1341 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001342 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001343 }
1344 }
1345
1346 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
1347 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1348 for (uint32_t channels = 16; channels < 128; channels += 24) {
1349 DWConvMicrokernelTester()
1350 .cr(8)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001354 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001355 }
1356 }
1357
1358 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
1359 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1360 for (uint32_t channels = 16; channels < 128; channels += 24) {
1361 DWConvMicrokernelTester()
1362 .cr(8)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001366 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001367 }
1368 }
1369
1370 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_lt_8) {
1371 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1372 for (uint32_t channels = 1; channels < 8; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(8)
1375 .kr(9)
1376 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001377 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001378 }
1379 }
1380
1381 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8) {
1382 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1383 for (uint32_t channels = 9; channels < 16; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(8)
1386 .kr(9)
1387 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001389 }
1390 }
1391
1392 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
1393 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1394 for (uint32_t channels = 9; channels < 16; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(8)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001400 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001401 }
1402 }
1403
1404 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
1405 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1406 for (uint32_t channels = 9; channels < 16; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(8)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001412 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001413 }
1414 }
1415
1416 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel) {
1417 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1418 for (size_t channels = 1; channels <= 40; channels += 7) {
1419 DWConvMicrokernelTester()
1420 .cr(8)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001424 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001425 }
1426 }
1427
1428 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
1429 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1430 for (size_t channels = 1; channels <= 40; channels += 7) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(8)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001438 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001439 }
1440 }
1441 }
1442
1443 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
1444 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1445 for (size_t channels = 1; channels <= 40; channels += 7) {
1446 DWConvMicrokernelTester()
1447 .cr(8)
1448 .kr(9)
1449 .channels(8)
1450 .width(5)
1451 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -08001452 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001453 }
1454 }
1455
1456 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
1457 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1458 for (size_t channels = 1; channels <= 40; channels += 7) {
1459 DWConvMicrokernelTester()
1460 .cr(8)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001465 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001466 }
1467 }
1468
1469 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
1470 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1471 for (size_t channels = 1; channels <= 40; channels += 7) {
1472 DWConvMicrokernelTester()
1473 .cr(8)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001478 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001479 }
1480 }
1481
1482 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, input_offset) {
1483 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1484 for (uint32_t channels = 16; channels < 128; channels += 24) {
1485 DWConvMicrokernelTester()
1486 .cr(8)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -08001490 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001491 }
1492 }
1493
1494 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, zero) {
1495 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 16; channels < 128; channels += 24) {
1498 DWConvMicrokernelTester()
1499 .cr(8)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(176)
1503 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08001504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001505 }
1506 }
1507 }
1508#endif // XNN_ARCH_ARM64
1509
1510
1511#if XNN_ARCH_ARM64
1512 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_eq_16) {
1513 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1514 DWConvMicrokernelTester()
1515 .cr(16)
1516 .kr(9)
1517 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -08001518 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001519 }
1520
1521 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16) {
1522 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1523 for (uint32_t channels = 32; channels < 256; channels += 48) {
1524 DWConvMicrokernelTester()
1525 .cr(16)
1526 .kr(9)
1527 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001528 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001529 }
1530 }
1531
1532 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmin) {
1533 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1534 for (uint32_t channels = 32; channels < 256; channels += 48) {
1535 DWConvMicrokernelTester()
1536 .cr(16)
1537 .kr(9)
1538 .channels(channels)
1539 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001540 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001541 }
1542 }
1543
1544 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmax) {
1545 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1546 for (uint32_t channels = 32; channels < 256; channels += 48) {
1547 DWConvMicrokernelTester()
1548 .cr(16)
1549 .kr(9)
1550 .channels(channels)
1551 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001552 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001553 }
1554 }
1555
1556 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_lt_16) {
1557 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1558 for (uint32_t channels = 1; channels < 16; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(16)
1561 .kr(9)
1562 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001563 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001564 }
1565 }
1566
1567 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16) {
1568 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1569 for (uint32_t channels = 17; channels < 32; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(16)
1572 .kr(9)
1573 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001574 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001575 }
1576 }
1577
1578 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmin) {
1579 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1580 for (uint32_t channels = 17; channels < 32; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(16)
1583 .kr(9)
1584 .channels(channels)
1585 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001586 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001587 }
1588 }
1589
1590 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmax) {
1591 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1592 for (uint32_t channels = 17; channels < 32; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(16)
1595 .kr(9)
1596 .channels(channels)
1597 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001598 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001599 }
1600 }
1601
1602 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel) {
1603 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1604 for (size_t channels = 1; channels <= 80; channels += 15) {
1605 DWConvMicrokernelTester()
1606 .cr(16)
1607 .kr(9)
1608 .channels(channels)
1609 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001610 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001611 }
1612 }
1613
1614 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_step) {
1615 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1616 for (size_t channels = 1; channels <= 80; channels += 15) {
1617 for (size_t step = 2; step <= 9; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(16)
1620 .kr(9)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001624 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001625 }
1626 }
1627 }
1628
1629 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_output_stride) {
1630 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1631 for (size_t channels = 1; channels <= 80; channels += 15) {
1632 DWConvMicrokernelTester()
1633 .cr(16)
1634 .kr(9)
1635 .channels(16)
1636 .width(5)
1637 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -08001638 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001639 }
1640 }
1641
1642 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmin) {
1643 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1644 for (size_t channels = 1; channels <= 80; channels += 15) {
1645 DWConvMicrokernelTester()
1646 .cr(16)
1647 .kr(9)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001651 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001652 }
1653 }
1654
1655 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmax) {
1656 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1657 for (size_t channels = 1; channels <= 80; channels += 15) {
1658 DWConvMicrokernelTester()
1659 .cr(16)
1660 .kr(9)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001664 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001665 }
1666 }
1667
1668 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, input_offset) {
1669 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1670 for (uint32_t channels = 32; channels < 256; channels += 48) {
1671 DWConvMicrokernelTester()
1672 .cr(16)
1673 .kr(9)
1674 .channels(channels)
1675 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -08001676 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001677 }
1678 }
1679
1680 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, zero) {
1681 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1682 for (uint32_t mz = 0; mz < 9; mz++) {
1683 for (uint32_t channels = 32; channels < 256; channels += 48) {
1684 DWConvMicrokernelTester()
1685 .cr(16)
1686 .kr(9)
1687 .channels(channels)
1688 .input_offset(304)
1689 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08001690 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001691 }
1692 }
1693 }
1694#endif // XNN_ARCH_ARM64
1695
1696
1697#if XNN_ARCH_ARM64
1698 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_eq_16) {
1699 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1700 DWConvMicrokernelTester()
1701 .cr(16)
1702 .kr(9)
1703 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -08001704 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001705 }
1706
1707 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16) {
1708 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1709 for (uint32_t channels = 32; channels < 256; channels += 48) {
1710 DWConvMicrokernelTester()
1711 .cr(16)
1712 .kr(9)
1713 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001714 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001715 }
1716 }
1717
1718 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
1719 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1720 for (uint32_t channels = 32; channels < 256; channels += 48) {
1721 DWConvMicrokernelTester()
1722 .cr(16)
1723 .kr(9)
1724 .channels(channels)
1725 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001726 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001727 }
1728 }
1729
1730 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
1731 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1732 for (uint32_t channels = 32; channels < 256; channels += 48) {
1733 DWConvMicrokernelTester()
1734 .cr(16)
1735 .kr(9)
1736 .channels(channels)
1737 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001738 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001739 }
1740 }
1741
1742 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_lt_16) {
1743 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1744 for (uint32_t channels = 1; channels < 16; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(16)
1747 .kr(9)
1748 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001749 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001750 }
1751 }
1752
1753 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16) {
1754 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1755 for (uint32_t channels = 17; channels < 32; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(16)
1758 .kr(9)
1759 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001761 }
1762 }
1763
1764 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
1765 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1766 for (uint32_t channels = 17; channels < 32; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(16)
1769 .kr(9)
1770 .channels(channels)
1771 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001772 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001773 }
1774 }
1775
1776 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
1777 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1778 for (uint32_t channels = 17; channels < 32; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(16)
1781 .kr(9)
1782 .channels(channels)
1783 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001784 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001785 }
1786 }
1787
1788 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel) {
1789 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1790 for (size_t channels = 1; channels <= 80; channels += 15) {
1791 DWConvMicrokernelTester()
1792 .cr(16)
1793 .kr(9)
1794 .channels(channels)
1795 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001796 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001797 }
1798 }
1799
1800 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
1801 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1802 for (size_t channels = 1; channels <= 80; channels += 15) {
1803 for (size_t step = 2; step <= 9; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(16)
1806 .kr(9)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001810 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001811 }
1812 }
1813 }
1814
1815 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
1816 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1817 for (size_t channels = 1; channels <= 80; channels += 15) {
1818 DWConvMicrokernelTester()
1819 .cr(16)
1820 .kr(9)
1821 .channels(16)
1822 .width(5)
1823 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -08001824 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001825 }
1826 }
1827
1828 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
1829 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1830 for (size_t channels = 1; channels <= 80; channels += 15) {
1831 DWConvMicrokernelTester()
1832 .cr(16)
1833 .kr(9)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001837 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001838 }
1839 }
1840
1841 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
1842 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1843 for (size_t channels = 1; channels <= 80; channels += 15) {
1844 DWConvMicrokernelTester()
1845 .cr(16)
1846 .kr(9)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001850 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001851 }
1852 }
1853
1854 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, input_offset) {
1855 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1856 for (uint32_t channels = 32; channels < 256; channels += 48) {
1857 DWConvMicrokernelTester()
1858 .cr(16)
1859 .kr(9)
1860 .channels(channels)
1861 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -08001862 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001863 }
1864 }
1865
1866 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, zero) {
1867 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1868 for (uint32_t mz = 0; mz < 9; mz++) {
1869 for (uint32_t channels = 32; channels < 256; channels += 48) {
1870 DWConvMicrokernelTester()
1871 .cr(16)
1872 .kr(9)
1873 .channels(channels)
1874 .input_offset(304)
1875 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08001876 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07001877 }
1878 }
1879 }
1880#endif // XNN_ARCH_ARM64
1881
1882
1883#if XNN_ARCH_ARM64
Frank Barchardc9f9d672021-10-18 12:51:59 -07001884 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_eq_32) {
1885 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1886 DWConvMicrokernelTester()
1887 .cr(32)
1888 .kr(9)
1889 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -08001890 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001891 }
1892
1893 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32) {
1894 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1895 for (uint32_t channels = 64; channels < 512; channels += 96) {
1896 DWConvMicrokernelTester()
1897 .cr(32)
1898 .kr(9)
1899 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001900 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001901 }
1902 }
1903
1904 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmin) {
1905 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1906 for (uint32_t channels = 64; channels < 512; channels += 96) {
1907 DWConvMicrokernelTester()
1908 .cr(32)
1909 .kr(9)
1910 .channels(channels)
1911 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001912 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001913 }
1914 }
1915
1916 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmax) {
1917 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1918 for (uint32_t channels = 64; channels < 512; channels += 96) {
1919 DWConvMicrokernelTester()
1920 .cr(32)
1921 .kr(9)
1922 .channels(channels)
1923 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001924 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001925 }
1926 }
1927
1928 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_lt_32) {
1929 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1930 for (uint32_t channels = 1; channels < 32; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(32)
1933 .kr(9)
1934 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001935 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001936 }
1937 }
1938
1939 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32) {
1940 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1941 for (uint32_t channels = 33; channels < 64; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(32)
1944 .kr(9)
1945 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08001946 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001947 }
1948 }
1949
1950 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmin) {
1951 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1952 for (uint32_t channels = 33; channels < 64; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(32)
1955 .kr(9)
1956 .channels(channels)
1957 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001958 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001959 }
1960 }
1961
1962 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmax) {
1963 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1964 for (uint32_t channels = 33; channels < 64; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(32)
1967 .kr(9)
1968 .channels(channels)
1969 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08001970 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001971 }
1972 }
1973
1974 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel) {
1975 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1976 for (size_t channels = 1; channels <= 160; channels += 31) {
1977 DWConvMicrokernelTester()
1978 .cr(32)
1979 .kr(9)
1980 .channels(channels)
1981 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08001982 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001983 }
1984 }
1985
1986 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_step) {
1987 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1988 for (size_t channels = 1; channels <= 160; channels += 31) {
1989 for (size_t step = 2; step <= 9; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(32)
1992 .kr(9)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08001996 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07001997 }
1998 }
1999 }
2000
2001 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_output_stride) {
2002 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2003 for (size_t channels = 1; channels <= 160; channels += 31) {
2004 DWConvMicrokernelTester()
2005 .cr(32)
2006 .kr(9)
2007 .channels(32)
2008 .width(5)
2009 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -08002010 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002011 }
2012 }
2013
2014 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmin) {
2015 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2016 for (size_t channels = 1; channels <= 160; channels += 31) {
2017 DWConvMicrokernelTester()
2018 .cr(32)
2019 .kr(9)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002023 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002024 }
2025 }
2026
2027 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmax) {
2028 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2029 for (size_t channels = 1; channels <= 160; channels += 31) {
2030 DWConvMicrokernelTester()
2031 .cr(32)
2032 .kr(9)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002036 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002037 }
2038 }
2039
2040 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, input_offset) {
2041 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2042 for (uint32_t channels = 64; channels < 512; channels += 96) {
2043 DWConvMicrokernelTester()
2044 .cr(32)
2045 .kr(9)
2046 .channels(channels)
2047 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -08002048 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002049 }
2050 }
2051
2052 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, zero) {
2053 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2054 for (uint32_t mz = 0; mz < 9; mz++) {
2055 for (uint32_t channels = 64; channels < 512; channels += 96) {
2056 DWConvMicrokernelTester()
2057 .cr(32)
2058 .kr(9)
2059 .channels(channels)
2060 .input_offset(592)
2061 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002062 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002063 }
2064 }
2065 }
2066#endif // XNN_ARCH_ARM64
2067
2068
2069#if XNN_ARCH_ARM64
2070 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_eq_32) {
2071 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2072 DWConvMicrokernelTester()
2073 .cr(32)
2074 .kr(9)
2075 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -08002076 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002077 }
2078
2079 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32) {
2080 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2081 for (uint32_t channels = 64; channels < 512; channels += 96) {
2082 DWConvMicrokernelTester()
2083 .cr(32)
2084 .kr(9)
2085 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002086 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002087 }
2088 }
2089
2090 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
2091 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2092 for (uint32_t channels = 64; channels < 512; channels += 96) {
2093 DWConvMicrokernelTester()
2094 .cr(32)
2095 .kr(9)
2096 .channels(channels)
2097 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002098 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002099 }
2100 }
2101
2102 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
2103 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2104 for (uint32_t channels = 64; channels < 512; channels += 96) {
2105 DWConvMicrokernelTester()
2106 .cr(32)
2107 .kr(9)
2108 .channels(channels)
2109 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002110 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002111 }
2112 }
2113
2114 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_lt_32) {
2115 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2116 for (uint32_t channels = 1; channels < 32; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(32)
2119 .kr(9)
2120 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002121 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002122 }
2123 }
2124
2125 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32) {
2126 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2127 for (uint32_t channels = 33; channels < 64; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(32)
2130 .kr(9)
2131 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002132 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002133 }
2134 }
2135
2136 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
2137 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2138 for (uint32_t channels = 33; channels < 64; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(32)
2141 .kr(9)
2142 .channels(channels)
2143 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002144 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002145 }
2146 }
2147
2148 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
2149 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2150 for (uint32_t channels = 33; channels < 64; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(32)
2153 .kr(9)
2154 .channels(channels)
2155 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002156 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002157 }
2158 }
2159
2160 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel) {
2161 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2162 for (size_t channels = 1; channels <= 160; channels += 31) {
2163 DWConvMicrokernelTester()
2164 .cr(32)
2165 .kr(9)
2166 .channels(channels)
2167 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08002168 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002169 }
2170 }
2171
2172 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
2173 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2174 for (size_t channels = 1; channels <= 160; channels += 31) {
2175 for (size_t step = 2; step <= 9; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(32)
2178 .kr(9)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08002182 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002183 }
2184 }
2185 }
2186
2187 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
2188 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2189 for (size_t channels = 1; channels <= 160; channels += 31) {
2190 DWConvMicrokernelTester()
2191 .cr(32)
2192 .kr(9)
2193 .channels(32)
2194 .width(5)
2195 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -08002196 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002197 }
2198 }
2199
2200 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
2201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2202 for (size_t channels = 1; channels <= 160; channels += 31) {
2203 DWConvMicrokernelTester()
2204 .cr(32)
2205 .kr(9)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002209 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002210 }
2211 }
2212
2213 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
2214 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2215 for (size_t channels = 1; channels <= 160; channels += 31) {
2216 DWConvMicrokernelTester()
2217 .cr(32)
2218 .kr(9)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002222 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002223 }
2224 }
2225
2226 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, input_offset) {
2227 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2228 for (uint32_t channels = 64; channels < 512; channels += 96) {
2229 DWConvMicrokernelTester()
2230 .cr(32)
2231 .kr(9)
2232 .channels(channels)
2233 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -08002234 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002235 }
2236 }
2237
2238 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, zero) {
2239 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2240 for (uint32_t mz = 0; mz < 9; mz++) {
2241 for (uint32_t channels = 64; channels < 512; channels += 96) {
2242 DWConvMicrokernelTester()
2243 .cr(32)
2244 .kr(9)
2245 .channels(channels)
2246 .input_offset(592)
2247 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002248 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07002249 }
2250 }
2251 }
2252#endif // XNN_ARCH_ARM64
2253
2254
2255#if XNN_ARCH_ARM64
Frank Barchard5a599a62020-06-04 20:12:44 -07002256 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_eq_8) {
2257 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2258 DWConvMicrokernelTester()
2259 .cr(8)
2260 .kr(4)
2261 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -08002262 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002263 }
2264
2265 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8) {
2266 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2267 for (uint32_t channels = 16; channels < 128; channels += 24) {
2268 DWConvMicrokernelTester()
2269 .cr(8)
2270 .kr(4)
2271 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002272 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002273 }
2274 }
2275
2276 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmin) {
2277 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2278 for (uint32_t channels = 16; channels < 128; channels += 24) {
2279 DWConvMicrokernelTester()
2280 .cr(8)
2281 .kr(4)
2282 .channels(channels)
2283 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002284 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002285 }
2286 }
2287
2288 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmax) {
2289 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2290 for (uint32_t channels = 16; channels < 128; channels += 24) {
2291 DWConvMicrokernelTester()
2292 .cr(8)
2293 .kr(4)
2294 .channels(channels)
2295 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002296 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002297 }
2298 }
2299
2300 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_lt_8) {
2301 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2302 for (uint32_t channels = 1; channels < 8; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(8)
2305 .kr(4)
2306 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002307 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002308 }
2309 }
2310
2311 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8) {
2312 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2313 for (uint32_t channels = 9; channels < 16; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(8)
2316 .kr(4)
2317 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002318 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002319 }
2320 }
2321
2322 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmin) {
2323 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2324 for (uint32_t channels = 9; channels < 16; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(8)
2327 .kr(4)
2328 .channels(channels)
2329 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002330 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002331 }
2332 }
2333
2334 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmax) {
2335 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2336 for (uint32_t channels = 9; channels < 16; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(8)
2339 .kr(4)
2340 .channels(channels)
2341 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002342 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002343 }
2344 }
2345
2346 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel) {
2347 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2348 for (size_t channels = 1; channels <= 40; channels += 7) {
2349 DWConvMicrokernelTester()
2350 .cr(8)
2351 .kr(4)
2352 .channels(channels)
2353 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08002354 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002355 }
2356 }
2357
2358 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_step) {
2359 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2360 for (size_t channels = 1; channels <= 40; channels += 7) {
2361 for (size_t step = 2; step <= 4; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(8)
2364 .kr(4)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08002368 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002369 }
2370 }
2371 }
2372
2373 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_output_stride) {
2374 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2375 for (size_t channels = 1; channels <= 40; channels += 7) {
2376 DWConvMicrokernelTester()
2377 .cr(8)
2378 .kr(4)
2379 .channels(8)
2380 .width(5)
2381 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -08002382 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002383 }
2384 }
2385
2386 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmin) {
2387 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2388 for (size_t channels = 1; channels <= 40; channels += 7) {
2389 DWConvMicrokernelTester()
2390 .cr(8)
2391 .kr(4)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002395 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002396 }
2397 }
2398
2399 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmax) {
2400 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2401 for (size_t channels = 1; channels <= 40; channels += 7) {
2402 DWConvMicrokernelTester()
2403 .cr(8)
2404 .kr(4)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002408 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002409 }
2410 }
2411
2412 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, input_offset) {
2413 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2414 for (uint32_t channels = 16; channels < 128; channels += 24) {
2415 DWConvMicrokernelTester()
2416 .cr(8)
2417 .kr(4)
2418 .channels(channels)
2419 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -08002420 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002421 }
2422 }
2423
2424 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, zero) {
2425 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2426 for (uint32_t mz = 0; mz < 4; mz++) {
2427 for (uint32_t channels = 16; channels < 128; channels += 24) {
2428 DWConvMicrokernelTester()
2429 .cr(8)
2430 .kr(4)
2431 .channels(channels)
2432 .input_offset(176)
2433 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002434 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002435 }
2436 }
2437 }
2438#endif // XNN_ARCH_ARM64
2439
2440
2441#if XNN_ARCH_ARM64
2442 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_eq_8) {
2443 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2444 DWConvMicrokernelTester()
2445 .cr(8)
2446 .kr(4)
2447 .channels(8)
Marat Dukhan645af972022-01-09 22:50:27 -08002448 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002449 }
2450
2451 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8) {
2452 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2453 for (uint32_t channels = 16; channels < 128; channels += 24) {
2454 DWConvMicrokernelTester()
2455 .cr(8)
2456 .kr(4)
2457 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002458 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002459 }
2460 }
2461
2462 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
2463 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2464 for (uint32_t channels = 16; channels < 128; channels += 24) {
2465 DWConvMicrokernelTester()
2466 .cr(8)
2467 .kr(4)
2468 .channels(channels)
2469 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002470 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002471 }
2472 }
2473
2474 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
2475 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2476 for (uint32_t channels = 16; channels < 128; channels += 24) {
2477 DWConvMicrokernelTester()
2478 .cr(8)
2479 .kr(4)
2480 .channels(channels)
2481 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002482 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002483 }
2484 }
2485
2486 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_lt_8) {
2487 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2488 for (uint32_t channels = 1; channels < 8; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(8)
2491 .kr(4)
2492 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002493 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002494 }
2495 }
2496
2497 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8) {
2498 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2499 for (uint32_t channels = 9; channels < 16; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(8)
2502 .kr(4)
2503 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002505 }
2506 }
2507
2508 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
2509 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2510 for (uint32_t channels = 9; channels < 16; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(8)
2513 .kr(4)
2514 .channels(channels)
2515 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002516 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002517 }
2518 }
2519
2520 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
2521 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2522 for (uint32_t channels = 9; channels < 16; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(8)
2525 .kr(4)
2526 .channels(channels)
2527 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002528 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002529 }
2530 }
2531
2532 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel) {
2533 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2534 for (size_t channels = 1; channels <= 40; channels += 7) {
2535 DWConvMicrokernelTester()
2536 .cr(8)
2537 .kr(4)
2538 .channels(channels)
2539 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08002540 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002541 }
2542 }
2543
2544 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
2545 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2546 for (size_t channels = 1; channels <= 40; channels += 7) {
2547 for (size_t step = 2; step <= 4; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(8)
2550 .kr(4)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08002554 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002555 }
2556 }
2557 }
2558
2559 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
2560 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2561 for (size_t channels = 1; channels <= 40; channels += 7) {
2562 DWConvMicrokernelTester()
2563 .cr(8)
2564 .kr(4)
2565 .channels(8)
2566 .width(5)
2567 .output_stride(43)
Marat Dukhan645af972022-01-09 22:50:27 -08002568 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002569 }
2570 }
2571
2572 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
2573 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2574 for (size_t channels = 1; channels <= 40; channels += 7) {
2575 DWConvMicrokernelTester()
2576 .cr(8)
2577 .kr(4)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002581 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002582 }
2583 }
2584
2585 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
2586 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2587 for (size_t channels = 1; channels <= 40; channels += 7) {
2588 DWConvMicrokernelTester()
2589 .cr(8)
2590 .kr(4)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002594 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002595 }
2596 }
2597
2598 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, input_offset) {
2599 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2600 for (uint32_t channels = 16; channels < 128; channels += 24) {
2601 DWConvMicrokernelTester()
2602 .cr(8)
2603 .kr(4)
2604 .channels(channels)
2605 .input_offset(176)
Marat Dukhan645af972022-01-09 22:50:27 -08002606 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002607 }
2608 }
2609
2610 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, zero) {
2611 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2612 for (uint32_t mz = 0; mz < 4; mz++) {
2613 for (uint32_t channels = 16; channels < 128; channels += 24) {
2614 DWConvMicrokernelTester()
2615 .cr(8)
2616 .kr(4)
2617 .channels(channels)
2618 .input_offset(176)
2619 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002620 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002621 }
2622 }
2623 }
2624#endif // XNN_ARCH_ARM64
2625
2626
2627#if XNN_ARCH_ARM64
2628 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_eq_16) {
2629 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2630 DWConvMicrokernelTester()
2631 .cr(16)
2632 .kr(4)
2633 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -08002634 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002635 }
2636
2637 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16) {
2638 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2639 for (uint32_t channels = 32; channels < 256; channels += 48) {
2640 DWConvMicrokernelTester()
2641 .cr(16)
2642 .kr(4)
2643 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002644 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002645 }
2646 }
2647
2648 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmin) {
2649 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2650 for (uint32_t channels = 32; channels < 256; channels += 48) {
2651 DWConvMicrokernelTester()
2652 .cr(16)
2653 .kr(4)
2654 .channels(channels)
2655 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002656 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002657 }
2658 }
2659
2660 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmax) {
2661 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2662 for (uint32_t channels = 32; channels < 256; channels += 48) {
2663 DWConvMicrokernelTester()
2664 .cr(16)
2665 .kr(4)
2666 .channels(channels)
2667 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002668 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002669 }
2670 }
2671
2672 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_lt_16) {
2673 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2674 for (uint32_t channels = 1; channels < 16; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(16)
2677 .kr(4)
2678 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002679 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002680 }
2681 }
2682
2683 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16) {
2684 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2685 for (uint32_t channels = 17; channels < 32; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(16)
2688 .kr(4)
2689 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002690 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002691 }
2692 }
2693
2694 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmin) {
2695 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2696 for (uint32_t channels = 17; channels < 32; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(16)
2699 .kr(4)
2700 .channels(channels)
2701 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002702 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002703 }
2704 }
2705
2706 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmax) {
2707 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2708 for (uint32_t channels = 17; channels < 32; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(16)
2711 .kr(4)
2712 .channels(channels)
2713 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002714 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002715 }
2716 }
2717
2718 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel) {
2719 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2720 for (size_t channels = 1; channels <= 80; channels += 15) {
2721 DWConvMicrokernelTester()
2722 .cr(16)
2723 .kr(4)
2724 .channels(channels)
2725 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08002726 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002727 }
2728 }
2729
2730 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_step) {
2731 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2732 for (size_t channels = 1; channels <= 80; channels += 15) {
2733 for (size_t step = 2; step <= 4; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(16)
2736 .kr(4)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08002740 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002741 }
2742 }
2743 }
2744
2745 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_output_stride) {
2746 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2747 for (size_t channels = 1; channels <= 80; channels += 15) {
2748 DWConvMicrokernelTester()
2749 .cr(16)
2750 .kr(4)
2751 .channels(16)
2752 .width(5)
2753 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -08002754 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002755 }
2756 }
2757
2758 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmin) {
2759 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2760 for (size_t channels = 1; channels <= 80; channels += 15) {
2761 DWConvMicrokernelTester()
2762 .cr(16)
2763 .kr(4)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002767 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002768 }
2769 }
2770
2771 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmax) {
2772 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2773 for (size_t channels = 1; channels <= 80; channels += 15) {
2774 DWConvMicrokernelTester()
2775 .cr(16)
2776 .kr(4)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002780 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002781 }
2782 }
2783
2784 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, input_offset) {
2785 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2786 for (uint32_t channels = 32; channels < 256; channels += 48) {
2787 DWConvMicrokernelTester()
2788 .cr(16)
2789 .kr(4)
2790 .channels(channels)
2791 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -08002792 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002793 }
2794 }
2795
2796 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, zero) {
2797 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2798 for (uint32_t mz = 0; mz < 4; mz++) {
2799 for (uint32_t channels = 32; channels < 256; channels += 48) {
2800 DWConvMicrokernelTester()
2801 .cr(16)
2802 .kr(4)
2803 .channels(channels)
2804 .input_offset(304)
2805 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002806 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002807 }
2808 }
2809 }
2810#endif // XNN_ARCH_ARM64
2811
2812
2813#if XNN_ARCH_ARM64
2814 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_eq_16) {
2815 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2816 DWConvMicrokernelTester()
2817 .cr(16)
2818 .kr(4)
2819 .channels(16)
Marat Dukhan645af972022-01-09 22:50:27 -08002820 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002821 }
2822
2823 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16) {
2824 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2825 for (uint32_t channels = 32; channels < 256; channels += 48) {
2826 DWConvMicrokernelTester()
2827 .cr(16)
2828 .kr(4)
2829 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002830 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002831 }
2832 }
2833
2834 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
2835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2836 for (uint32_t channels = 32; channels < 256; channels += 48) {
2837 DWConvMicrokernelTester()
2838 .cr(16)
2839 .kr(4)
2840 .channels(channels)
2841 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002842 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002843 }
2844 }
2845
2846 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
2847 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2848 for (uint32_t channels = 32; channels < 256; channels += 48) {
2849 DWConvMicrokernelTester()
2850 .cr(16)
2851 .kr(4)
2852 .channels(channels)
2853 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002854 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002855 }
2856 }
2857
2858 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_lt_16) {
2859 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2860 for (uint32_t channels = 1; channels < 16; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(16)
2863 .kr(4)
2864 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002865 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002866 }
2867 }
2868
2869 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16) {
2870 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2871 for (uint32_t channels = 17; channels < 32; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(16)
2874 .kr(4)
2875 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08002876 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002877 }
2878 }
2879
2880 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
2881 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2882 for (uint32_t channels = 17; channels < 32; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(16)
2885 .kr(4)
2886 .channels(channels)
2887 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002888 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002889 }
2890 }
2891
2892 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
2893 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2894 for (uint32_t channels = 17; channels < 32; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(16)
2897 .kr(4)
2898 .channels(channels)
2899 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002900 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002901 }
2902 }
2903
2904 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel) {
2905 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2906 for (size_t channels = 1; channels <= 80; channels += 15) {
2907 DWConvMicrokernelTester()
2908 .cr(16)
2909 .kr(4)
2910 .channels(channels)
2911 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08002912 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002913 }
2914 }
2915
2916 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
2917 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2918 for (size_t channels = 1; channels <= 80; channels += 15) {
2919 for (size_t step = 2; step <= 4; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(16)
2922 .kr(4)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08002926 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002927 }
2928 }
2929 }
2930
2931 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
2932 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2933 for (size_t channels = 1; channels <= 80; channels += 15) {
2934 DWConvMicrokernelTester()
2935 .cr(16)
2936 .kr(4)
2937 .channels(16)
2938 .width(5)
2939 .output_stride(83)
Marat Dukhan645af972022-01-09 22:50:27 -08002940 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002941 }
2942 }
2943
2944 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
2945 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2946 for (size_t channels = 1; channels <= 80; channels += 15) {
2947 DWConvMicrokernelTester()
2948 .cr(16)
2949 .kr(4)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002953 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002954 }
2955 }
2956
2957 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
2958 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2959 for (size_t channels = 1; channels <= 80; channels += 15) {
2960 DWConvMicrokernelTester()
2961 .cr(16)
2962 .kr(4)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08002966 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002967 }
2968 }
2969
2970 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, input_offset) {
2971 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2972 for (uint32_t channels = 32; channels < 256; channels += 48) {
2973 DWConvMicrokernelTester()
2974 .cr(16)
2975 .kr(4)
2976 .channels(channels)
2977 .input_offset(304)
Marat Dukhan645af972022-01-09 22:50:27 -08002978 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002979 }
2980 }
2981
2982 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, zero) {
2983 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2984 for (uint32_t mz = 0; mz < 4; mz++) {
2985 for (uint32_t channels = 32; channels < 256; channels += 48) {
2986 DWConvMicrokernelTester()
2987 .cr(16)
2988 .kr(4)
2989 .channels(channels)
2990 .input_offset(304)
2991 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08002992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchard5a599a62020-06-04 20:12:44 -07002993 }
2994 }
2995 }
2996#endif // XNN_ARCH_ARM64
Frank Barchardc9f9d672021-10-18 12:51:59 -07002997
2998
2999#if XNN_ARCH_ARM64
3000 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_eq_32) {
3001 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3002 DWConvMicrokernelTester()
3003 .cr(32)
3004 .kr(4)
3005 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -08003006 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003007 }
3008
3009 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32) {
3010 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3011 for (uint32_t channels = 64; channels < 512; channels += 96) {
3012 DWConvMicrokernelTester()
3013 .cr(32)
3014 .kr(4)
3015 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003016 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003017 }
3018 }
3019
3020 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmin) {
3021 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3022 for (uint32_t channels = 64; channels < 512; channels += 96) {
3023 DWConvMicrokernelTester()
3024 .cr(32)
3025 .kr(4)
3026 .channels(channels)
3027 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003028 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003029 }
3030 }
3031
3032 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmax) {
3033 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3034 for (uint32_t channels = 64; channels < 512; channels += 96) {
3035 DWConvMicrokernelTester()
3036 .cr(32)
3037 .kr(4)
3038 .channels(channels)
3039 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003040 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003041 }
3042 }
3043
3044 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_lt_32) {
3045 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3046 for (uint32_t channels = 1; channels < 32; channels++) {
3047 DWConvMicrokernelTester()
3048 .cr(32)
3049 .kr(4)
3050 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003051 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003052 }
3053 }
3054
3055 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32) {
3056 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3057 for (uint32_t channels = 33; channels < 64; channels++) {
3058 DWConvMicrokernelTester()
3059 .cr(32)
3060 .kr(4)
3061 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003062 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003063 }
3064 }
3065
3066 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmin) {
3067 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3068 for (uint32_t channels = 33; channels < 64; channels++) {
3069 DWConvMicrokernelTester()
3070 .cr(32)
3071 .kr(4)
3072 .channels(channels)
3073 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003074 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003075 }
3076 }
3077
3078 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmax) {
3079 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3080 for (uint32_t channels = 33; channels < 64; channels++) {
3081 DWConvMicrokernelTester()
3082 .cr(32)
3083 .kr(4)
3084 .channels(channels)
3085 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003086 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003087 }
3088 }
3089
3090 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel) {
3091 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3092 for (size_t channels = 1; channels <= 160; channels += 31) {
3093 DWConvMicrokernelTester()
3094 .cr(32)
3095 .kr(4)
3096 .channels(channels)
3097 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08003098 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003099 }
3100 }
3101
3102 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_step) {
3103 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3104 for (size_t channels = 1; channels <= 160; channels += 31) {
3105 for (size_t step = 2; step <= 4; step++) {
3106 DWConvMicrokernelTester()
3107 .cr(32)
3108 .kr(4)
3109 .channels(channels)
3110 .width(3)
3111 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08003112 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003113 }
3114 }
3115 }
3116
3117 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_output_stride) {
3118 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3119 for (size_t channels = 1; channels <= 160; channels += 31) {
3120 DWConvMicrokernelTester()
3121 .cr(32)
3122 .kr(4)
3123 .channels(32)
3124 .width(5)
3125 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -08003126 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003127 }
3128 }
3129
3130 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmin) {
3131 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3132 for (size_t channels = 1; channels <= 160; channels += 31) {
3133 DWConvMicrokernelTester()
3134 .cr(32)
3135 .kr(4)
3136 .channels(channels)
3137 .width(3)
3138 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003139 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003140 }
3141 }
3142
3143 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmax) {
3144 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3145 for (size_t channels = 1; channels <= 160; channels += 31) {
3146 DWConvMicrokernelTester()
3147 .cr(32)
3148 .kr(4)
3149 .channels(channels)
3150 .width(3)
3151 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003152 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003153 }
3154 }
3155
3156 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, input_offset) {
3157 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3158 for (uint32_t channels = 64; channels < 512; channels += 96) {
3159 DWConvMicrokernelTester()
3160 .cr(32)
3161 .kr(4)
3162 .channels(channels)
3163 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -08003164 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003165 }
3166 }
3167
3168 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, zero) {
3169 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3170 for (uint32_t mz = 0; mz < 4; mz++) {
3171 for (uint32_t channels = 64; channels < 512; channels += 96) {
3172 DWConvMicrokernelTester()
3173 .cr(32)
3174 .kr(4)
3175 .channels(channels)
3176 .input_offset(592)
3177 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08003178 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003179 }
3180 }
3181 }
3182#endif // XNN_ARCH_ARM64
3183
3184
3185#if XNN_ARCH_ARM64
3186 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_eq_32) {
3187 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3188 DWConvMicrokernelTester()
3189 .cr(32)
3190 .kr(4)
3191 .channels(32)
Marat Dukhan645af972022-01-09 22:50:27 -08003192 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003193 }
3194
3195 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32) {
3196 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3197 for (uint32_t channels = 64; channels < 512; channels += 96) {
3198 DWConvMicrokernelTester()
3199 .cr(32)
3200 .kr(4)
3201 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003202 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003203 }
3204 }
3205
3206 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
3207 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3208 for (uint32_t channels = 64; channels < 512; channels += 96) {
3209 DWConvMicrokernelTester()
3210 .cr(32)
3211 .kr(4)
3212 .channels(channels)
3213 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003214 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003215 }
3216 }
3217
3218 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
3219 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3220 for (uint32_t channels = 64; channels < 512; channels += 96) {
3221 DWConvMicrokernelTester()
3222 .cr(32)
3223 .kr(4)
3224 .channels(channels)
3225 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003226 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003227 }
3228 }
3229
3230 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_lt_32) {
3231 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3232 for (uint32_t channels = 1; channels < 32; channels++) {
3233 DWConvMicrokernelTester()
3234 .cr(32)
3235 .kr(4)
3236 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003237 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003238 }
3239 }
3240
3241 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32) {
3242 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3243 for (uint32_t channels = 33; channels < 64; channels++) {
3244 DWConvMicrokernelTester()
3245 .cr(32)
3246 .kr(4)
3247 .channels(channels)
Marat Dukhan645af972022-01-09 22:50:27 -08003248 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003249 }
3250 }
3251
3252 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
3253 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3254 for (uint32_t channels = 33; channels < 64; channels++) {
3255 DWConvMicrokernelTester()
3256 .cr(32)
3257 .kr(4)
3258 .channels(channels)
3259 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003260 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003261 }
3262 }
3263
3264 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
3265 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3266 for (uint32_t channels = 33; channels < 64; channels++) {
3267 DWConvMicrokernelTester()
3268 .cr(32)
3269 .kr(4)
3270 .channels(channels)
3271 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003272 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003273 }
3274 }
3275
3276 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel) {
3277 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3278 for (size_t channels = 1; channels <= 160; channels += 31) {
3279 DWConvMicrokernelTester()
3280 .cr(32)
3281 .kr(4)
3282 .channels(channels)
3283 .width(3)
Marat Dukhan645af972022-01-09 22:50:27 -08003284 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003285 }
3286 }
3287
3288 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
3289 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3290 for (size_t channels = 1; channels <= 160; channels += 31) {
3291 for (size_t step = 2; step <= 4; step++) {
3292 DWConvMicrokernelTester()
3293 .cr(32)
3294 .kr(4)
3295 .channels(channels)
3296 .width(3)
3297 .step(step)
Marat Dukhan645af972022-01-09 22:50:27 -08003298 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003299 }
3300 }
3301 }
3302
3303 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
3304 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3305 for (size_t channels = 1; channels <= 160; channels += 31) {
3306 DWConvMicrokernelTester()
3307 .cr(32)
3308 .kr(4)
3309 .channels(32)
3310 .width(5)
3311 .output_stride(163)
Marat Dukhan645af972022-01-09 22:50:27 -08003312 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003313 }
3314 }
3315
3316 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
3317 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3318 for (size_t channels = 1; channels <= 160; channels += 31) {
3319 DWConvMicrokernelTester()
3320 .cr(32)
3321 .kr(4)
3322 .channels(channels)
3323 .width(3)
3324 .qmin(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003325 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003326 }
3327 }
3328
3329 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
3330 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3331 for (size_t channels = 1; channels <= 160; channels += 31) {
3332 DWConvMicrokernelTester()
3333 .cr(32)
3334 .kr(4)
3335 .channels(channels)
3336 .width(3)
3337 .qmax(128)
Marat Dukhan645af972022-01-09 22:50:27 -08003338 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003339 }
3340 }
3341
3342 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, input_offset) {
3343 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3344 for (uint32_t channels = 64; channels < 512; channels += 96) {
3345 DWConvMicrokernelTester()
3346 .cr(32)
3347 .kr(4)
3348 .channels(channels)
3349 .input_offset(592)
Marat Dukhan645af972022-01-09 22:50:27 -08003350 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003351 }
3352 }
3353
3354 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, zero) {
3355 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3356 for (uint32_t mz = 0; mz < 4; mz++) {
3357 for (uint32_t channels = 64; channels < 512; channels += 96) {
3358 DWConvMicrokernelTester()
3359 .cr(32)
3360 .kr(4)
3361 .channels(channels)
3362 .input_offset(592)
3363 .zero_index(mz)
Marat Dukhan645af972022-01-09 22:50:27 -08003364 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
Frank Barchardc9f9d672021-10-18 12:51:59 -07003365 }
3366 }
3367 }
3368#endif // XNN_ARCH_ARM64
Marat Dukhan66eb5082022-01-19 13:25:38 -08003369
3370
3371#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3372 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_eq_8) {
3373 TEST_REQUIRES_X86_FMA3;
3374 DWConvMicrokernelTester()
3375 .cr(8)
3376 .kr(25)
3377 .channels(8)
3378 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3379 }
3380
3381 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8) {
3382 TEST_REQUIRES_X86_FMA3;
3383 for (uint32_t channels = 16; channels < 128; channels += 24) {
3384 DWConvMicrokernelTester()
3385 .cr(8)
3386 .kr(25)
3387 .channels(channels)
3388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3389 }
3390 }
3391
3392 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmin) {
3393 TEST_REQUIRES_X86_FMA3;
3394 for (uint32_t channels = 16; channels < 128; channels += 24) {
3395 DWConvMicrokernelTester()
3396 .cr(8)
3397 .kr(25)
3398 .channels(channels)
3399 .qmin(128)
3400 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3401 }
3402 }
3403
3404 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmax) {
3405 TEST_REQUIRES_X86_FMA3;
3406 for (uint32_t channels = 16; channels < 128; channels += 24) {
3407 DWConvMicrokernelTester()
3408 .cr(8)
3409 .kr(25)
3410 .channels(channels)
3411 .qmax(128)
3412 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3413 }
3414 }
3415
3416 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_lt_8) {
3417 TEST_REQUIRES_X86_FMA3;
3418 for (uint32_t channels = 1; channels < 8; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(8)
3421 .kr(25)
3422 .channels(channels)
3423 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3424 }
3425 }
3426
3427 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8) {
3428 TEST_REQUIRES_X86_FMA3;
3429 for (uint32_t channels = 9; channels < 16; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(8)
3432 .kr(25)
3433 .channels(channels)
3434 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3435 }
3436 }
3437
3438 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmin) {
3439 TEST_REQUIRES_X86_FMA3;
3440 for (uint32_t channels = 9; channels < 16; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(8)
3443 .kr(25)
3444 .channels(channels)
3445 .qmin(128)
3446 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3447 }
3448 }
3449
3450 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmax) {
3451 TEST_REQUIRES_X86_FMA3;
3452 for (uint32_t channels = 9; channels < 16; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(8)
3455 .kr(25)
3456 .channels(channels)
3457 .qmax(128)
3458 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3459 }
3460 }
3461
3462 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel) {
3463 TEST_REQUIRES_X86_FMA3;
3464 for (size_t channels = 1; channels <= 40; channels += 7) {
3465 DWConvMicrokernelTester()
3466 .cr(8)
3467 .kr(25)
3468 .channels(channels)
3469 .width(3)
3470 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3471 }
3472 }
3473
3474 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_step) {
3475 TEST_REQUIRES_X86_FMA3;
3476 for (size_t channels = 1; channels <= 40; channels += 7) {
3477 for (size_t step = 2; step <= 25; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(8)
3480 .kr(25)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
3484 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3485 }
3486 }
3487 }
3488
3489 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_output_stride) {
3490 TEST_REQUIRES_X86_FMA3;
3491 for (size_t channels = 1; channels <= 40; channels += 7) {
3492 DWConvMicrokernelTester()
3493 .cr(8)
3494 .kr(25)
3495 .channels(8)
3496 .width(5)
3497 .output_stride(43)
3498 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3499 }
3500 }
3501
3502 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmin) {
3503 TEST_REQUIRES_X86_FMA3;
3504 for (size_t channels = 1; channels <= 40; channels += 7) {
3505 DWConvMicrokernelTester()
3506 .cr(8)
3507 .kr(25)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
3511 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3512 }
3513 }
3514
3515 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmax) {
3516 TEST_REQUIRES_X86_FMA3;
3517 for (size_t channels = 1; channels <= 40; channels += 7) {
3518 DWConvMicrokernelTester()
3519 .cr(8)
3520 .kr(25)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
3524 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3525 }
3526 }
3527
3528 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, input_offset) {
3529 TEST_REQUIRES_X86_FMA3;
3530 for (uint32_t channels = 16; channels < 128; channels += 24) {
3531 DWConvMicrokernelTester()
3532 .cr(8)
3533 .kr(25)
3534 .channels(channels)
3535 .input_offset(176)
3536 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3537 }
3538 }
3539
3540 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, zero) {
3541 TEST_REQUIRES_X86_FMA3;
3542 for (uint32_t mz = 0; mz < 25; mz++) {
3543 for (uint32_t channels = 16; channels < 128; channels += 24) {
3544 DWConvMicrokernelTester()
3545 .cr(8)
3546 .kr(25)
3547 .channels(channels)
3548 .input_offset(176)
3549 .zero_index(mz)
3550 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
3551 }
3552 }
3553 }
3554#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3555
3556
3557#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3558 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_eq_8) {
3559 TEST_REQUIRES_X86_FMA3;
3560 DWConvMicrokernelTester()
3561 .cr(8)
3562 .kr(25)
3563 .channels(8)
3564 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3565 }
3566
3567 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8) {
3568 TEST_REQUIRES_X86_FMA3;
3569 for (uint32_t channels = 16; channels < 128; channels += 24) {
3570 DWConvMicrokernelTester()
3571 .cr(8)
3572 .kr(25)
3573 .channels(channels)
3574 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3575 }
3576 }
3577
3578 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmin) {
3579 TEST_REQUIRES_X86_FMA3;
3580 for (uint32_t channels = 16; channels < 128; channels += 24) {
3581 DWConvMicrokernelTester()
3582 .cr(8)
3583 .kr(25)
3584 .channels(channels)
3585 .qmin(128)
3586 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3587 }
3588 }
3589
3590 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmax) {
3591 TEST_REQUIRES_X86_FMA3;
3592 for (uint32_t channels = 16; channels < 128; channels += 24) {
3593 DWConvMicrokernelTester()
3594 .cr(8)
3595 .kr(25)
3596 .channels(channels)
3597 .qmax(128)
3598 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3599 }
3600 }
3601
3602 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_lt_8) {
3603 TEST_REQUIRES_X86_FMA3;
3604 for (uint32_t channels = 1; channels < 8; channels++) {
3605 DWConvMicrokernelTester()
3606 .cr(8)
3607 .kr(25)
3608 .channels(channels)
3609 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3610 }
3611 }
3612
3613 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8) {
3614 TEST_REQUIRES_X86_FMA3;
3615 for (uint32_t channels = 9; channels < 16; channels++) {
3616 DWConvMicrokernelTester()
3617 .cr(8)
3618 .kr(25)
3619 .channels(channels)
3620 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3621 }
3622 }
3623
3624 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmin) {
3625 TEST_REQUIRES_X86_FMA3;
3626 for (uint32_t channels = 9; channels < 16; channels++) {
3627 DWConvMicrokernelTester()
3628 .cr(8)
3629 .kr(25)
3630 .channels(channels)
3631 .qmin(128)
3632 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3633 }
3634 }
3635
3636 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmax) {
3637 TEST_REQUIRES_X86_FMA3;
3638 for (uint32_t channels = 9; channels < 16; channels++) {
3639 DWConvMicrokernelTester()
3640 .cr(8)
3641 .kr(25)
3642 .channels(channels)
3643 .qmax(128)
3644 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3645 }
3646 }
3647
3648 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel) {
3649 TEST_REQUIRES_X86_FMA3;
3650 for (size_t channels = 1; channels <= 40; channels += 7) {
3651 DWConvMicrokernelTester()
3652 .cr(8)
3653 .kr(25)
3654 .channels(channels)
3655 .width(3)
3656 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3657 }
3658 }
3659
3660 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_step) {
3661 TEST_REQUIRES_X86_FMA3;
3662 for (size_t channels = 1; channels <= 40; channels += 7) {
3663 for (size_t step = 2; step <= 25; step++) {
3664 DWConvMicrokernelTester()
3665 .cr(8)
3666 .kr(25)
3667 .channels(channels)
3668 .width(3)
3669 .step(step)
3670 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3671 }
3672 }
3673 }
3674
3675 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_output_stride) {
3676 TEST_REQUIRES_X86_FMA3;
3677 for (size_t channels = 1; channels <= 40; channels += 7) {
3678 DWConvMicrokernelTester()
3679 .cr(8)
3680 .kr(25)
3681 .channels(8)
3682 .width(5)
3683 .output_stride(43)
3684 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3685 }
3686 }
3687
3688 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmin) {
3689 TEST_REQUIRES_X86_FMA3;
3690 for (size_t channels = 1; channels <= 40; channels += 7) {
3691 DWConvMicrokernelTester()
3692 .cr(8)
3693 .kr(25)
3694 .channels(channels)
3695 .width(3)
3696 .qmin(128)
3697 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3698 }
3699 }
3700
3701 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmax) {
3702 TEST_REQUIRES_X86_FMA3;
3703 for (size_t channels = 1; channels <= 40; channels += 7) {
3704 DWConvMicrokernelTester()
3705 .cr(8)
3706 .kr(25)
3707 .channels(channels)
3708 .width(3)
3709 .qmax(128)
3710 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3711 }
3712 }
3713
3714 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, input_offset) {
3715 TEST_REQUIRES_X86_FMA3;
3716 for (uint32_t channels = 16; channels < 128; channels += 24) {
3717 DWConvMicrokernelTester()
3718 .cr(8)
3719 .kr(25)
3720 .channels(channels)
3721 .input_offset(176)
3722 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3723 }
3724 }
3725
3726 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, zero) {
3727 TEST_REQUIRES_X86_FMA3;
3728 for (uint32_t mz = 0; mz < 25; mz++) {
3729 for (uint32_t channels = 16; channels < 128; channels += 24) {
3730 DWConvMicrokernelTester()
3731 .cr(8)
3732 .kr(25)
3733 .channels(channels)
3734 .input_offset(176)
3735 .zero_index(mz)
3736 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3737 }
3738 }
3739 }
3740#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3741
3742
3743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3744 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_eq_16) {
3745 TEST_REQUIRES_X86_FMA3;
3746 DWConvMicrokernelTester()
3747 .cr(16)
3748 .kr(25)
3749 .channels(16)
3750 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3751 }
3752
3753 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16) {
3754 TEST_REQUIRES_X86_FMA3;
3755 for (uint32_t channels = 32; channels < 256; channels += 48) {
3756 DWConvMicrokernelTester()
3757 .cr(16)
3758 .kr(25)
3759 .channels(channels)
3760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3761 }
3762 }
3763
3764 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmin) {
3765 TEST_REQUIRES_X86_FMA3;
3766 for (uint32_t channels = 32; channels < 256; channels += 48) {
3767 DWConvMicrokernelTester()
3768 .cr(16)
3769 .kr(25)
3770 .channels(channels)
3771 .qmin(128)
3772 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3773 }
3774 }
3775
3776 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmax) {
3777 TEST_REQUIRES_X86_FMA3;
3778 for (uint32_t channels = 32; channels < 256; channels += 48) {
3779 DWConvMicrokernelTester()
3780 .cr(16)
3781 .kr(25)
3782 .channels(channels)
3783 .qmax(128)
3784 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3785 }
3786 }
3787
3788 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_lt_16) {
3789 TEST_REQUIRES_X86_FMA3;
3790 for (uint32_t channels = 1; channels < 16; channels++) {
3791 DWConvMicrokernelTester()
3792 .cr(16)
3793 .kr(25)
3794 .channels(channels)
3795 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3796 }
3797 }
3798
3799 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16) {
3800 TEST_REQUIRES_X86_FMA3;
3801 for (uint32_t channels = 17; channels < 32; channels++) {
3802 DWConvMicrokernelTester()
3803 .cr(16)
3804 .kr(25)
3805 .channels(channels)
3806 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3807 }
3808 }
3809
3810 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmin) {
3811 TEST_REQUIRES_X86_FMA3;
3812 for (uint32_t channels = 17; channels < 32; channels++) {
3813 DWConvMicrokernelTester()
3814 .cr(16)
3815 .kr(25)
3816 .channels(channels)
3817 .qmin(128)
3818 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3819 }
3820 }
3821
3822 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmax) {
3823 TEST_REQUIRES_X86_FMA3;
3824 for (uint32_t channels = 17; channels < 32; channels++) {
3825 DWConvMicrokernelTester()
3826 .cr(16)
3827 .kr(25)
3828 .channels(channels)
3829 .qmax(128)
3830 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3831 }
3832 }
3833
3834 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel) {
3835 TEST_REQUIRES_X86_FMA3;
3836 for (size_t channels = 1; channels <= 80; channels += 15) {
3837 DWConvMicrokernelTester()
3838 .cr(16)
3839 .kr(25)
3840 .channels(channels)
3841 .width(3)
3842 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3843 }
3844 }
3845
3846 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_step) {
3847 TEST_REQUIRES_X86_FMA3;
3848 for (size_t channels = 1; channels <= 80; channels += 15) {
3849 for (size_t step = 2; step <= 25; step++) {
3850 DWConvMicrokernelTester()
3851 .cr(16)
3852 .kr(25)
3853 .channels(channels)
3854 .width(3)
3855 .step(step)
3856 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3857 }
3858 }
3859 }
3860
3861 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_output_stride) {
3862 TEST_REQUIRES_X86_FMA3;
3863 for (size_t channels = 1; channels <= 80; channels += 15) {
3864 DWConvMicrokernelTester()
3865 .cr(16)
3866 .kr(25)
3867 .channels(16)
3868 .width(5)
3869 .output_stride(83)
3870 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3871 }
3872 }
3873
3874 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmin) {
3875 TEST_REQUIRES_X86_FMA3;
3876 for (size_t channels = 1; channels <= 80; channels += 15) {
3877 DWConvMicrokernelTester()
3878 .cr(16)
3879 .kr(25)
3880 .channels(channels)
3881 .width(3)
3882 .qmin(128)
3883 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3884 }
3885 }
3886
3887 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmax) {
3888 TEST_REQUIRES_X86_FMA3;
3889 for (size_t channels = 1; channels <= 80; channels += 15) {
3890 DWConvMicrokernelTester()
3891 .cr(16)
3892 .kr(25)
3893 .channels(channels)
3894 .width(3)
3895 .qmax(128)
3896 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3897 }
3898 }
3899
3900 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, input_offset) {
3901 TEST_REQUIRES_X86_FMA3;
3902 for (uint32_t channels = 32; channels < 256; channels += 48) {
3903 DWConvMicrokernelTester()
3904 .cr(16)
3905 .kr(25)
3906 .channels(channels)
3907 .input_offset(304)
3908 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3909 }
3910 }
3911
3912 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, zero) {
3913 TEST_REQUIRES_X86_FMA3;
3914 for (uint32_t mz = 0; mz < 25; mz++) {
3915 for (uint32_t channels = 32; channels < 256; channels += 48) {
3916 DWConvMicrokernelTester()
3917 .cr(16)
3918 .kr(25)
3919 .channels(channels)
3920 .input_offset(304)
3921 .zero_index(mz)
3922 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
3923 }
3924 }
3925 }
3926#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927
3928
3929#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3930 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_eq_16) {
3931 TEST_REQUIRES_X86_FMA3;
3932 DWConvMicrokernelTester()
3933 .cr(16)
3934 .kr(25)
3935 .channels(16)
3936 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3937 }
3938
3939 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16) {
3940 TEST_REQUIRES_X86_FMA3;
3941 for (uint32_t channels = 32; channels < 256; channels += 48) {
3942 DWConvMicrokernelTester()
3943 .cr(16)
3944 .kr(25)
3945 .channels(channels)
3946 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3947 }
3948 }
3949
3950 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmin) {
3951 TEST_REQUIRES_X86_FMA3;
3952 for (uint32_t channels = 32; channels < 256; channels += 48) {
3953 DWConvMicrokernelTester()
3954 .cr(16)
3955 .kr(25)
3956 .channels(channels)
3957 .qmin(128)
3958 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3959 }
3960 }
3961
3962 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmax) {
3963 TEST_REQUIRES_X86_FMA3;
3964 for (uint32_t channels = 32; channels < 256; channels += 48) {
3965 DWConvMicrokernelTester()
3966 .cr(16)
3967 .kr(25)
3968 .channels(channels)
3969 .qmax(128)
3970 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3971 }
3972 }
3973
3974 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_lt_16) {
3975 TEST_REQUIRES_X86_FMA3;
3976 for (uint32_t channels = 1; channels < 16; channels++) {
3977 DWConvMicrokernelTester()
3978 .cr(16)
3979 .kr(25)
3980 .channels(channels)
3981 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3982 }
3983 }
3984
3985 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16) {
3986 TEST_REQUIRES_X86_FMA3;
3987 for (uint32_t channels = 17; channels < 32; channels++) {
3988 DWConvMicrokernelTester()
3989 .cr(16)
3990 .kr(25)
3991 .channels(channels)
3992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
3993 }
3994 }
3995
3996 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmin) {
3997 TEST_REQUIRES_X86_FMA3;
3998 for (uint32_t channels = 17; channels < 32; channels++) {
3999 DWConvMicrokernelTester()
4000 .cr(16)
4001 .kr(25)
4002 .channels(channels)
4003 .qmin(128)
4004 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4005 }
4006 }
4007
4008 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmax) {
4009 TEST_REQUIRES_X86_FMA3;
4010 for (uint32_t channels = 17; channels < 32; channels++) {
4011 DWConvMicrokernelTester()
4012 .cr(16)
4013 .kr(25)
4014 .channels(channels)
4015 .qmax(128)
4016 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4017 }
4018 }
4019
4020 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel) {
4021 TEST_REQUIRES_X86_FMA3;
4022 for (size_t channels = 1; channels <= 80; channels += 15) {
4023 DWConvMicrokernelTester()
4024 .cr(16)
4025 .kr(25)
4026 .channels(channels)
4027 .width(3)
4028 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4029 }
4030 }
4031
4032 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_step) {
4033 TEST_REQUIRES_X86_FMA3;
4034 for (size_t channels = 1; channels <= 80; channels += 15) {
4035 for (size_t step = 2; step <= 25; step++) {
4036 DWConvMicrokernelTester()
4037 .cr(16)
4038 .kr(25)
4039 .channels(channels)
4040 .width(3)
4041 .step(step)
4042 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4043 }
4044 }
4045 }
4046
4047 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_output_stride) {
4048 TEST_REQUIRES_X86_FMA3;
4049 for (size_t channels = 1; channels <= 80; channels += 15) {
4050 DWConvMicrokernelTester()
4051 .cr(16)
4052 .kr(25)
4053 .channels(16)
4054 .width(5)
4055 .output_stride(83)
4056 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4057 }
4058 }
4059
4060 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmin) {
4061 TEST_REQUIRES_X86_FMA3;
4062 for (size_t channels = 1; channels <= 80; channels += 15) {
4063 DWConvMicrokernelTester()
4064 .cr(16)
4065 .kr(25)
4066 .channels(channels)
4067 .width(3)
4068 .qmin(128)
4069 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4070 }
4071 }
4072
4073 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmax) {
4074 TEST_REQUIRES_X86_FMA3;
4075 for (size_t channels = 1; channels <= 80; channels += 15) {
4076 DWConvMicrokernelTester()
4077 .cr(16)
4078 .kr(25)
4079 .channels(channels)
4080 .width(3)
4081 .qmax(128)
4082 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4083 }
4084 }
4085
4086 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, input_offset) {
4087 TEST_REQUIRES_X86_FMA3;
4088 for (uint32_t channels = 32; channels < 256; channels += 48) {
4089 DWConvMicrokernelTester()
4090 .cr(16)
4091 .kr(25)
4092 .channels(channels)
4093 .input_offset(304)
4094 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4095 }
4096 }
4097
4098 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, zero) {
4099 TEST_REQUIRES_X86_FMA3;
4100 for (uint32_t mz = 0; mz < 25; mz++) {
4101 for (uint32_t channels = 32; channels < 256; channels += 48) {
4102 DWConvMicrokernelTester()
4103 .cr(16)
4104 .kr(25)
4105 .channels(channels)
4106 .input_offset(304)
4107 .zero_index(mz)
4108 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4109 }
4110 }
4111 }
4112#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113
4114
4115#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4116 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_eq_32) {
4117 TEST_REQUIRES_X86_FMA3;
4118 DWConvMicrokernelTester()
4119 .cr(32)
4120 .kr(25)
4121 .channels(32)
4122 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4123 }
4124
4125 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32) {
4126 TEST_REQUIRES_X86_FMA3;
4127 for (uint32_t channels = 64; channels < 512; channels += 96) {
4128 DWConvMicrokernelTester()
4129 .cr(32)
4130 .kr(25)
4131 .channels(channels)
4132 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4133 }
4134 }
4135
4136 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmin) {
4137 TEST_REQUIRES_X86_FMA3;
4138 for (uint32_t channels = 64; channels < 512; channels += 96) {
4139 DWConvMicrokernelTester()
4140 .cr(32)
4141 .kr(25)
4142 .channels(channels)
4143 .qmin(128)
4144 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4145 }
4146 }
4147
4148 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmax) {
4149 TEST_REQUIRES_X86_FMA3;
4150 for (uint32_t channels = 64; channels < 512; channels += 96) {
4151 DWConvMicrokernelTester()
4152 .cr(32)
4153 .kr(25)
4154 .channels(channels)
4155 .qmax(128)
4156 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4157 }
4158 }
4159
4160 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_lt_32) {
4161 TEST_REQUIRES_X86_FMA3;
4162 for (uint32_t channels = 1; channels < 32; channels++) {
4163 DWConvMicrokernelTester()
4164 .cr(32)
4165 .kr(25)
4166 .channels(channels)
4167 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4168 }
4169 }
4170
4171 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32) {
4172 TEST_REQUIRES_X86_FMA3;
4173 for (uint32_t channels = 33; channels < 64; channels++) {
4174 DWConvMicrokernelTester()
4175 .cr(32)
4176 .kr(25)
4177 .channels(channels)
4178 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4179 }
4180 }
4181
4182 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32_with_qmin) {
4183 TEST_REQUIRES_X86_FMA3;
4184 for (uint32_t channels = 33; channels < 64; channels++) {
4185 DWConvMicrokernelTester()
4186 .cr(32)
4187 .kr(25)
4188 .channels(channels)
4189 .qmin(128)
4190 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4191 }
4192 }
4193
4194 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32_with_qmax) {
4195 TEST_REQUIRES_X86_FMA3;
4196 for (uint32_t channels = 33; channels < 64; channels++) {
4197 DWConvMicrokernelTester()
4198 .cr(32)
4199 .kr(25)
4200 .channels(channels)
4201 .qmax(128)
4202 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4203 }
4204 }
4205
4206 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel) {
4207 TEST_REQUIRES_X86_FMA3;
4208 for (size_t channels = 1; channels <= 160; channels += 31) {
4209 DWConvMicrokernelTester()
4210 .cr(32)
4211 .kr(25)
4212 .channels(channels)
4213 .width(3)
4214 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4215 }
4216 }
4217
4218 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_step) {
4219 TEST_REQUIRES_X86_FMA3;
4220 for (size_t channels = 1; channels <= 160; channels += 31) {
4221 for (size_t step = 2; step <= 25; step++) {
4222 DWConvMicrokernelTester()
4223 .cr(32)
4224 .kr(25)
4225 .channels(channels)
4226 .width(3)
4227 .step(step)
4228 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4229 }
4230 }
4231 }
4232
4233 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_output_stride) {
4234 TEST_REQUIRES_X86_FMA3;
4235 for (size_t channels = 1; channels <= 160; channels += 31) {
4236 DWConvMicrokernelTester()
4237 .cr(32)
4238 .kr(25)
4239 .channels(32)
4240 .width(5)
4241 .output_stride(163)
4242 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4243 }
4244 }
4245
4246 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmin) {
4247 TEST_REQUIRES_X86_FMA3;
4248 for (size_t channels = 1; channels <= 160; channels += 31) {
4249 DWConvMicrokernelTester()
4250 .cr(32)
4251 .kr(25)
4252 .channels(channels)
4253 .width(3)
4254 .qmin(128)
4255 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4256 }
4257 }
4258
4259 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmax) {
4260 TEST_REQUIRES_X86_FMA3;
4261 for (size_t channels = 1; channels <= 160; channels += 31) {
4262 DWConvMicrokernelTester()
4263 .cr(32)
4264 .kr(25)
4265 .channels(channels)
4266 .width(3)
4267 .qmax(128)
4268 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4269 }
4270 }
4271
4272 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, input_offset) {
4273 TEST_REQUIRES_X86_FMA3;
4274 for (uint32_t channels = 64; channels < 512; channels += 96) {
4275 DWConvMicrokernelTester()
4276 .cr(32)
4277 .kr(25)
4278 .channels(channels)
4279 .input_offset(592)
4280 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4281 }
4282 }
4283
4284 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, zero) {
4285 TEST_REQUIRES_X86_FMA3;
4286 for (uint32_t mz = 0; mz < 25; mz++) {
4287 for (uint32_t channels = 64; channels < 512; channels += 96) {
4288 DWConvMicrokernelTester()
4289 .cr(32)
4290 .kr(25)
4291 .channels(channels)
4292 .input_offset(592)
4293 .zero_index(mz)
4294 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
4295 }
4296 }
4297 }
4298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4299
4300
4301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4302 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_eq_32) {
4303 TEST_REQUIRES_X86_FMA3;
4304 DWConvMicrokernelTester()
4305 .cr(32)
4306 .kr(25)
4307 .channels(32)
4308 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4309 }
4310
4311 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32) {
4312 TEST_REQUIRES_X86_FMA3;
4313 for (uint32_t channels = 64; channels < 512; channels += 96) {
4314 DWConvMicrokernelTester()
4315 .cr(32)
4316 .kr(25)
4317 .channels(channels)
4318 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4319 }
4320 }
4321
4322 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmin) {
4323 TEST_REQUIRES_X86_FMA3;
4324 for (uint32_t channels = 64; channels < 512; channels += 96) {
4325 DWConvMicrokernelTester()
4326 .cr(32)
4327 .kr(25)
4328 .channels(channels)
4329 .qmin(128)
4330 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4331 }
4332 }
4333
4334 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmax) {
4335 TEST_REQUIRES_X86_FMA3;
4336 for (uint32_t channels = 64; channels < 512; channels += 96) {
4337 DWConvMicrokernelTester()
4338 .cr(32)
4339 .kr(25)
4340 .channels(channels)
4341 .qmax(128)
4342 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4343 }
4344 }
4345
4346 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_lt_32) {
4347 TEST_REQUIRES_X86_FMA3;
4348 for (uint32_t channels = 1; channels < 32; channels++) {
4349 DWConvMicrokernelTester()
4350 .cr(32)
4351 .kr(25)
4352 .channels(channels)
4353 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4354 }
4355 }
4356
4357 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32) {
4358 TEST_REQUIRES_X86_FMA3;
4359 for (uint32_t channels = 33; channels < 64; channels++) {
4360 DWConvMicrokernelTester()
4361 .cr(32)
4362 .kr(25)
4363 .channels(channels)
4364 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4365 }
4366 }
4367
4368 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmin) {
4369 TEST_REQUIRES_X86_FMA3;
4370 for (uint32_t channels = 33; channels < 64; channels++) {
4371 DWConvMicrokernelTester()
4372 .cr(32)
4373 .kr(25)
4374 .channels(channels)
4375 .qmin(128)
4376 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4377 }
4378 }
4379
4380 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmax) {
4381 TEST_REQUIRES_X86_FMA3;
4382 for (uint32_t channels = 33; channels < 64; channels++) {
4383 DWConvMicrokernelTester()
4384 .cr(32)
4385 .kr(25)
4386 .channels(channels)
4387 .qmax(128)
4388 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4389 }
4390 }
4391
4392 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel) {
4393 TEST_REQUIRES_X86_FMA3;
4394 for (size_t channels = 1; channels <= 160; channels += 31) {
4395 DWConvMicrokernelTester()
4396 .cr(32)
4397 .kr(25)
4398 .channels(channels)
4399 .width(3)
4400 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4401 }
4402 }
4403
4404 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_step) {
4405 TEST_REQUIRES_X86_FMA3;
4406 for (size_t channels = 1; channels <= 160; channels += 31) {
4407 for (size_t step = 2; step <= 25; step++) {
4408 DWConvMicrokernelTester()
4409 .cr(32)
4410 .kr(25)
4411 .channels(channels)
4412 .width(3)
4413 .step(step)
4414 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4415 }
4416 }
4417 }
4418
4419 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_output_stride) {
4420 TEST_REQUIRES_X86_FMA3;
4421 for (size_t channels = 1; channels <= 160; channels += 31) {
4422 DWConvMicrokernelTester()
4423 .cr(32)
4424 .kr(25)
4425 .channels(32)
4426 .width(5)
4427 .output_stride(163)
4428 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4429 }
4430 }
4431
4432 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmin) {
4433 TEST_REQUIRES_X86_FMA3;
4434 for (size_t channels = 1; channels <= 160; channels += 31) {
4435 DWConvMicrokernelTester()
4436 .cr(32)
4437 .kr(25)
4438 .channels(channels)
4439 .width(3)
4440 .qmin(128)
4441 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4442 }
4443 }
4444
4445 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmax) {
4446 TEST_REQUIRES_X86_FMA3;
4447 for (size_t channels = 1; channels <= 160; channels += 31) {
4448 DWConvMicrokernelTester()
4449 .cr(32)
4450 .kr(25)
4451 .channels(channels)
4452 .width(3)
4453 .qmax(128)
4454 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4455 }
4456 }
4457
4458 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, input_offset) {
4459 TEST_REQUIRES_X86_FMA3;
4460 for (uint32_t channels = 64; channels < 512; channels += 96) {
4461 DWConvMicrokernelTester()
4462 .cr(32)
4463 .kr(25)
4464 .channels(channels)
4465 .input_offset(592)
4466 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4467 }
4468 }
4469
4470 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, zero) {
4471 TEST_REQUIRES_X86_FMA3;
4472 for (uint32_t mz = 0; mz < 25; mz++) {
4473 for (uint32_t channels = 64; channels < 512; channels += 96) {
4474 DWConvMicrokernelTester()
4475 .cr(32)
4476 .kr(25)
4477 .channels(channels)
4478 .input_offset(592)
4479 .zero_index(mz)
4480 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
4481 }
4482 }
4483 }
4484#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4485
4486
4487#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4488 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_eq_8) {
4489 TEST_REQUIRES_X86_FMA3;
4490 DWConvMicrokernelTester()
4491 .cr(8)
4492 .kr(9)
4493 .channels(8)
4494 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4495 }
4496
4497 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8) {
4498 TEST_REQUIRES_X86_FMA3;
4499 for (uint32_t channels = 16; channels < 128; channels += 24) {
4500 DWConvMicrokernelTester()
4501 .cr(8)
4502 .kr(9)
4503 .channels(channels)
4504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4505 }
4506 }
4507
4508 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmin) {
4509 TEST_REQUIRES_X86_FMA3;
4510 for (uint32_t channels = 16; channels < 128; channels += 24) {
4511 DWConvMicrokernelTester()
4512 .cr(8)
4513 .kr(9)
4514 .channels(channels)
4515 .qmin(128)
4516 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4517 }
4518 }
4519
4520 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmax) {
4521 TEST_REQUIRES_X86_FMA3;
4522 for (uint32_t channels = 16; channels < 128; channels += 24) {
4523 DWConvMicrokernelTester()
4524 .cr(8)
4525 .kr(9)
4526 .channels(channels)
4527 .qmax(128)
4528 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4529 }
4530 }
4531
4532 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_lt_8) {
4533 TEST_REQUIRES_X86_FMA3;
4534 for (uint32_t channels = 1; channels < 8; channels++) {
4535 DWConvMicrokernelTester()
4536 .cr(8)
4537 .kr(9)
4538 .channels(channels)
4539 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4540 }
4541 }
4542
4543 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8) {
4544 TEST_REQUIRES_X86_FMA3;
4545 for (uint32_t channels = 9; channels < 16; channels++) {
4546 DWConvMicrokernelTester()
4547 .cr(8)
4548 .kr(9)
4549 .channels(channels)
4550 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4551 }
4552 }
4553
4554 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmin) {
4555 TEST_REQUIRES_X86_FMA3;
4556 for (uint32_t channels = 9; channels < 16; channels++) {
4557 DWConvMicrokernelTester()
4558 .cr(8)
4559 .kr(9)
4560 .channels(channels)
4561 .qmin(128)
4562 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4563 }
4564 }
4565
4566 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmax) {
4567 TEST_REQUIRES_X86_FMA3;
4568 for (uint32_t channels = 9; channels < 16; channels++) {
4569 DWConvMicrokernelTester()
4570 .cr(8)
4571 .kr(9)
4572 .channels(channels)
4573 .qmax(128)
4574 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4575 }
4576 }
4577
4578 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel) {
4579 TEST_REQUIRES_X86_FMA3;
4580 for (size_t channels = 1; channels <= 40; channels += 7) {
4581 DWConvMicrokernelTester()
4582 .cr(8)
4583 .kr(9)
4584 .channels(channels)
4585 .width(3)
4586 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4587 }
4588 }
4589
4590 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_step) {
4591 TEST_REQUIRES_X86_FMA3;
4592 for (size_t channels = 1; channels <= 40; channels += 7) {
4593 for (size_t step = 2; step <= 9; step++) {
4594 DWConvMicrokernelTester()
4595 .cr(8)
4596 .kr(9)
4597 .channels(channels)
4598 .width(3)
4599 .step(step)
4600 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4601 }
4602 }
4603 }
4604
4605 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_output_stride) {
4606 TEST_REQUIRES_X86_FMA3;
4607 for (size_t channels = 1; channels <= 40; channels += 7) {
4608 DWConvMicrokernelTester()
4609 .cr(8)
4610 .kr(9)
4611 .channels(8)
4612 .width(5)
4613 .output_stride(43)
4614 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4615 }
4616 }
4617
4618 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmin) {
4619 TEST_REQUIRES_X86_FMA3;
4620 for (size_t channels = 1; channels <= 40; channels += 7) {
4621 DWConvMicrokernelTester()
4622 .cr(8)
4623 .kr(9)
4624 .channels(channels)
4625 .width(3)
4626 .qmin(128)
4627 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4628 }
4629 }
4630
4631 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmax) {
4632 TEST_REQUIRES_X86_FMA3;
4633 for (size_t channels = 1; channels <= 40; channels += 7) {
4634 DWConvMicrokernelTester()
4635 .cr(8)
4636 .kr(9)
4637 .channels(channels)
4638 .width(3)
4639 .qmax(128)
4640 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4641 }
4642 }
4643
4644 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, input_offset) {
4645 TEST_REQUIRES_X86_FMA3;
4646 for (uint32_t channels = 16; channels < 128; channels += 24) {
4647 DWConvMicrokernelTester()
4648 .cr(8)
4649 .kr(9)
4650 .channels(channels)
4651 .input_offset(176)
4652 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4653 }
4654 }
4655
4656 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, zero) {
4657 TEST_REQUIRES_X86_FMA3;
4658 for (uint32_t mz = 0; mz < 9; mz++) {
4659 for (uint32_t channels = 16; channels < 128; channels += 24) {
4660 DWConvMicrokernelTester()
4661 .cr(8)
4662 .kr(9)
4663 .channels(channels)
4664 .input_offset(176)
4665 .zero_index(mz)
4666 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
4667 }
4668 }
4669 }
4670#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671
4672
4673#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4674 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_eq_8) {
4675 TEST_REQUIRES_X86_FMA3;
4676 DWConvMicrokernelTester()
4677 .cr(8)
4678 .kr(9)
4679 .channels(8)
4680 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4681 }
4682
4683 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8) {
4684 TEST_REQUIRES_X86_FMA3;
4685 for (uint32_t channels = 16; channels < 128; channels += 24) {
4686 DWConvMicrokernelTester()
4687 .cr(8)
4688 .kr(9)
4689 .channels(channels)
4690 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4691 }
4692 }
4693
4694 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmin) {
4695 TEST_REQUIRES_X86_FMA3;
4696 for (uint32_t channels = 16; channels < 128; channels += 24) {
4697 DWConvMicrokernelTester()
4698 .cr(8)
4699 .kr(9)
4700 .channels(channels)
4701 .qmin(128)
4702 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4703 }
4704 }
4705
4706 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmax) {
4707 TEST_REQUIRES_X86_FMA3;
4708 for (uint32_t channels = 16; channels < 128; channels += 24) {
4709 DWConvMicrokernelTester()
4710 .cr(8)
4711 .kr(9)
4712 .channels(channels)
4713 .qmax(128)
4714 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4715 }
4716 }
4717
4718 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_lt_8) {
4719 TEST_REQUIRES_X86_FMA3;
4720 for (uint32_t channels = 1; channels < 8; channels++) {
4721 DWConvMicrokernelTester()
4722 .cr(8)
4723 .kr(9)
4724 .channels(channels)
4725 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4726 }
4727 }
4728
4729 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8) {
4730 TEST_REQUIRES_X86_FMA3;
4731 for (uint32_t channels = 9; channels < 16; channels++) {
4732 DWConvMicrokernelTester()
4733 .cr(8)
4734 .kr(9)
4735 .channels(channels)
4736 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4737 }
4738 }
4739
4740 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmin) {
4741 TEST_REQUIRES_X86_FMA3;
4742 for (uint32_t channels = 9; channels < 16; channels++) {
4743 DWConvMicrokernelTester()
4744 .cr(8)
4745 .kr(9)
4746 .channels(channels)
4747 .qmin(128)
4748 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4749 }
4750 }
4751
4752 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmax) {
4753 TEST_REQUIRES_X86_FMA3;
4754 for (uint32_t channels = 9; channels < 16; channels++) {
4755 DWConvMicrokernelTester()
4756 .cr(8)
4757 .kr(9)
4758 .channels(channels)
4759 .qmax(128)
4760 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4761 }
4762 }
4763
4764 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel) {
4765 TEST_REQUIRES_X86_FMA3;
4766 for (size_t channels = 1; channels <= 40; channels += 7) {
4767 DWConvMicrokernelTester()
4768 .cr(8)
4769 .kr(9)
4770 .channels(channels)
4771 .width(3)
4772 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4773 }
4774 }
4775
4776 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_step) {
4777 TEST_REQUIRES_X86_FMA3;
4778 for (size_t channels = 1; channels <= 40; channels += 7) {
4779 for (size_t step = 2; step <= 9; step++) {
4780 DWConvMicrokernelTester()
4781 .cr(8)
4782 .kr(9)
4783 .channels(channels)
4784 .width(3)
4785 .step(step)
4786 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4787 }
4788 }
4789 }
4790
4791 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_output_stride) {
4792 TEST_REQUIRES_X86_FMA3;
4793 for (size_t channels = 1; channels <= 40; channels += 7) {
4794 DWConvMicrokernelTester()
4795 .cr(8)
4796 .kr(9)
4797 .channels(8)
4798 .width(5)
4799 .output_stride(43)
4800 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4801 }
4802 }
4803
4804 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmin) {
4805 TEST_REQUIRES_X86_FMA3;
4806 for (size_t channels = 1; channels <= 40; channels += 7) {
4807 DWConvMicrokernelTester()
4808 .cr(8)
4809 .kr(9)
4810 .channels(channels)
4811 .width(3)
4812 .qmin(128)
4813 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4814 }
4815 }
4816
4817 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmax) {
4818 TEST_REQUIRES_X86_FMA3;
4819 for (size_t channels = 1; channels <= 40; channels += 7) {
4820 DWConvMicrokernelTester()
4821 .cr(8)
4822 .kr(9)
4823 .channels(channels)
4824 .width(3)
4825 .qmax(128)
4826 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4827 }
4828 }
4829
4830 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, input_offset) {
4831 TEST_REQUIRES_X86_FMA3;
4832 for (uint32_t channels = 16; channels < 128; channels += 24) {
4833 DWConvMicrokernelTester()
4834 .cr(8)
4835 .kr(9)
4836 .channels(channels)
4837 .input_offset(176)
4838 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4839 }
4840 }
4841
4842 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, zero) {
4843 TEST_REQUIRES_X86_FMA3;
4844 for (uint32_t mz = 0; mz < 9; mz++) {
4845 for (uint32_t channels = 16; channels < 128; channels += 24) {
4846 DWConvMicrokernelTester()
4847 .cr(8)
4848 .kr(9)
4849 .channels(channels)
4850 .input_offset(176)
4851 .zero_index(mz)
4852 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
4853 }
4854 }
4855 }
4856#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857
4858
4859#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4860 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_eq_16) {
4861 TEST_REQUIRES_X86_FMA3;
4862 DWConvMicrokernelTester()
4863 .cr(16)
4864 .kr(9)
4865 .channels(16)
4866 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4867 }
4868
4869 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16) {
4870 TEST_REQUIRES_X86_FMA3;
4871 for (uint32_t channels = 32; channels < 256; channels += 48) {
4872 DWConvMicrokernelTester()
4873 .cr(16)
4874 .kr(9)
4875 .channels(channels)
4876 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4877 }
4878 }
4879
4880 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16_with_qmin) {
4881 TEST_REQUIRES_X86_FMA3;
4882 for (uint32_t channels = 32; channels < 256; channels += 48) {
4883 DWConvMicrokernelTester()
4884 .cr(16)
4885 .kr(9)
4886 .channels(channels)
4887 .qmin(128)
4888 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4889 }
4890 }
4891
4892 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16_with_qmax) {
4893 TEST_REQUIRES_X86_FMA3;
4894 for (uint32_t channels = 32; channels < 256; channels += 48) {
4895 DWConvMicrokernelTester()
4896 .cr(16)
4897 .kr(9)
4898 .channels(channels)
4899 .qmax(128)
4900 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4901 }
4902 }
4903
4904 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_lt_16) {
4905 TEST_REQUIRES_X86_FMA3;
4906 for (uint32_t channels = 1; channels < 16; channels++) {
4907 DWConvMicrokernelTester()
4908 .cr(16)
4909 .kr(9)
4910 .channels(channels)
4911 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4912 }
4913 }
4914
4915 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16) {
4916 TEST_REQUIRES_X86_FMA3;
4917 for (uint32_t channels = 17; channels < 32; channels++) {
4918 DWConvMicrokernelTester()
4919 .cr(16)
4920 .kr(9)
4921 .channels(channels)
4922 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4923 }
4924 }
4925
4926 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmin) {
4927 TEST_REQUIRES_X86_FMA3;
4928 for (uint32_t channels = 17; channels < 32; channels++) {
4929 DWConvMicrokernelTester()
4930 .cr(16)
4931 .kr(9)
4932 .channels(channels)
4933 .qmin(128)
4934 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4935 }
4936 }
4937
4938 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmax) {
4939 TEST_REQUIRES_X86_FMA3;
4940 for (uint32_t channels = 17; channels < 32; channels++) {
4941 DWConvMicrokernelTester()
4942 .cr(16)
4943 .kr(9)
4944 .channels(channels)
4945 .qmax(128)
4946 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4947 }
4948 }
4949
4950 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel) {
4951 TEST_REQUIRES_X86_FMA3;
4952 for (size_t channels = 1; channels <= 80; channels += 15) {
4953 DWConvMicrokernelTester()
4954 .cr(16)
4955 .kr(9)
4956 .channels(channels)
4957 .width(3)
4958 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4959 }
4960 }
4961
4962 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_step) {
4963 TEST_REQUIRES_X86_FMA3;
4964 for (size_t channels = 1; channels <= 80; channels += 15) {
4965 for (size_t step = 2; step <= 9; step++) {
4966 DWConvMicrokernelTester()
4967 .cr(16)
4968 .kr(9)
4969 .channels(channels)
4970 .width(3)
4971 .step(step)
4972 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4973 }
4974 }
4975 }
4976
4977 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_output_stride) {
4978 TEST_REQUIRES_X86_FMA3;
4979 for (size_t channels = 1; channels <= 80; channels += 15) {
4980 DWConvMicrokernelTester()
4981 .cr(16)
4982 .kr(9)
4983 .channels(16)
4984 .width(5)
4985 .output_stride(83)
4986 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
4987 }
4988 }
4989
4990 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmin) {
4991 TEST_REQUIRES_X86_FMA3;
4992 for (size_t channels = 1; channels <= 80; channels += 15) {
4993 DWConvMicrokernelTester()
4994 .cr(16)
4995 .kr(9)
4996 .channels(channels)
4997 .width(3)
4998 .qmin(128)
4999 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
5000 }
5001 }
5002
5003 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmax) {
5004 TEST_REQUIRES_X86_FMA3;
5005 for (size_t channels = 1; channels <= 80; channels += 15) {
5006 DWConvMicrokernelTester()
5007 .cr(16)
5008 .kr(9)
5009 .channels(channels)
5010 .width(3)
5011 .qmax(128)
5012 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
5013 }
5014 }
5015
5016 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, input_offset) {
5017 TEST_REQUIRES_X86_FMA3;
5018 for (uint32_t channels = 32; channels < 256; channels += 48) {
5019 DWConvMicrokernelTester()
5020 .cr(16)
5021 .kr(9)
5022 .channels(channels)
5023 .input_offset(304)
5024 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
5025 }
5026 }
5027
5028 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, zero) {
5029 TEST_REQUIRES_X86_FMA3;
5030 for (uint32_t mz = 0; mz < 9; mz++) {
5031 for (uint32_t channels = 32; channels < 256; channels += 48) {
5032 DWConvMicrokernelTester()
5033 .cr(16)
5034 .kr(9)
5035 .channels(channels)
5036 .input_offset(304)
5037 .zero_index(mz)
5038 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
5039 }
5040 }
5041 }
5042#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043
5044
5045#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5046 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_eq_16) {
5047 TEST_REQUIRES_X86_FMA3;
5048 DWConvMicrokernelTester()
5049 .cr(16)
5050 .kr(9)
5051 .channels(16)
5052 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5053 }
5054
5055 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16) {
5056 TEST_REQUIRES_X86_FMA3;
5057 for (uint32_t channels = 32; channels < 256; channels += 48) {
5058 DWConvMicrokernelTester()
5059 .cr(16)
5060 .kr(9)
5061 .channels(channels)
5062 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5063 }
5064 }
5065
5066 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmin) {
5067 TEST_REQUIRES_X86_FMA3;
5068 for (uint32_t channels = 32; channels < 256; channels += 48) {
5069 DWConvMicrokernelTester()
5070 .cr(16)
5071 .kr(9)
5072 .channels(channels)
5073 .qmin(128)
5074 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5075 }
5076 }
5077
5078 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmax) {
5079 TEST_REQUIRES_X86_FMA3;
5080 for (uint32_t channels = 32; channels < 256; channels += 48) {
5081 DWConvMicrokernelTester()
5082 .cr(16)
5083 .kr(9)
5084 .channels(channels)
5085 .qmax(128)
5086 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5087 }
5088 }
5089
5090 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_lt_16) {
5091 TEST_REQUIRES_X86_FMA3;
5092 for (uint32_t channels = 1; channels < 16; channels++) {
5093 DWConvMicrokernelTester()
5094 .cr(16)
5095 .kr(9)
5096 .channels(channels)
5097 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5098 }
5099 }
5100
5101 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16) {
5102 TEST_REQUIRES_X86_FMA3;
5103 for (uint32_t channels = 17; channels < 32; channels++) {
5104 DWConvMicrokernelTester()
5105 .cr(16)
5106 .kr(9)
5107 .channels(channels)
5108 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5109 }
5110 }
5111
5112 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmin) {
5113 TEST_REQUIRES_X86_FMA3;
5114 for (uint32_t channels = 17; channels < 32; channels++) {
5115 DWConvMicrokernelTester()
5116 .cr(16)
5117 .kr(9)
5118 .channels(channels)
5119 .qmin(128)
5120 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5121 }
5122 }
5123
5124 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmax) {
5125 TEST_REQUIRES_X86_FMA3;
5126 for (uint32_t channels = 17; channels < 32; channels++) {
5127 DWConvMicrokernelTester()
5128 .cr(16)
5129 .kr(9)
5130 .channels(channels)
5131 .qmax(128)
5132 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5133 }
5134 }
5135
5136 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel) {
5137 TEST_REQUIRES_X86_FMA3;
5138 for (size_t channels = 1; channels <= 80; channels += 15) {
5139 DWConvMicrokernelTester()
5140 .cr(16)
5141 .kr(9)
5142 .channels(channels)
5143 .width(3)
5144 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5145 }
5146 }
5147
5148 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_step) {
5149 TEST_REQUIRES_X86_FMA3;
5150 for (size_t channels = 1; channels <= 80; channels += 15) {
5151 for (size_t step = 2; step <= 9; step++) {
5152 DWConvMicrokernelTester()
5153 .cr(16)
5154 .kr(9)
5155 .channels(channels)
5156 .width(3)
5157 .step(step)
5158 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5159 }
5160 }
5161 }
5162
5163 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_output_stride) {
5164 TEST_REQUIRES_X86_FMA3;
5165 for (size_t channels = 1; channels <= 80; channels += 15) {
5166 DWConvMicrokernelTester()
5167 .cr(16)
5168 .kr(9)
5169 .channels(16)
5170 .width(5)
5171 .output_stride(83)
5172 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5173 }
5174 }
5175
5176 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmin) {
5177 TEST_REQUIRES_X86_FMA3;
5178 for (size_t channels = 1; channels <= 80; channels += 15) {
5179 DWConvMicrokernelTester()
5180 .cr(16)
5181 .kr(9)
5182 .channels(channels)
5183 .width(3)
5184 .qmin(128)
5185 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5186 }
5187 }
5188
5189 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmax) {
5190 TEST_REQUIRES_X86_FMA3;
5191 for (size_t channels = 1; channels <= 80; channels += 15) {
5192 DWConvMicrokernelTester()
5193 .cr(16)
5194 .kr(9)
5195 .channels(channels)
5196 .width(3)
5197 .qmax(128)
5198 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5199 }
5200 }
5201
5202 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, input_offset) {
5203 TEST_REQUIRES_X86_FMA3;
5204 for (uint32_t channels = 32; channels < 256; channels += 48) {
5205 DWConvMicrokernelTester()
5206 .cr(16)
5207 .kr(9)
5208 .channels(channels)
5209 .input_offset(304)
5210 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5211 }
5212 }
5213
5214 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, zero) {
5215 TEST_REQUIRES_X86_FMA3;
5216 for (uint32_t mz = 0; mz < 9; mz++) {
5217 for (uint32_t channels = 32; channels < 256; channels += 48) {
5218 DWConvMicrokernelTester()
5219 .cr(16)
5220 .kr(9)
5221 .channels(channels)
5222 .input_offset(304)
5223 .zero_index(mz)
5224 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5225 }
5226 }
5227 }
5228#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229
5230
5231#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5232 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_eq_32) {
5233 TEST_REQUIRES_X86_FMA3;
5234 DWConvMicrokernelTester()
5235 .cr(32)
5236 .kr(9)
5237 .channels(32)
5238 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5239 }
5240
5241 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32) {
5242 TEST_REQUIRES_X86_FMA3;
5243 for (uint32_t channels = 64; channels < 512; channels += 96) {
5244 DWConvMicrokernelTester()
5245 .cr(32)
5246 .kr(9)
5247 .channels(channels)
5248 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5249 }
5250 }
5251
5252 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmin) {
5253 TEST_REQUIRES_X86_FMA3;
5254 for (uint32_t channels = 64; channels < 512; channels += 96) {
5255 DWConvMicrokernelTester()
5256 .cr(32)
5257 .kr(9)
5258 .channels(channels)
5259 .qmin(128)
5260 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5261 }
5262 }
5263
5264 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmax) {
5265 TEST_REQUIRES_X86_FMA3;
5266 for (uint32_t channels = 64; channels < 512; channels += 96) {
5267 DWConvMicrokernelTester()
5268 .cr(32)
5269 .kr(9)
5270 .channels(channels)
5271 .qmax(128)
5272 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5273 }
5274 }
5275
5276 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_lt_32) {
5277 TEST_REQUIRES_X86_FMA3;
5278 for (uint32_t channels = 1; channels < 32; channels++) {
5279 DWConvMicrokernelTester()
5280 .cr(32)
5281 .kr(9)
5282 .channels(channels)
5283 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5284 }
5285 }
5286
5287 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32) {
5288 TEST_REQUIRES_X86_FMA3;
5289 for (uint32_t channels = 33; channels < 64; channels++) {
5290 DWConvMicrokernelTester()
5291 .cr(32)
5292 .kr(9)
5293 .channels(channels)
5294 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5295 }
5296 }
5297
5298 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmin) {
5299 TEST_REQUIRES_X86_FMA3;
5300 for (uint32_t channels = 33; channels < 64; channels++) {
5301 DWConvMicrokernelTester()
5302 .cr(32)
5303 .kr(9)
5304 .channels(channels)
5305 .qmin(128)
5306 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5307 }
5308 }
5309
5310 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmax) {
5311 TEST_REQUIRES_X86_FMA3;
5312 for (uint32_t channels = 33; channels < 64; channels++) {
5313 DWConvMicrokernelTester()
5314 .cr(32)
5315 .kr(9)
5316 .channels(channels)
5317 .qmax(128)
5318 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5319 }
5320 }
5321
5322 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel) {
5323 TEST_REQUIRES_X86_FMA3;
5324 for (size_t channels = 1; channels <= 160; channels += 31) {
5325 DWConvMicrokernelTester()
5326 .cr(32)
5327 .kr(9)
5328 .channels(channels)
5329 .width(3)
5330 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5331 }
5332 }
5333
5334 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_step) {
5335 TEST_REQUIRES_X86_FMA3;
5336 for (size_t channels = 1; channels <= 160; channels += 31) {
5337 for (size_t step = 2; step <= 9; step++) {
5338 DWConvMicrokernelTester()
5339 .cr(32)
5340 .kr(9)
5341 .channels(channels)
5342 .width(3)
5343 .step(step)
5344 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5345 }
5346 }
5347 }
5348
5349 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_output_stride) {
5350 TEST_REQUIRES_X86_FMA3;
5351 for (size_t channels = 1; channels <= 160; channels += 31) {
5352 DWConvMicrokernelTester()
5353 .cr(32)
5354 .kr(9)
5355 .channels(32)
5356 .width(5)
5357 .output_stride(163)
5358 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5359 }
5360 }
5361
5362 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmin) {
5363 TEST_REQUIRES_X86_FMA3;
5364 for (size_t channels = 1; channels <= 160; channels += 31) {
5365 DWConvMicrokernelTester()
5366 .cr(32)
5367 .kr(9)
5368 .channels(channels)
5369 .width(3)
5370 .qmin(128)
5371 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5372 }
5373 }
5374
5375 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmax) {
5376 TEST_REQUIRES_X86_FMA3;
5377 for (size_t channels = 1; channels <= 160; channels += 31) {
5378 DWConvMicrokernelTester()
5379 .cr(32)
5380 .kr(9)
5381 .channels(channels)
5382 .width(3)
5383 .qmax(128)
5384 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5385 }
5386 }
5387
5388 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, input_offset) {
5389 TEST_REQUIRES_X86_FMA3;
5390 for (uint32_t channels = 64; channels < 512; channels += 96) {
5391 DWConvMicrokernelTester()
5392 .cr(32)
5393 .kr(9)
5394 .channels(channels)
5395 .input_offset(592)
5396 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5397 }
5398 }
5399
5400 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, zero) {
5401 TEST_REQUIRES_X86_FMA3;
5402 for (uint32_t mz = 0; mz < 9; mz++) {
5403 for (uint32_t channels = 64; channels < 512; channels += 96) {
5404 DWConvMicrokernelTester()
5405 .cr(32)
5406 .kr(9)
5407 .channels(channels)
5408 .input_offset(592)
5409 .zero_index(mz)
5410 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
5411 }
5412 }
5413 }
5414#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415
5416
5417#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5418 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_eq_32) {
5419 TEST_REQUIRES_X86_FMA3;
5420 DWConvMicrokernelTester()
5421 .cr(32)
5422 .kr(9)
5423 .channels(32)
5424 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5425 }
5426
5427 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32) {
5428 TEST_REQUIRES_X86_FMA3;
5429 for (uint32_t channels = 64; channels < 512; channels += 96) {
5430 DWConvMicrokernelTester()
5431 .cr(32)
5432 .kr(9)
5433 .channels(channels)
5434 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5435 }
5436 }
5437
5438 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmin) {
5439 TEST_REQUIRES_X86_FMA3;
5440 for (uint32_t channels = 64; channels < 512; channels += 96) {
5441 DWConvMicrokernelTester()
5442 .cr(32)
5443 .kr(9)
5444 .channels(channels)
5445 .qmin(128)
5446 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5447 }
5448 }
5449
5450 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmax) {
5451 TEST_REQUIRES_X86_FMA3;
5452 for (uint32_t channels = 64; channels < 512; channels += 96) {
5453 DWConvMicrokernelTester()
5454 .cr(32)
5455 .kr(9)
5456 .channels(channels)
5457 .qmax(128)
5458 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5459 }
5460 }
5461
5462 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_lt_32) {
5463 TEST_REQUIRES_X86_FMA3;
5464 for (uint32_t channels = 1; channels < 32; channels++) {
5465 DWConvMicrokernelTester()
5466 .cr(32)
5467 .kr(9)
5468 .channels(channels)
5469 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5470 }
5471 }
5472
5473 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32) {
5474 TEST_REQUIRES_X86_FMA3;
5475 for (uint32_t channels = 33; channels < 64; channels++) {
5476 DWConvMicrokernelTester()
5477 .cr(32)
5478 .kr(9)
5479 .channels(channels)
5480 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5481 }
5482 }
5483
5484 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmin) {
5485 TEST_REQUIRES_X86_FMA3;
5486 for (uint32_t channels = 33; channels < 64; channels++) {
5487 DWConvMicrokernelTester()
5488 .cr(32)
5489 .kr(9)
5490 .channels(channels)
5491 .qmin(128)
5492 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5493 }
5494 }
5495
5496 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmax) {
5497 TEST_REQUIRES_X86_FMA3;
5498 for (uint32_t channels = 33; channels < 64; channels++) {
5499 DWConvMicrokernelTester()
5500 .cr(32)
5501 .kr(9)
5502 .channels(channels)
5503 .qmax(128)
5504 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5505 }
5506 }
5507
5508 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel) {
5509 TEST_REQUIRES_X86_FMA3;
5510 for (size_t channels = 1; channels <= 160; channels += 31) {
5511 DWConvMicrokernelTester()
5512 .cr(32)
5513 .kr(9)
5514 .channels(channels)
5515 .width(3)
5516 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5517 }
5518 }
5519
5520 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_step) {
5521 TEST_REQUIRES_X86_FMA3;
5522 for (size_t channels = 1; channels <= 160; channels += 31) {
5523 for (size_t step = 2; step <= 9; step++) {
5524 DWConvMicrokernelTester()
5525 .cr(32)
5526 .kr(9)
5527 .channels(channels)
5528 .width(3)
5529 .step(step)
5530 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5531 }
5532 }
5533 }
5534
5535 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_output_stride) {
5536 TEST_REQUIRES_X86_FMA3;
5537 for (size_t channels = 1; channels <= 160; channels += 31) {
5538 DWConvMicrokernelTester()
5539 .cr(32)
5540 .kr(9)
5541 .channels(32)
5542 .width(5)
5543 .output_stride(163)
5544 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5545 }
5546 }
5547
5548 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmin) {
5549 TEST_REQUIRES_X86_FMA3;
5550 for (size_t channels = 1; channels <= 160; channels += 31) {
5551 DWConvMicrokernelTester()
5552 .cr(32)
5553 .kr(9)
5554 .channels(channels)
5555 .width(3)
5556 .qmin(128)
5557 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5558 }
5559 }
5560
5561 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmax) {
5562 TEST_REQUIRES_X86_FMA3;
5563 for (size_t channels = 1; channels <= 160; channels += 31) {
5564 DWConvMicrokernelTester()
5565 .cr(32)
5566 .kr(9)
5567 .channels(channels)
5568 .width(3)
5569 .qmax(128)
5570 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5571 }
5572 }
5573
5574 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, input_offset) {
5575 TEST_REQUIRES_X86_FMA3;
5576 for (uint32_t channels = 64; channels < 512; channels += 96) {
5577 DWConvMicrokernelTester()
5578 .cr(32)
5579 .kr(9)
5580 .channels(channels)
5581 .input_offset(592)
5582 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5583 }
5584 }
5585
5586 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, zero) {
5587 TEST_REQUIRES_X86_FMA3;
5588 for (uint32_t mz = 0; mz < 9; mz++) {
5589 for (uint32_t channels = 64; channels < 512; channels += 96) {
5590 DWConvMicrokernelTester()
5591 .cr(32)
5592 .kr(9)
5593 .channels(channels)
5594 .input_offset(592)
5595 .zero_index(mz)
5596 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
5597 }
5598 }
5599 }
5600#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601
5602
5603#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5604 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_eq_8) {
5605 TEST_REQUIRES_X86_FMA3;
5606 DWConvMicrokernelTester()
5607 .cr(8)
5608 .kr(4)
5609 .channels(8)
5610 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5611 }
5612
5613 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8) {
5614 TEST_REQUIRES_X86_FMA3;
5615 for (uint32_t channels = 16; channels < 128; channels += 24) {
5616 DWConvMicrokernelTester()
5617 .cr(8)
5618 .kr(4)
5619 .channels(channels)
5620 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5621 }
5622 }
5623
5624 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmin) {
5625 TEST_REQUIRES_X86_FMA3;
5626 for (uint32_t channels = 16; channels < 128; channels += 24) {
5627 DWConvMicrokernelTester()
5628 .cr(8)
5629 .kr(4)
5630 .channels(channels)
5631 .qmin(128)
5632 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5633 }
5634 }
5635
5636 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmax) {
5637 TEST_REQUIRES_X86_FMA3;
5638 for (uint32_t channels = 16; channels < 128; channels += 24) {
5639 DWConvMicrokernelTester()
5640 .cr(8)
5641 .kr(4)
5642 .channels(channels)
5643 .qmax(128)
5644 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5645 }
5646 }
5647
5648 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_lt_8) {
5649 TEST_REQUIRES_X86_FMA3;
5650 for (uint32_t channels = 1; channels < 8; channels++) {
5651 DWConvMicrokernelTester()
5652 .cr(8)
5653 .kr(4)
5654 .channels(channels)
5655 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5656 }
5657 }
5658
5659 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8) {
5660 TEST_REQUIRES_X86_FMA3;
5661 for (uint32_t channels = 9; channels < 16; channels++) {
5662 DWConvMicrokernelTester()
5663 .cr(8)
5664 .kr(4)
5665 .channels(channels)
5666 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5667 }
5668 }
5669
5670 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmin) {
5671 TEST_REQUIRES_X86_FMA3;
5672 for (uint32_t channels = 9; channels < 16; channels++) {
5673 DWConvMicrokernelTester()
5674 .cr(8)
5675 .kr(4)
5676 .channels(channels)
5677 .qmin(128)
5678 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5679 }
5680 }
5681
5682 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmax) {
5683 TEST_REQUIRES_X86_FMA3;
5684 for (uint32_t channels = 9; channels < 16; channels++) {
5685 DWConvMicrokernelTester()
5686 .cr(8)
5687 .kr(4)
5688 .channels(channels)
5689 .qmax(128)
5690 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5691 }
5692 }
5693
5694 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel) {
5695 TEST_REQUIRES_X86_FMA3;
5696 for (size_t channels = 1; channels <= 40; channels += 7) {
5697 DWConvMicrokernelTester()
5698 .cr(8)
5699 .kr(4)
5700 .channels(channels)
5701 .width(3)
5702 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5703 }
5704 }
5705
5706 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_step) {
5707 TEST_REQUIRES_X86_FMA3;
5708 for (size_t channels = 1; channels <= 40; channels += 7) {
5709 for (size_t step = 2; step <= 4; step++) {
5710 DWConvMicrokernelTester()
5711 .cr(8)
5712 .kr(4)
5713 .channels(channels)
5714 .width(3)
5715 .step(step)
5716 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5717 }
5718 }
5719 }
5720
5721 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_output_stride) {
5722 TEST_REQUIRES_X86_FMA3;
5723 for (size_t channels = 1; channels <= 40; channels += 7) {
5724 DWConvMicrokernelTester()
5725 .cr(8)
5726 .kr(4)
5727 .channels(8)
5728 .width(5)
5729 .output_stride(43)
5730 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5731 }
5732 }
5733
5734 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmin) {
5735 TEST_REQUIRES_X86_FMA3;
5736 for (size_t channels = 1; channels <= 40; channels += 7) {
5737 DWConvMicrokernelTester()
5738 .cr(8)
5739 .kr(4)
5740 .channels(channels)
5741 .width(3)
5742 .qmin(128)
5743 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5744 }
5745 }
5746
5747 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmax) {
5748 TEST_REQUIRES_X86_FMA3;
5749 for (size_t channels = 1; channels <= 40; channels += 7) {
5750 DWConvMicrokernelTester()
5751 .cr(8)
5752 .kr(4)
5753 .channels(channels)
5754 .width(3)
5755 .qmax(128)
5756 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5757 }
5758 }
5759
5760 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, input_offset) {
5761 TEST_REQUIRES_X86_FMA3;
5762 for (uint32_t channels = 16; channels < 128; channels += 24) {
5763 DWConvMicrokernelTester()
5764 .cr(8)
5765 .kr(4)
5766 .channels(channels)
5767 .input_offset(176)
5768 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5769 }
5770 }
5771
5772 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, zero) {
5773 TEST_REQUIRES_X86_FMA3;
5774 for (uint32_t mz = 0; mz < 4; mz++) {
5775 for (uint32_t channels = 16; channels < 128; channels += 24) {
5776 DWConvMicrokernelTester()
5777 .cr(8)
5778 .kr(4)
5779 .channels(channels)
5780 .input_offset(176)
5781 .zero_index(mz)
5782 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
5783 }
5784 }
5785 }
5786#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5787
5788
5789#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5790 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_eq_8) {
5791 TEST_REQUIRES_X86_FMA3;
5792 DWConvMicrokernelTester()
5793 .cr(8)
5794 .kr(4)
5795 .channels(8)
5796 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5797 }
5798
5799 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8) {
5800 TEST_REQUIRES_X86_FMA3;
5801 for (uint32_t channels = 16; channels < 128; channels += 24) {
5802 DWConvMicrokernelTester()
5803 .cr(8)
5804 .kr(4)
5805 .channels(channels)
5806 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5807 }
5808 }
5809
5810 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmin) {
5811 TEST_REQUIRES_X86_FMA3;
5812 for (uint32_t channels = 16; channels < 128; channels += 24) {
5813 DWConvMicrokernelTester()
5814 .cr(8)
5815 .kr(4)
5816 .channels(channels)
5817 .qmin(128)
5818 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5819 }
5820 }
5821
5822 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmax) {
5823 TEST_REQUIRES_X86_FMA3;
5824 for (uint32_t channels = 16; channels < 128; channels += 24) {
5825 DWConvMicrokernelTester()
5826 .cr(8)
5827 .kr(4)
5828 .channels(channels)
5829 .qmax(128)
5830 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5831 }
5832 }
5833
5834 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_lt_8) {
5835 TEST_REQUIRES_X86_FMA3;
5836 for (uint32_t channels = 1; channels < 8; channels++) {
5837 DWConvMicrokernelTester()
5838 .cr(8)
5839 .kr(4)
5840 .channels(channels)
5841 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5842 }
5843 }
5844
5845 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8) {
5846 TEST_REQUIRES_X86_FMA3;
5847 for (uint32_t channels = 9; channels < 16; channels++) {
5848 DWConvMicrokernelTester()
5849 .cr(8)
5850 .kr(4)
5851 .channels(channels)
5852 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5853 }
5854 }
5855
5856 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmin) {
5857 TEST_REQUIRES_X86_FMA3;
5858 for (uint32_t channels = 9; channels < 16; channels++) {
5859 DWConvMicrokernelTester()
5860 .cr(8)
5861 .kr(4)
5862 .channels(channels)
5863 .qmin(128)
5864 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5865 }
5866 }
5867
5868 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmax) {
5869 TEST_REQUIRES_X86_FMA3;
5870 for (uint32_t channels = 9; channels < 16; channels++) {
5871 DWConvMicrokernelTester()
5872 .cr(8)
5873 .kr(4)
5874 .channels(channels)
5875 .qmax(128)
5876 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5877 }
5878 }
5879
5880 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel) {
5881 TEST_REQUIRES_X86_FMA3;
5882 for (size_t channels = 1; channels <= 40; channels += 7) {
5883 DWConvMicrokernelTester()
5884 .cr(8)
5885 .kr(4)
5886 .channels(channels)
5887 .width(3)
5888 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5889 }
5890 }
5891
5892 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_step) {
5893 TEST_REQUIRES_X86_FMA3;
5894 for (size_t channels = 1; channels <= 40; channels += 7) {
5895 for (size_t step = 2; step <= 4; step++) {
5896 DWConvMicrokernelTester()
5897 .cr(8)
5898 .kr(4)
5899 .channels(channels)
5900 .width(3)
5901 .step(step)
5902 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5903 }
5904 }
5905 }
5906
5907 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_output_stride) {
5908 TEST_REQUIRES_X86_FMA3;
5909 for (size_t channels = 1; channels <= 40; channels += 7) {
5910 DWConvMicrokernelTester()
5911 .cr(8)
5912 .kr(4)
5913 .channels(8)
5914 .width(5)
5915 .output_stride(43)
5916 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5917 }
5918 }
5919
5920 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmin) {
5921 TEST_REQUIRES_X86_FMA3;
5922 for (size_t channels = 1; channels <= 40; channels += 7) {
5923 DWConvMicrokernelTester()
5924 .cr(8)
5925 .kr(4)
5926 .channels(channels)
5927 .width(3)
5928 .qmin(128)
5929 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5930 }
5931 }
5932
5933 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmax) {
5934 TEST_REQUIRES_X86_FMA3;
5935 for (size_t channels = 1; channels <= 40; channels += 7) {
5936 DWConvMicrokernelTester()
5937 .cr(8)
5938 .kr(4)
5939 .channels(channels)
5940 .width(3)
5941 .qmax(128)
5942 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5943 }
5944 }
5945
5946 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, input_offset) {
5947 TEST_REQUIRES_X86_FMA3;
5948 for (uint32_t channels = 16; channels < 128; channels += 24) {
5949 DWConvMicrokernelTester()
5950 .cr(8)
5951 .kr(4)
5952 .channels(channels)
5953 .input_offset(176)
5954 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5955 }
5956 }
5957
5958 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, zero) {
5959 TEST_REQUIRES_X86_FMA3;
5960 for (uint32_t mz = 0; mz < 4; mz++) {
5961 for (uint32_t channels = 16; channels < 128; channels += 24) {
5962 DWConvMicrokernelTester()
5963 .cr(8)
5964 .kr(4)
5965 .channels(channels)
5966 .input_offset(176)
5967 .zero_index(mz)
5968 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
5969 }
5970 }
5971 }
5972#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973
5974
5975#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5976 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_eq_16) {
5977 TEST_REQUIRES_X86_FMA3;
5978 DWConvMicrokernelTester()
5979 .cr(16)
5980 .kr(4)
5981 .channels(16)
5982 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
5983 }
5984
5985 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16) {
5986 TEST_REQUIRES_X86_FMA3;
5987 for (uint32_t channels = 32; channels < 256; channels += 48) {
5988 DWConvMicrokernelTester()
5989 .cr(16)
5990 .kr(4)
5991 .channels(channels)
5992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
5993 }
5994 }
5995
5996 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmin) {
5997 TEST_REQUIRES_X86_FMA3;
5998 for (uint32_t channels = 32; channels < 256; channels += 48) {
5999 DWConvMicrokernelTester()
6000 .cr(16)
6001 .kr(4)
6002 .channels(channels)
6003 .qmin(128)
6004 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6005 }
6006 }
6007
6008 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmax) {
6009 TEST_REQUIRES_X86_FMA3;
6010 for (uint32_t channels = 32; channels < 256; channels += 48) {
6011 DWConvMicrokernelTester()
6012 .cr(16)
6013 .kr(4)
6014 .channels(channels)
6015 .qmax(128)
6016 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6017 }
6018 }
6019
6020 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_lt_16) {
6021 TEST_REQUIRES_X86_FMA3;
6022 for (uint32_t channels = 1; channels < 16; channels++) {
6023 DWConvMicrokernelTester()
6024 .cr(16)
6025 .kr(4)
6026 .channels(channels)
6027 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6028 }
6029 }
6030
6031 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16) {
6032 TEST_REQUIRES_X86_FMA3;
6033 for (uint32_t channels = 17; channels < 32; channels++) {
6034 DWConvMicrokernelTester()
6035 .cr(16)
6036 .kr(4)
6037 .channels(channels)
6038 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6039 }
6040 }
6041
6042 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmin) {
6043 TEST_REQUIRES_X86_FMA3;
6044 for (uint32_t channels = 17; channels < 32; channels++) {
6045 DWConvMicrokernelTester()
6046 .cr(16)
6047 .kr(4)
6048 .channels(channels)
6049 .qmin(128)
6050 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6051 }
6052 }
6053
6054 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmax) {
6055 TEST_REQUIRES_X86_FMA3;
6056 for (uint32_t channels = 17; channels < 32; channels++) {
6057 DWConvMicrokernelTester()
6058 .cr(16)
6059 .kr(4)
6060 .channels(channels)
6061 .qmax(128)
6062 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6063 }
6064 }
6065
6066 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel) {
6067 TEST_REQUIRES_X86_FMA3;
6068 for (size_t channels = 1; channels <= 80; channels += 15) {
6069 DWConvMicrokernelTester()
6070 .cr(16)
6071 .kr(4)
6072 .channels(channels)
6073 .width(3)
6074 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6075 }
6076 }
6077
6078 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_step) {
6079 TEST_REQUIRES_X86_FMA3;
6080 for (size_t channels = 1; channels <= 80; channels += 15) {
6081 for (size_t step = 2; step <= 4; step++) {
6082 DWConvMicrokernelTester()
6083 .cr(16)
6084 .kr(4)
6085 .channels(channels)
6086 .width(3)
6087 .step(step)
6088 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6089 }
6090 }
6091 }
6092
6093 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_output_stride) {
6094 TEST_REQUIRES_X86_FMA3;
6095 for (size_t channels = 1; channels <= 80; channels += 15) {
6096 DWConvMicrokernelTester()
6097 .cr(16)
6098 .kr(4)
6099 .channels(16)
6100 .width(5)
6101 .output_stride(83)
6102 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6103 }
6104 }
6105
6106 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmin) {
6107 TEST_REQUIRES_X86_FMA3;
6108 for (size_t channels = 1; channels <= 80; channels += 15) {
6109 DWConvMicrokernelTester()
6110 .cr(16)
6111 .kr(4)
6112 .channels(channels)
6113 .width(3)
6114 .qmin(128)
6115 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6116 }
6117 }
6118
6119 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmax) {
6120 TEST_REQUIRES_X86_FMA3;
6121 for (size_t channels = 1; channels <= 80; channels += 15) {
6122 DWConvMicrokernelTester()
6123 .cr(16)
6124 .kr(4)
6125 .channels(channels)
6126 .width(3)
6127 .qmax(128)
6128 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6129 }
6130 }
6131
6132 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, input_offset) {
6133 TEST_REQUIRES_X86_FMA3;
6134 for (uint32_t channels = 32; channels < 256; channels += 48) {
6135 DWConvMicrokernelTester()
6136 .cr(16)
6137 .kr(4)
6138 .channels(channels)
6139 .input_offset(304)
6140 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6141 }
6142 }
6143
6144 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, zero) {
6145 TEST_REQUIRES_X86_FMA3;
6146 for (uint32_t mz = 0; mz < 4; mz++) {
6147 for (uint32_t channels = 32; channels < 256; channels += 48) {
6148 DWConvMicrokernelTester()
6149 .cr(16)
6150 .kr(4)
6151 .channels(channels)
6152 .input_offset(304)
6153 .zero_index(mz)
6154 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
6155 }
6156 }
6157 }
6158#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159
6160
6161#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6162 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_eq_16) {
6163 TEST_REQUIRES_X86_FMA3;
6164 DWConvMicrokernelTester()
6165 .cr(16)
6166 .kr(4)
6167 .channels(16)
6168 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6169 }
6170
6171 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16) {
6172 TEST_REQUIRES_X86_FMA3;
6173 for (uint32_t channels = 32; channels < 256; channels += 48) {
6174 DWConvMicrokernelTester()
6175 .cr(16)
6176 .kr(4)
6177 .channels(channels)
6178 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6179 }
6180 }
6181
6182 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmin) {
6183 TEST_REQUIRES_X86_FMA3;
6184 for (uint32_t channels = 32; channels < 256; channels += 48) {
6185 DWConvMicrokernelTester()
6186 .cr(16)
6187 .kr(4)
6188 .channels(channels)
6189 .qmin(128)
6190 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6191 }
6192 }
6193
6194 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmax) {
6195 TEST_REQUIRES_X86_FMA3;
6196 for (uint32_t channels = 32; channels < 256; channels += 48) {
6197 DWConvMicrokernelTester()
6198 .cr(16)
6199 .kr(4)
6200 .channels(channels)
6201 .qmax(128)
6202 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6203 }
6204 }
6205
6206 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_lt_16) {
6207 TEST_REQUIRES_X86_FMA3;
6208 for (uint32_t channels = 1; channels < 16; channels++) {
6209 DWConvMicrokernelTester()
6210 .cr(16)
6211 .kr(4)
6212 .channels(channels)
6213 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6214 }
6215 }
6216
6217 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16) {
6218 TEST_REQUIRES_X86_FMA3;
6219 for (uint32_t channels = 17; channels < 32; channels++) {
6220 DWConvMicrokernelTester()
6221 .cr(16)
6222 .kr(4)
6223 .channels(channels)
6224 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6225 }
6226 }
6227
6228 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmin) {
6229 TEST_REQUIRES_X86_FMA3;
6230 for (uint32_t channels = 17; channels < 32; channels++) {
6231 DWConvMicrokernelTester()
6232 .cr(16)
6233 .kr(4)
6234 .channels(channels)
6235 .qmin(128)
6236 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6237 }
6238 }
6239
6240 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmax) {
6241 TEST_REQUIRES_X86_FMA3;
6242 for (uint32_t channels = 17; channels < 32; channels++) {
6243 DWConvMicrokernelTester()
6244 .cr(16)
6245 .kr(4)
6246 .channels(channels)
6247 .qmax(128)
6248 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6249 }
6250 }
6251
6252 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel) {
6253 TEST_REQUIRES_X86_FMA3;
6254 for (size_t channels = 1; channels <= 80; channels += 15) {
6255 DWConvMicrokernelTester()
6256 .cr(16)
6257 .kr(4)
6258 .channels(channels)
6259 .width(3)
6260 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6261 }
6262 }
6263
6264 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_step) {
6265 TEST_REQUIRES_X86_FMA3;
6266 for (size_t channels = 1; channels <= 80; channels += 15) {
6267 for (size_t step = 2; step <= 4; step++) {
6268 DWConvMicrokernelTester()
6269 .cr(16)
6270 .kr(4)
6271 .channels(channels)
6272 .width(3)
6273 .step(step)
6274 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6275 }
6276 }
6277 }
6278
6279 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_output_stride) {
6280 TEST_REQUIRES_X86_FMA3;
6281 for (size_t channels = 1; channels <= 80; channels += 15) {
6282 DWConvMicrokernelTester()
6283 .cr(16)
6284 .kr(4)
6285 .channels(16)
6286 .width(5)
6287 .output_stride(83)
6288 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6289 }
6290 }
6291
6292 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmin) {
6293 TEST_REQUIRES_X86_FMA3;
6294 for (size_t channels = 1; channels <= 80; channels += 15) {
6295 DWConvMicrokernelTester()
6296 .cr(16)
6297 .kr(4)
6298 .channels(channels)
6299 .width(3)
6300 .qmin(128)
6301 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6302 }
6303 }
6304
6305 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmax) {
6306 TEST_REQUIRES_X86_FMA3;
6307 for (size_t channels = 1; channels <= 80; channels += 15) {
6308 DWConvMicrokernelTester()
6309 .cr(16)
6310 .kr(4)
6311 .channels(channels)
6312 .width(3)
6313 .qmax(128)
6314 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6315 }
6316 }
6317
6318 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, input_offset) {
6319 TEST_REQUIRES_X86_FMA3;
6320 for (uint32_t channels = 32; channels < 256; channels += 48) {
6321 DWConvMicrokernelTester()
6322 .cr(16)
6323 .kr(4)
6324 .channels(channels)
6325 .input_offset(304)
6326 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6327 }
6328 }
6329
6330 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, zero) {
6331 TEST_REQUIRES_X86_FMA3;
6332 for (uint32_t mz = 0; mz < 4; mz++) {
6333 for (uint32_t channels = 32; channels < 256; channels += 48) {
6334 DWConvMicrokernelTester()
6335 .cr(16)
6336 .kr(4)
6337 .channels(channels)
6338 .input_offset(304)
6339 .zero_index(mz)
6340 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6341 }
6342 }
6343 }
6344#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6345
6346
6347#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6348 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_eq_32) {
6349 TEST_REQUIRES_X86_FMA3;
6350 DWConvMicrokernelTester()
6351 .cr(32)
6352 .kr(4)
6353 .channels(32)
6354 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6355 }
6356
6357 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32) {
6358 TEST_REQUIRES_X86_FMA3;
6359 for (uint32_t channels = 64; channels < 512; channels += 96) {
6360 DWConvMicrokernelTester()
6361 .cr(32)
6362 .kr(4)
6363 .channels(channels)
6364 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6365 }
6366 }
6367
6368 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmin) {
6369 TEST_REQUIRES_X86_FMA3;
6370 for (uint32_t channels = 64; channels < 512; channels += 96) {
6371 DWConvMicrokernelTester()
6372 .cr(32)
6373 .kr(4)
6374 .channels(channels)
6375 .qmin(128)
6376 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6377 }
6378 }
6379
6380 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmax) {
6381 TEST_REQUIRES_X86_FMA3;
6382 for (uint32_t channels = 64; channels < 512; channels += 96) {
6383 DWConvMicrokernelTester()
6384 .cr(32)
6385 .kr(4)
6386 .channels(channels)
6387 .qmax(128)
6388 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6389 }
6390 }
6391
6392 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_lt_32) {
6393 TEST_REQUIRES_X86_FMA3;
6394 for (uint32_t channels = 1; channels < 32; channels++) {
6395 DWConvMicrokernelTester()
6396 .cr(32)
6397 .kr(4)
6398 .channels(channels)
6399 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6400 }
6401 }
6402
6403 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32) {
6404 TEST_REQUIRES_X86_FMA3;
6405 for (uint32_t channels = 33; channels < 64; channels++) {
6406 DWConvMicrokernelTester()
6407 .cr(32)
6408 .kr(4)
6409 .channels(channels)
6410 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6411 }
6412 }
6413
6414 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmin) {
6415 TEST_REQUIRES_X86_FMA3;
6416 for (uint32_t channels = 33; channels < 64; channels++) {
6417 DWConvMicrokernelTester()
6418 .cr(32)
6419 .kr(4)
6420 .channels(channels)
6421 .qmin(128)
6422 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6423 }
6424 }
6425
6426 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmax) {
6427 TEST_REQUIRES_X86_FMA3;
6428 for (uint32_t channels = 33; channels < 64; channels++) {
6429 DWConvMicrokernelTester()
6430 .cr(32)
6431 .kr(4)
6432 .channels(channels)
6433 .qmax(128)
6434 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6435 }
6436 }
6437
6438 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel) {
6439 TEST_REQUIRES_X86_FMA3;
6440 for (size_t channels = 1; channels <= 160; channels += 31) {
6441 DWConvMicrokernelTester()
6442 .cr(32)
6443 .kr(4)
6444 .channels(channels)
6445 .width(3)
6446 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6447 }
6448 }
6449
6450 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_step) {
6451 TEST_REQUIRES_X86_FMA3;
6452 for (size_t channels = 1; channels <= 160; channels += 31) {
6453 for (size_t step = 2; step <= 4; step++) {
6454 DWConvMicrokernelTester()
6455 .cr(32)
6456 .kr(4)
6457 .channels(channels)
6458 .width(3)
6459 .step(step)
6460 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6461 }
6462 }
6463 }
6464
6465 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_output_stride) {
6466 TEST_REQUIRES_X86_FMA3;
6467 for (size_t channels = 1; channels <= 160; channels += 31) {
6468 DWConvMicrokernelTester()
6469 .cr(32)
6470 .kr(4)
6471 .channels(32)
6472 .width(5)
6473 .output_stride(163)
6474 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6475 }
6476 }
6477
6478 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmin) {
6479 TEST_REQUIRES_X86_FMA3;
6480 for (size_t channels = 1; channels <= 160; channels += 31) {
6481 DWConvMicrokernelTester()
6482 .cr(32)
6483 .kr(4)
6484 .channels(channels)
6485 .width(3)
6486 .qmin(128)
6487 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6488 }
6489 }
6490
6491 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmax) {
6492 TEST_REQUIRES_X86_FMA3;
6493 for (size_t channels = 1; channels <= 160; channels += 31) {
6494 DWConvMicrokernelTester()
6495 .cr(32)
6496 .kr(4)
6497 .channels(channels)
6498 .width(3)
6499 .qmax(128)
6500 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6501 }
6502 }
6503
6504 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, input_offset) {
6505 TEST_REQUIRES_X86_FMA3;
6506 for (uint32_t channels = 64; channels < 512; channels += 96) {
6507 DWConvMicrokernelTester()
6508 .cr(32)
6509 .kr(4)
6510 .channels(channels)
6511 .input_offset(592)
6512 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6513 }
6514 }
6515
6516 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, zero) {
6517 TEST_REQUIRES_X86_FMA3;
6518 for (uint32_t mz = 0; mz < 4; mz++) {
6519 for (uint32_t channels = 64; channels < 512; channels += 96) {
6520 DWConvMicrokernelTester()
6521 .cr(32)
6522 .kr(4)
6523 .channels(channels)
6524 .input_offset(592)
6525 .zero_index(mz)
6526 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
6527 }
6528 }
6529 }
6530#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6531
6532
6533#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6534 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_eq_32) {
6535 TEST_REQUIRES_X86_FMA3;
6536 DWConvMicrokernelTester()
6537 .cr(32)
6538 .kr(4)
6539 .channels(32)
6540 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6541 }
6542
6543 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32) {
6544 TEST_REQUIRES_X86_FMA3;
6545 for (uint32_t channels = 64; channels < 512; channels += 96) {
6546 DWConvMicrokernelTester()
6547 .cr(32)
6548 .kr(4)
6549 .channels(channels)
6550 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6551 }
6552 }
6553
6554 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmin) {
6555 TEST_REQUIRES_X86_FMA3;
6556 for (uint32_t channels = 64; channels < 512; channels += 96) {
6557 DWConvMicrokernelTester()
6558 .cr(32)
6559 .kr(4)
6560 .channels(channels)
6561 .qmin(128)
6562 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6563 }
6564 }
6565
6566 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmax) {
6567 TEST_REQUIRES_X86_FMA3;
6568 for (uint32_t channels = 64; channels < 512; channels += 96) {
6569 DWConvMicrokernelTester()
6570 .cr(32)
6571 .kr(4)
6572 .channels(channels)
6573 .qmax(128)
6574 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6575 }
6576 }
6577
6578 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_lt_32) {
6579 TEST_REQUIRES_X86_FMA3;
6580 for (uint32_t channels = 1; channels < 32; channels++) {
6581 DWConvMicrokernelTester()
6582 .cr(32)
6583 .kr(4)
6584 .channels(channels)
6585 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6586 }
6587 }
6588
6589 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32) {
6590 TEST_REQUIRES_X86_FMA3;
6591 for (uint32_t channels = 33; channels < 64; channels++) {
6592 DWConvMicrokernelTester()
6593 .cr(32)
6594 .kr(4)
6595 .channels(channels)
6596 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6597 }
6598 }
6599
6600 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmin) {
6601 TEST_REQUIRES_X86_FMA3;
6602 for (uint32_t channels = 33; channels < 64; channels++) {
6603 DWConvMicrokernelTester()
6604 .cr(32)
6605 .kr(4)
6606 .channels(channels)
6607 .qmin(128)
6608 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6609 }
6610 }
6611
6612 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmax) {
6613 TEST_REQUIRES_X86_FMA3;
6614 for (uint32_t channels = 33; channels < 64; channels++) {
6615 DWConvMicrokernelTester()
6616 .cr(32)
6617 .kr(4)
6618 .channels(channels)
6619 .qmax(128)
6620 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6621 }
6622 }
6623
6624 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel) {
6625 TEST_REQUIRES_X86_FMA3;
6626 for (size_t channels = 1; channels <= 160; channels += 31) {
6627 DWConvMicrokernelTester()
6628 .cr(32)
6629 .kr(4)
6630 .channels(channels)
6631 .width(3)
6632 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6633 }
6634 }
6635
6636 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_step) {
6637 TEST_REQUIRES_X86_FMA3;
6638 for (size_t channels = 1; channels <= 160; channels += 31) {
6639 for (size_t step = 2; step <= 4; step++) {
6640 DWConvMicrokernelTester()
6641 .cr(32)
6642 .kr(4)
6643 .channels(channels)
6644 .width(3)
6645 .step(step)
6646 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6647 }
6648 }
6649 }
6650
6651 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_output_stride) {
6652 TEST_REQUIRES_X86_FMA3;
6653 for (size_t channels = 1; channels <= 160; channels += 31) {
6654 DWConvMicrokernelTester()
6655 .cr(32)
6656 .kr(4)
6657 .channels(32)
6658 .width(5)
6659 .output_stride(163)
6660 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6661 }
6662 }
6663
6664 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmin) {
6665 TEST_REQUIRES_X86_FMA3;
6666 for (size_t channels = 1; channels <= 160; channels += 31) {
6667 DWConvMicrokernelTester()
6668 .cr(32)
6669 .kr(4)
6670 .channels(channels)
6671 .width(3)
6672 .qmin(128)
6673 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6674 }
6675 }
6676
6677 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmax) {
6678 TEST_REQUIRES_X86_FMA3;
6679 for (size_t channels = 1; channels <= 160; channels += 31) {
6680 DWConvMicrokernelTester()
6681 .cr(32)
6682 .kr(4)
6683 .channels(channels)
6684 .width(3)
6685 .qmax(128)
6686 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6687 }
6688 }
6689
6690 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, input_offset) {
6691 TEST_REQUIRES_X86_FMA3;
6692 for (uint32_t channels = 64; channels < 512; channels += 96) {
6693 DWConvMicrokernelTester()
6694 .cr(32)
6695 .kr(4)
6696 .channels(channels)
6697 .input_offset(592)
6698 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6699 }
6700 }
6701
6702 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, zero) {
6703 TEST_REQUIRES_X86_FMA3;
6704 for (uint32_t mz = 0; mz < 4; mz++) {
6705 for (uint32_t channels = 64; channels < 512; channels += 96) {
6706 DWConvMicrokernelTester()
6707 .cr(32)
6708 .kr(4)
6709 .channels(channels)
6710 .input_offset(592)
6711 .zero_index(mz)
6712 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
6713 }
6714 }
6715 }
6716#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64