blob: ccb2fd900a4f80d92ea871226eb095399230fac4 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-dwconv-minmax.yaml
// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
23#if XNN_ARCH_ARM64
24 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_eq_4) {
25 TEST_REQUIRES_ARM_NEON_FMA;
26 DWConvMicrokernelTester()
27 .cr(4)
28 .kr(9)
29 .channels(4)
30 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
31 }
32
33 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_div_4) {
34 TEST_REQUIRES_ARM_NEON_FMA;
35 for (uint32_t channels = 8; channels < 64; channels += 12) {
36 DWConvMicrokernelTester()
37 .cr(4)
38 .kr(9)
39 .channels(channels)
40 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
41 }
42 }
43
44 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_div_4_with_qmin) {
45 TEST_REQUIRES_ARM_NEON_FMA;
46 for (uint32_t channels = 8; channels < 64; channels += 12) {
47 DWConvMicrokernelTester()
48 .cr(4)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
52 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
53 }
54 }
55
56 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_div_4_with_qmax) {
57 TEST_REQUIRES_ARM_NEON_FMA;
58 for (uint32_t channels = 8; channels < 64; channels += 12) {
59 DWConvMicrokernelTester()
60 .cr(4)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
64 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
65 }
66 }
67
68 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_lt_4) {
69 TEST_REQUIRES_ARM_NEON_FMA;
70 for (uint32_t channels = 1; channels < 4; channels++) {
71 DWConvMicrokernelTester()
72 .cr(4)
73 .kr(9)
74 .channels(channels)
75 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
76 }
77 }
78
79 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_gt_4) {
80 TEST_REQUIRES_ARM_NEON_FMA;
81 for (uint32_t channels = 5; channels < 8; channels++) {
82 DWConvMicrokernelTester()
83 .cr(4)
84 .kr(9)
85 .channels(channels)
86 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
87 }
88 }
89
90 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_gt_4_with_qmin) {
91 TEST_REQUIRES_ARM_NEON_FMA;
92 for (uint32_t channels = 5; channels < 8; channels++) {
93 DWConvMicrokernelTester()
94 .cr(4)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
98 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
99 }
100 }
101
102 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_gt_4_with_qmax) {
103 TEST_REQUIRES_ARM_NEON_FMA;
104 for (uint32_t channels = 5; channels < 8; channels++) {
105 DWConvMicrokernelTester()
106 .cr(4)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
110 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
111 }
112 }
113
114 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, multipixel) {
115 TEST_REQUIRES_ARM_NEON_FMA;
116 for (size_t channels = 1; channels <= 20; channels += 3) {
117 DWConvMicrokernelTester()
118 .cr(4)
119 .kr(9)
120 .channels(channels)
121 .width(3)
122 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
123 }
124 }
125
126 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON_FMA;
128 for (size_t channels = 1; channels <= 20; channels += 3) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(4)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
136 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
137 }
138 }
139 }
140
141 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON_FMA;
143 for (size_t channels = 1; channels <= 20; channels += 3) {
144 DWConvMicrokernelTester()
145 .cr(4)
146 .kr(9)
147 .channels(4)
148 .width(5)
149 .output_stride(23)
150 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
151 }
152 }
153
154 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON_FMA;
156 for (size_t channels = 1; channels <= 20; channels += 3) {
157 DWConvMicrokernelTester()
158 .cr(4)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
163 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
164 }
165 }
166
167 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON_FMA;
169 for (size_t channels = 1; channels <= 20; channels += 3) {
170 DWConvMicrokernelTester()
171 .cr(4)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
176 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
177 }
178 }
179#endif // XNN_ARCH_ARM64
180
181
182#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
183 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_eq_4) {
184 TEST_REQUIRES_ARM_NEON_FMA;
185 DWConvMicrokernelTester()
186 .cr(4)
187 .kr(9)
188 .channels(4)
189 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
190 }
191
192 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_eq_8) {
193 TEST_REQUIRES_ARM_NEON_FMA;
194 DWConvMicrokernelTester()
195 .cr(4)
196 .kr(9)
197 .channels(8)
198 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
199 }
200
201 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_div_4) {
202 TEST_REQUIRES_ARM_NEON_FMA;
203 for (uint32_t channels = 12; channels < 64; channels += 12) {
204 DWConvMicrokernelTester()
205 .cr(4)
206 .kr(9)
207 .channels(channels)
208 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
209 }
210 }
211
212 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_div_4_with_qmin) {
213 TEST_REQUIRES_ARM_NEON_FMA;
214 for (uint32_t channels = 12; channels < 64; channels += 12) {
215 DWConvMicrokernelTester()
216 .cr(4)
217 .kr(9)
218 .channels(channels)
219 .qmin(128)
220 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
221 }
222 }
223
224 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_div_4_with_qmax) {
225 TEST_REQUIRES_ARM_NEON_FMA;
226 for (uint32_t channels = 12; channels < 64; channels += 12) {
227 DWConvMicrokernelTester()
228 .cr(4)
229 .kr(9)
230 .channels(channels)
231 .qmax(128)
232 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
233 }
234 }
235
236 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_lt_8) {
237 TEST_REQUIRES_ARM_NEON_FMA;
238 for (uint32_t channels = 1; channels < 8; channels++) {
239 DWConvMicrokernelTester()
240 .cr(4)
241 .kr(9)
242 .channels(channels)
243 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
244 }
245 }
246
247 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_gt_8) {
248 TEST_REQUIRES_ARM_NEON_FMA;
249 for (uint32_t channels = 9; channels < 12; channels++) {
250 DWConvMicrokernelTester()
251 .cr(4)
252 .kr(9)
253 .channels(channels)
254 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
255 }
256 }
257
258 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_gt_8_with_qmin) {
259 TEST_REQUIRES_ARM_NEON_FMA;
260 for (uint32_t channels = 9; channels < 12; channels++) {
261 DWConvMicrokernelTester()
262 .cr(4)
263 .kr(9)
264 .channels(channels)
265 .qmin(128)
266 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
267 }
268 }
269
270 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_gt_8_with_qmax) {
271 TEST_REQUIRES_ARM_NEON_FMA;
272 for (uint32_t channels = 9; channels < 12; channels++) {
273 DWConvMicrokernelTester()
274 .cr(4)
275 .kr(9)
276 .channels(channels)
277 .qmax(128)
278 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
279 }
280 }
281
282 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, multipixel) {
283 TEST_REQUIRES_ARM_NEON_FMA;
284 for (size_t channels = 1; channels <= 20; channels += 3) {
285 DWConvMicrokernelTester()
286 .cr(4)
287 .kr(9)
288 .channels(channels)
289 .width(3)
290 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
291 }
292 }
293
294 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, multipixel_with_step) {
295 TEST_REQUIRES_ARM_NEON_FMA;
296 for (size_t channels = 1; channels <= 20; channels += 3) {
297 for (size_t step = 2; step <= 9; step++) {
298 DWConvMicrokernelTester()
299 .cr(4)
300 .kr(9)
301 .channels(channels)
302 .width(3)
303 .step(step)
304 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
305 }
306 }
307 }
308
309 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, multipixel_with_output_stride) {
310 TEST_REQUIRES_ARM_NEON_FMA;
311 for (size_t channels = 1; channels <= 20; channels += 3) {
312 DWConvMicrokernelTester()
313 .cr(4)
314 .kr(9)
315 .channels(4)
316 .width(5)
317 .output_stride(23)
318 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
319 }
320 }
321
322 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, multipixel_with_qmin) {
323 TEST_REQUIRES_ARM_NEON_FMA;
324 for (size_t channels = 1; channels <= 20; channels += 3) {
325 DWConvMicrokernelTester()
326 .cr(4)
327 .kr(9)
328 .channels(channels)
329 .width(3)
330 .qmin(128)
331 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
332 }
333 }
334
335 TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, multipixel_with_qmax) {
336 TEST_REQUIRES_ARM_NEON_FMA;
337 for (size_t channels = 1; channels <= 20; channels += 3) {
338 DWConvMicrokernelTester()
339 .cr(4)
340 .kr(9)
341 .channels(channels)
342 .width(3)
343 .qmax(128)
344 .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
345 }
346 }
347#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
348
349
350#if XNN_ARCH_ARM || XNN_ARCH_ARM64
351 TEST(F32_DWCONV_UP4X9__NEONFMA, c_eq_4) {
352 TEST_REQUIRES_ARM_NEON_FMA;
353 DWConvMicrokernelTester()
354 .cr(4)
355 .kr(9)
356 .channels(4)
357 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
358 }
359
360 TEST(F32_DWCONV_UP4X9__NEONFMA, c_div_4) {
361 TEST_REQUIRES_ARM_NEON_FMA;
362 for (uint32_t channels = 8; channels < 64; channels += 12) {
363 DWConvMicrokernelTester()
364 .cr(4)
365 .kr(9)
366 .channels(channels)
367 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
368 }
369 }
370
371 TEST(F32_DWCONV_UP4X9__NEONFMA, c_div_4_with_qmin) {
372 TEST_REQUIRES_ARM_NEON_FMA;
373 for (uint32_t channels = 8; channels < 64; channels += 12) {
374 DWConvMicrokernelTester()
375 .cr(4)
376 .kr(9)
377 .channels(channels)
378 .qmin(128)
379 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
380 }
381 }
382
383 TEST(F32_DWCONV_UP4X9__NEONFMA, c_div_4_with_qmax) {
384 TEST_REQUIRES_ARM_NEON_FMA;
385 for (uint32_t channels = 8; channels < 64; channels += 12) {
386 DWConvMicrokernelTester()
387 .cr(4)
388 .kr(9)
389 .channels(channels)
390 .qmax(128)
391 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
392 }
393 }
394
395 TEST(F32_DWCONV_UP4X9__NEONFMA, c_lt_4) {
396 TEST_REQUIRES_ARM_NEON_FMA;
397 for (uint32_t channels = 1; channels < 4; channels++) {
398 DWConvMicrokernelTester()
399 .cr(4)
400 .kr(9)
401 .channels(channels)
402 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
403 }
404 }
405
406 TEST(F32_DWCONV_UP4X9__NEONFMA, c_gt_4) {
407 TEST_REQUIRES_ARM_NEON_FMA;
408 for (uint32_t channels = 5; channels < 8; channels++) {
409 DWConvMicrokernelTester()
410 .cr(4)
411 .kr(9)
412 .channels(channels)
413 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
414 }
415 }
416
417 TEST(F32_DWCONV_UP4X9__NEONFMA, c_gt_4_with_qmin) {
418 TEST_REQUIRES_ARM_NEON_FMA;
419 for (uint32_t channels = 5; channels < 8; channels++) {
420 DWConvMicrokernelTester()
421 .cr(4)
422 .kr(9)
423 .channels(channels)
424 .qmin(128)
425 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
426 }
427 }
428
429 TEST(F32_DWCONV_UP4X9__NEONFMA, c_gt_4_with_qmax) {
430 TEST_REQUIRES_ARM_NEON_FMA;
431 for (uint32_t channels = 5; channels < 8; channels++) {
432 DWConvMicrokernelTester()
433 .cr(4)
434 .kr(9)
435 .channels(channels)
436 .qmax(128)
437 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
438 }
439 }
440
441 TEST(F32_DWCONV_UP4X9__NEONFMA, multipixel) {
442 TEST_REQUIRES_ARM_NEON_FMA;
443 for (size_t channels = 1; channels <= 20; channels += 3) {
444 DWConvMicrokernelTester()
445 .cr(4)
446 .kr(9)
447 .channels(channels)
448 .width(3)
449 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
450 }
451 }
452
453 TEST(F32_DWCONV_UP4X9__NEONFMA, multipixel_with_step) {
454 TEST_REQUIRES_ARM_NEON_FMA;
455 for (size_t channels = 1; channels <= 20; channels += 3) {
456 for (size_t step = 2; step <= 9; step++) {
457 DWConvMicrokernelTester()
458 .cr(4)
459 .kr(9)
460 .channels(channels)
461 .width(3)
462 .step(step)
463 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
464 }
465 }
466 }
467
468 TEST(F32_DWCONV_UP4X9__NEONFMA, multipixel_with_output_stride) {
469 TEST_REQUIRES_ARM_NEON_FMA;
470 for (size_t channels = 1; channels <= 20; channels += 3) {
471 DWConvMicrokernelTester()
472 .cr(4)
473 .kr(9)
474 .channels(4)
475 .width(5)
476 .output_stride(23)
477 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
478 }
479 }
480
481 TEST(F32_DWCONV_UP4X9__NEONFMA, multipixel_with_qmin) {
482 TEST_REQUIRES_ARM_NEON_FMA;
483 for (size_t channels = 1; channels <= 20; channels += 3) {
484 DWConvMicrokernelTester()
485 .cr(4)
486 .kr(9)
487 .channels(channels)
488 .width(3)
489 .qmin(128)
490 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
491 }
492 }
493
494 TEST(F32_DWCONV_UP4X9__NEONFMA, multipixel_with_qmax) {
495 TEST_REQUIRES_ARM_NEON_FMA;
496 for (size_t channels = 1; channels <= 20; channels += 3) {
497 DWConvMicrokernelTester()
498 .cr(4)
499 .kr(9)
500 .channels(channels)
501 .width(3)
502 .qmax(128)
503 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
504 }
505 }
506#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
507
508
509#if XNN_ARCH_ARM || XNN_ARCH_ARM64
510 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_eq_4) {
511 TEST_REQUIRES_ARM_NEON_FMA;
512 DWConvMicrokernelTester()
513 .cr(4)
514 .kr(9)
515 .channels(4)
516 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
517 }
518
519 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_div_4) {
520 TEST_REQUIRES_ARM_NEON_FMA;
521 for (uint32_t channels = 8; channels < 64; channels += 12) {
522 DWConvMicrokernelTester()
523 .cr(4)
524 .kr(9)
525 .channels(channels)
526 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
527 }
528 }
529
530 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_div_4_with_qmin) {
531 TEST_REQUIRES_ARM_NEON_FMA;
532 for (uint32_t channels = 8; channels < 64; channels += 12) {
533 DWConvMicrokernelTester()
534 .cr(4)
535 .kr(9)
536 .channels(channels)
537 .qmin(128)
538 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
539 }
540 }
541
542 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_div_4_with_qmax) {
543 TEST_REQUIRES_ARM_NEON_FMA;
544 for (uint32_t channels = 8; channels < 64; channels += 12) {
545 DWConvMicrokernelTester()
546 .cr(4)
547 .kr(9)
548 .channels(channels)
549 .qmax(128)
550 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
551 }
552 }
553
554 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_lt_4) {
555 TEST_REQUIRES_ARM_NEON_FMA;
556 for (uint32_t channels = 1; channels < 4; channels++) {
557 DWConvMicrokernelTester()
558 .cr(4)
559 .kr(9)
560 .channels(channels)
561 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
562 }
563 }
564
565 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_gt_4) {
566 TEST_REQUIRES_ARM_NEON_FMA;
567 for (uint32_t channels = 5; channels < 8; channels++) {
568 DWConvMicrokernelTester()
569 .cr(4)
570 .kr(9)
571 .channels(channels)
572 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
573 }
574 }
575
576 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_gt_4_with_qmin) {
577 TEST_REQUIRES_ARM_NEON_FMA;
578 for (uint32_t channels = 5; channels < 8; channels++) {
579 DWConvMicrokernelTester()
580 .cr(4)
581 .kr(9)
582 .channels(channels)
583 .qmin(128)
584 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
585 }
586 }
587
588 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, c_gt_4_with_qmax) {
589 TEST_REQUIRES_ARM_NEON_FMA;
590 for (uint32_t channels = 5; channels < 8; channels++) {
591 DWConvMicrokernelTester()
592 .cr(4)
593 .kr(9)
594 .channels(channels)
595 .qmax(128)
596 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
597 }
598 }
599
600 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, multipixel) {
601 TEST_REQUIRES_ARM_NEON_FMA;
602 for (size_t channels = 1; channels <= 20; channels += 3) {
603 DWConvMicrokernelTester()
604 .cr(4)
605 .kr(9)
606 .channels(channels)
607 .width(3)
608 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
609 }
610 }
611
612 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, multipixel_with_step) {
613 TEST_REQUIRES_ARM_NEON_FMA;
614 for (size_t channels = 1; channels <= 20; channels += 3) {
615 for (size_t step = 2; step <= 9; step++) {
616 DWConvMicrokernelTester()
617 .cr(4)
618 .kr(9)
619 .channels(channels)
620 .width(3)
621 .step(step)
622 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
623 }
624 }
625 }
626
627 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, multipixel_with_output_stride) {
628 TEST_REQUIRES_ARM_NEON_FMA;
629 for (size_t channels = 1; channels <= 20; channels += 3) {
630 DWConvMicrokernelTester()
631 .cr(4)
632 .kr(9)
633 .channels(4)
634 .width(5)
635 .output_stride(23)
636 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
637 }
638 }
639
640 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, multipixel_with_qmin) {
641 TEST_REQUIRES_ARM_NEON_FMA;
642 for (size_t channels = 1; channels <= 20; channels += 3) {
643 DWConvMicrokernelTester()
644 .cr(4)
645 .kr(9)
646 .channels(channels)
647 .width(3)
648 .qmin(128)
649 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
650 }
651 }
652
653 TEST(F32_DWCONV_UP4X9__NEONFMA_ACC2, multipixel_with_qmax) {
654 TEST_REQUIRES_ARM_NEON_FMA;
655 for (size_t channels = 1; channels <= 20; channels += 3) {
656 DWConvMicrokernelTester()
657 .cr(4)
658 .kr(9)
659 .channels(channels)
660 .width(3)
661 .qmax(128)
662 .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2);
663 }
664 }
665#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
666
667
668#if XNN_ARCH_ARM || XNN_ARCH_ARM64
669 TEST(F32_DWCONV_UP8X9__NEONFMA, c_eq_8) {
670 TEST_REQUIRES_ARM_NEON_FMA;
671 DWConvMicrokernelTester()
672 .cr(8)
673 .kr(9)
674 .channels(8)
675 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
676 }
677
678 TEST(F32_DWCONV_UP8X9__NEONFMA, c_div_8) {
679 TEST_REQUIRES_ARM_NEON_FMA;
680 for (uint32_t channels = 16; channels < 128; channels += 24) {
681 DWConvMicrokernelTester()
682 .cr(8)
683 .kr(9)
684 .channels(channels)
685 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
686 }
687 }
688
689 TEST(F32_DWCONV_UP8X9__NEONFMA, c_div_8_with_qmin) {
690 TEST_REQUIRES_ARM_NEON_FMA;
691 for (uint32_t channels = 16; channels < 128; channels += 24) {
692 DWConvMicrokernelTester()
693 .cr(8)
694 .kr(9)
695 .channels(channels)
696 .qmin(128)
697 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
698 }
699 }
700
701 TEST(F32_DWCONV_UP8X9__NEONFMA, c_div_8_with_qmax) {
702 TEST_REQUIRES_ARM_NEON_FMA;
703 for (uint32_t channels = 16; channels < 128; channels += 24) {
704 DWConvMicrokernelTester()
705 .cr(8)
706 .kr(9)
707 .channels(channels)
708 .qmax(128)
709 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
710 }
711 }
712
713 TEST(F32_DWCONV_UP8X9__NEONFMA, c_lt_8) {
714 TEST_REQUIRES_ARM_NEON_FMA;
715 for (uint32_t channels = 1; channels < 8; channels++) {
716 DWConvMicrokernelTester()
717 .cr(8)
718 .kr(9)
719 .channels(channels)
720 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
721 }
722 }
723
724 TEST(F32_DWCONV_UP8X9__NEONFMA, c_gt_8) {
725 TEST_REQUIRES_ARM_NEON_FMA;
726 for (uint32_t channels = 9; channels < 16; channels++) {
727 DWConvMicrokernelTester()
728 .cr(8)
729 .kr(9)
730 .channels(channels)
731 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
732 }
733 }
734
735 TEST(F32_DWCONV_UP8X9__NEONFMA, c_gt_8_with_qmin) {
736 TEST_REQUIRES_ARM_NEON_FMA;
737 for (uint32_t channels = 9; channels < 16; channels++) {
738 DWConvMicrokernelTester()
739 .cr(8)
740 .kr(9)
741 .channels(channels)
742 .qmin(128)
743 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
744 }
745 }
746
747 TEST(F32_DWCONV_UP8X9__NEONFMA, c_gt_8_with_qmax) {
748 TEST_REQUIRES_ARM_NEON_FMA;
749 for (uint32_t channels = 9; channels < 16; channels++) {
750 DWConvMicrokernelTester()
751 .cr(8)
752 .kr(9)
753 .channels(channels)
754 .qmax(128)
755 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
756 }
757 }
758
759 TEST(F32_DWCONV_UP8X9__NEONFMA, multipixel) {
760 TEST_REQUIRES_ARM_NEON_FMA;
761 for (size_t channels = 1; channels <= 40; channels += 7) {
762 DWConvMicrokernelTester()
763 .cr(8)
764 .kr(9)
765 .channels(channels)
766 .width(3)
767 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
768 }
769 }
770
771 TEST(F32_DWCONV_UP8X9__NEONFMA, multipixel_with_step) {
772 TEST_REQUIRES_ARM_NEON_FMA;
773 for (size_t channels = 1; channels <= 40; channels += 7) {
774 for (size_t step = 2; step <= 9; step++) {
775 DWConvMicrokernelTester()
776 .cr(8)
777 .kr(9)
778 .channels(channels)
779 .width(3)
780 .step(step)
781 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
782 }
783 }
784 }
785
786 TEST(F32_DWCONV_UP8X9__NEONFMA, multipixel_with_output_stride) {
787 TEST_REQUIRES_ARM_NEON_FMA;
788 for (size_t channels = 1; channels <= 40; channels += 7) {
789 DWConvMicrokernelTester()
790 .cr(8)
791 .kr(9)
792 .channels(8)
793 .width(5)
794 .output_stride(43)
795 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
796 }
797 }
798
799 TEST(F32_DWCONV_UP8X9__NEONFMA, multipixel_with_qmin) {
800 TEST_REQUIRES_ARM_NEON_FMA;
801 for (size_t channels = 1; channels <= 40; channels += 7) {
802 DWConvMicrokernelTester()
803 .cr(8)
804 .kr(9)
805 .channels(channels)
806 .width(3)
807 .qmin(128)
808 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
809 }
810 }
811
812 TEST(F32_DWCONV_UP8X9__NEONFMA, multipixel_with_qmax) {
813 TEST_REQUIRES_ARM_NEON_FMA;
814 for (size_t channels = 1; channels <= 40; channels += 7) {
815 DWConvMicrokernelTester()
816 .cr(8)
817 .kr(9)
818 .channels(channels)
819 .width(3)
820 .qmax(128)
821 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma);
822 }
823 }
824#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
825
826
827#if XNN_ARCH_ARM || XNN_ARCH_ARM64
828 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_eq_8) {
829 TEST_REQUIRES_ARM_NEON_FMA;
830 DWConvMicrokernelTester()
831 .cr(8)
832 .kr(9)
833 .channels(8)
834 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
835 }
836
837 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_div_8) {
838 TEST_REQUIRES_ARM_NEON_FMA;
839 for (uint32_t channels = 16; channels < 128; channels += 24) {
840 DWConvMicrokernelTester()
841 .cr(8)
842 .kr(9)
843 .channels(channels)
844 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
845 }
846 }
847
848 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_div_8_with_qmin) {
849 TEST_REQUIRES_ARM_NEON_FMA;
850 for (uint32_t channels = 16; channels < 128; channels += 24) {
851 DWConvMicrokernelTester()
852 .cr(8)
853 .kr(9)
854 .channels(channels)
855 .qmin(128)
856 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
857 }
858 }
859
860 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_div_8_with_qmax) {
861 TEST_REQUIRES_ARM_NEON_FMA;
862 for (uint32_t channels = 16; channels < 128; channels += 24) {
863 DWConvMicrokernelTester()
864 .cr(8)
865 .kr(9)
866 .channels(channels)
867 .qmax(128)
868 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
869 }
870 }
871
872 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_lt_8) {
873 TEST_REQUIRES_ARM_NEON_FMA;
874 for (uint32_t channels = 1; channels < 8; channels++) {
875 DWConvMicrokernelTester()
876 .cr(8)
877 .kr(9)
878 .channels(channels)
879 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
880 }
881 }
882
883 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_gt_8) {
884 TEST_REQUIRES_ARM_NEON_FMA;
885 for (uint32_t channels = 9; channels < 16; channels++) {
886 DWConvMicrokernelTester()
887 .cr(8)
888 .kr(9)
889 .channels(channels)
890 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
891 }
892 }
893
894 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_gt_8_with_qmin) {
895 TEST_REQUIRES_ARM_NEON_FMA;
896 for (uint32_t channels = 9; channels < 16; channels++) {
897 DWConvMicrokernelTester()
898 .cr(8)
899 .kr(9)
900 .channels(channels)
901 .qmin(128)
902 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
903 }
904 }
905
906 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, c_gt_8_with_qmax) {
907 TEST_REQUIRES_ARM_NEON_FMA;
908 for (uint32_t channels = 9; channels < 16; channels++) {
909 DWConvMicrokernelTester()
910 .cr(8)
911 .kr(9)
912 .channels(channels)
913 .qmax(128)
914 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
915 }
916 }
917
918 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, multipixel) {
919 TEST_REQUIRES_ARM_NEON_FMA;
920 for (size_t channels = 1; channels <= 40; channels += 7) {
921 DWConvMicrokernelTester()
922 .cr(8)
923 .kr(9)
924 .channels(channels)
925 .width(3)
926 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
927 }
928 }
929
930 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, multipixel_with_step) {
931 TEST_REQUIRES_ARM_NEON_FMA;
932 for (size_t channels = 1; channels <= 40; channels += 7) {
933 for (size_t step = 2; step <= 9; step++) {
934 DWConvMicrokernelTester()
935 .cr(8)
936 .kr(9)
937 .channels(channels)
938 .width(3)
939 .step(step)
940 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
941 }
942 }
943 }
944
945 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, multipixel_with_output_stride) {
946 TEST_REQUIRES_ARM_NEON_FMA;
947 for (size_t channels = 1; channels <= 40; channels += 7) {
948 DWConvMicrokernelTester()
949 .cr(8)
950 .kr(9)
951 .channels(8)
952 .width(5)
953 .output_stride(43)
954 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
955 }
956 }
957
958 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, multipixel_with_qmin) {
959 TEST_REQUIRES_ARM_NEON_FMA;
960 for (size_t channels = 1; channels <= 40; channels += 7) {
961 DWConvMicrokernelTester()
962 .cr(8)
963 .kr(9)
964 .channels(channels)
965 .width(3)
966 .qmin(128)
967 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
968 }
969 }
970
971 TEST(F32_DWCONV_UP8X9__NEONFMA_ACC2, multipixel_with_qmax) {
972 TEST_REQUIRES_ARM_NEON_FMA;
973 for (size_t channels = 1; channels <= 40; channels += 7) {
974 DWConvMicrokernelTester()
975 .cr(8)
976 .kr(9)
977 .channels(channels)
978 .width(3)
979 .qmax(128)
980 .Test(xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2);
981 }
982 }
983#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
984
985
986#if XNN_ARCH_ARM || XNN_ARCH_ARM64
987 TEST(F32_DWCONV_UP4X9__NEON, c_eq_4) {
988 TEST_REQUIRES_ARM_NEON;
989 DWConvMicrokernelTester()
990 .cr(4)
991 .kr(9)
992 .channels(4)
993 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
994 }
995
996 TEST(F32_DWCONV_UP4X9__NEON, c_div_4) {
997 TEST_REQUIRES_ARM_NEON;
998 for (uint32_t channels = 8; channels < 64; channels += 12) {
999 DWConvMicrokernelTester()
1000 .cr(4)
1001 .kr(9)
1002 .channels(channels)
1003 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1004 }
1005 }
1006
1007 TEST(F32_DWCONV_UP4X9__NEON, c_div_4_with_qmin) {
1008 TEST_REQUIRES_ARM_NEON;
1009 for (uint32_t channels = 8; channels < 64; channels += 12) {
1010 DWConvMicrokernelTester()
1011 .cr(4)
1012 .kr(9)
1013 .channels(channels)
1014 .qmin(128)
1015 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1016 }
1017 }
1018
1019 TEST(F32_DWCONV_UP4X9__NEON, c_div_4_with_qmax) {
1020 TEST_REQUIRES_ARM_NEON;
1021 for (uint32_t channels = 8; channels < 64; channels += 12) {
1022 DWConvMicrokernelTester()
1023 .cr(4)
1024 .kr(9)
1025 .channels(channels)
1026 .qmax(128)
1027 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1028 }
1029 }
1030
1031 TEST(F32_DWCONV_UP4X9__NEON, c_lt_4) {
1032 TEST_REQUIRES_ARM_NEON;
1033 for (uint32_t channels = 1; channels < 4; channels++) {
1034 DWConvMicrokernelTester()
1035 .cr(4)
1036 .kr(9)
1037 .channels(channels)
1038 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1039 }
1040 }
1041
1042 TEST(F32_DWCONV_UP4X9__NEON, c_gt_4) {
1043 TEST_REQUIRES_ARM_NEON;
1044 for (uint32_t channels = 5; channels < 8; channels++) {
1045 DWConvMicrokernelTester()
1046 .cr(4)
1047 .kr(9)
1048 .channels(channels)
1049 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1050 }
1051 }
1052
1053 TEST(F32_DWCONV_UP4X9__NEON, c_gt_4_with_qmin) {
1054 TEST_REQUIRES_ARM_NEON;
1055 for (uint32_t channels = 5; channels < 8; channels++) {
1056 DWConvMicrokernelTester()
1057 .cr(4)
1058 .kr(9)
1059 .channels(channels)
1060 .qmin(128)
1061 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1062 }
1063 }
1064
1065 TEST(F32_DWCONV_UP4X9__NEON, c_gt_4_with_qmax) {
1066 TEST_REQUIRES_ARM_NEON;
1067 for (uint32_t channels = 5; channels < 8; channels++) {
1068 DWConvMicrokernelTester()
1069 .cr(4)
1070 .kr(9)
1071 .channels(channels)
1072 .qmax(128)
1073 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1074 }
1075 }
1076
1077 TEST(F32_DWCONV_UP4X9__NEON, multipixel) {
1078 TEST_REQUIRES_ARM_NEON;
1079 for (size_t channels = 1; channels <= 20; channels += 3) {
1080 DWConvMicrokernelTester()
1081 .cr(4)
1082 .kr(9)
1083 .channels(channels)
1084 .width(3)
1085 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1086 }
1087 }
1088
1089 TEST(F32_DWCONV_UP4X9__NEON, multipixel_with_step) {
1090 TEST_REQUIRES_ARM_NEON;
1091 for (size_t channels = 1; channels <= 20; channels += 3) {
1092 for (size_t step = 2; step <= 9; step++) {
1093 DWConvMicrokernelTester()
1094 .cr(4)
1095 .kr(9)
1096 .channels(channels)
1097 .width(3)
1098 .step(step)
1099 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1100 }
1101 }
1102 }
1103
1104 TEST(F32_DWCONV_UP4X9__NEON, multipixel_with_output_stride) {
1105 TEST_REQUIRES_ARM_NEON;
1106 for (size_t channels = 1; channels <= 20; channels += 3) {
1107 DWConvMicrokernelTester()
1108 .cr(4)
1109 .kr(9)
1110 .channels(4)
1111 .width(5)
1112 .output_stride(23)
1113 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1114 }
1115 }
1116
1117 TEST(F32_DWCONV_UP4X9__NEON, multipixel_with_qmin) {
1118 TEST_REQUIRES_ARM_NEON;
1119 for (size_t channels = 1; channels <= 20; channels += 3) {
1120 DWConvMicrokernelTester()
1121 .cr(4)
1122 .kr(9)
1123 .channels(channels)
1124 .width(3)
1125 .qmin(128)
1126 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1127 }
1128 }
1129
1130 TEST(F32_DWCONV_UP4X9__NEON, multipixel_with_qmax) {
1131 TEST_REQUIRES_ARM_NEON;
1132 for (size_t channels = 1; channels <= 20; channels += 3) {
1133 DWConvMicrokernelTester()
1134 .cr(4)
1135 .kr(9)
1136 .channels(channels)
1137 .width(3)
1138 .qmax(128)
1139 .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
1140 }
1141 }
1142#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1143
1144
1145#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1146 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_eq_4) {
1147 TEST_REQUIRES_ARM_NEON;
1148 DWConvMicrokernelTester()
1149 .cr(4)
1150 .kr(9)
1151 .channels(4)
1152 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1153 }
1154
1155 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_div_4) {
1156 TEST_REQUIRES_ARM_NEON;
1157 for (uint32_t channels = 8; channels < 64; channels += 12) {
1158 DWConvMicrokernelTester()
1159 .cr(4)
1160 .kr(9)
1161 .channels(channels)
1162 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1163 }
1164 }
1165
1166 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_div_4_with_qmin) {
1167 TEST_REQUIRES_ARM_NEON;
1168 for (uint32_t channels = 8; channels < 64; channels += 12) {
1169 DWConvMicrokernelTester()
1170 .cr(4)
1171 .kr(9)
1172 .channels(channels)
1173 .qmin(128)
1174 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1175 }
1176 }
1177
1178 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_div_4_with_qmax) {
1179 TEST_REQUIRES_ARM_NEON;
1180 for (uint32_t channels = 8; channels < 64; channels += 12) {
1181 DWConvMicrokernelTester()
1182 .cr(4)
1183 .kr(9)
1184 .channels(channels)
1185 .qmax(128)
1186 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1187 }
1188 }
1189
1190 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_lt_4) {
1191 TEST_REQUIRES_ARM_NEON;
1192 for (uint32_t channels = 1; channels < 4; channels++) {
1193 DWConvMicrokernelTester()
1194 .cr(4)
1195 .kr(9)
1196 .channels(channels)
1197 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1198 }
1199 }
1200
1201 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_gt_4) {
1202 TEST_REQUIRES_ARM_NEON;
1203 for (uint32_t channels = 5; channels < 8; channels++) {
1204 DWConvMicrokernelTester()
1205 .cr(4)
1206 .kr(9)
1207 .channels(channels)
1208 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1209 }
1210 }
1211
1212 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_gt_4_with_qmin) {
1213 TEST_REQUIRES_ARM_NEON;
1214 for (uint32_t channels = 5; channels < 8; channels++) {
1215 DWConvMicrokernelTester()
1216 .cr(4)
1217 .kr(9)
1218 .channels(channels)
1219 .qmin(128)
1220 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1221 }
1222 }
1223
1224 TEST(F32_DWCONV_UP4X9__NEON_ACC2, c_gt_4_with_qmax) {
1225 TEST_REQUIRES_ARM_NEON;
1226 for (uint32_t channels = 5; channels < 8; channels++) {
1227 DWConvMicrokernelTester()
1228 .cr(4)
1229 .kr(9)
1230 .channels(channels)
1231 .qmax(128)
1232 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1233 }
1234 }
1235
1236 TEST(F32_DWCONV_UP4X9__NEON_ACC2, multipixel) {
1237 TEST_REQUIRES_ARM_NEON;
1238 for (size_t channels = 1; channels <= 20; channels += 3) {
1239 DWConvMicrokernelTester()
1240 .cr(4)
1241 .kr(9)
1242 .channels(channels)
1243 .width(3)
1244 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1245 }
1246 }
1247
1248 TEST(F32_DWCONV_UP4X9__NEON_ACC2, multipixel_with_step) {
1249 TEST_REQUIRES_ARM_NEON;
1250 for (size_t channels = 1; channels <= 20; channels += 3) {
1251 for (size_t step = 2; step <= 9; step++) {
1252 DWConvMicrokernelTester()
1253 .cr(4)
1254 .kr(9)
1255 .channels(channels)
1256 .width(3)
1257 .step(step)
1258 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1259 }
1260 }
1261 }
1262
1263 TEST(F32_DWCONV_UP4X9__NEON_ACC2, multipixel_with_output_stride) {
1264 TEST_REQUIRES_ARM_NEON;
1265 for (size_t channels = 1; channels <= 20; channels += 3) {
1266 DWConvMicrokernelTester()
1267 .cr(4)
1268 .kr(9)
1269 .channels(4)
1270 .width(5)
1271 .output_stride(23)
1272 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1273 }
1274 }
1275
1276 TEST(F32_DWCONV_UP4X9__NEON_ACC2, multipixel_with_qmin) {
1277 TEST_REQUIRES_ARM_NEON;
1278 for (size_t channels = 1; channels <= 20; channels += 3) {
1279 DWConvMicrokernelTester()
1280 .cr(4)
1281 .kr(9)
1282 .channels(channels)
1283 .width(3)
1284 .qmin(128)
1285 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1286 }
1287 }
1288
1289 TEST(F32_DWCONV_UP4X9__NEON_ACC2, multipixel_with_qmax) {
1290 TEST_REQUIRES_ARM_NEON;
1291 for (size_t channels = 1; channels <= 20; channels += 3) {
1292 DWConvMicrokernelTester()
1293 .cr(4)
1294 .kr(9)
1295 .channels(channels)
1296 .width(3)
1297 .qmax(128)
1298 .Test(xnn_f32_dwconv_ukernel_up4x9__neon_acc2);
1299 }
1300 }
1301#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1302
1303
1304#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1305 TEST(F32_DWCONV_UP8X9__NEON, c_eq_8) {
1306 TEST_REQUIRES_ARM_NEON;
1307 DWConvMicrokernelTester()
1308 .cr(8)
1309 .kr(9)
1310 .channels(8)
1311 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1312 }
1313
1314 TEST(F32_DWCONV_UP8X9__NEON, c_div_8) {
1315 TEST_REQUIRES_ARM_NEON;
1316 for (uint32_t channels = 16; channels < 128; channels += 24) {
1317 DWConvMicrokernelTester()
1318 .cr(8)
1319 .kr(9)
1320 .channels(channels)
1321 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1322 }
1323 }
1324
1325 TEST(F32_DWCONV_UP8X9__NEON, c_div_8_with_qmin) {
1326 TEST_REQUIRES_ARM_NEON;
1327 for (uint32_t channels = 16; channels < 128; channels += 24) {
1328 DWConvMicrokernelTester()
1329 .cr(8)
1330 .kr(9)
1331 .channels(channels)
1332 .qmin(128)
1333 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1334 }
1335 }
1336
1337 TEST(F32_DWCONV_UP8X9__NEON, c_div_8_with_qmax) {
1338 TEST_REQUIRES_ARM_NEON;
1339 for (uint32_t channels = 16; channels < 128; channels += 24) {
1340 DWConvMicrokernelTester()
1341 .cr(8)
1342 .kr(9)
1343 .channels(channels)
1344 .qmax(128)
1345 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1346 }
1347 }
1348
1349 TEST(F32_DWCONV_UP8X9__NEON, c_lt_8) {
1350 TEST_REQUIRES_ARM_NEON;
1351 for (uint32_t channels = 1; channels < 8; channels++) {
1352 DWConvMicrokernelTester()
1353 .cr(8)
1354 .kr(9)
1355 .channels(channels)
1356 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1357 }
1358 }
1359
1360 TEST(F32_DWCONV_UP8X9__NEON, c_gt_8) {
1361 TEST_REQUIRES_ARM_NEON;
1362 for (uint32_t channels = 9; channels < 16; channels++) {
1363 DWConvMicrokernelTester()
1364 .cr(8)
1365 .kr(9)
1366 .channels(channels)
1367 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1368 }
1369 }
1370
1371 TEST(F32_DWCONV_UP8X9__NEON, c_gt_8_with_qmin) {
1372 TEST_REQUIRES_ARM_NEON;
1373 for (uint32_t channels = 9; channels < 16; channels++) {
1374 DWConvMicrokernelTester()
1375 .cr(8)
1376 .kr(9)
1377 .channels(channels)
1378 .qmin(128)
1379 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1380 }
1381 }
1382
1383 TEST(F32_DWCONV_UP8X9__NEON, c_gt_8_with_qmax) {
1384 TEST_REQUIRES_ARM_NEON;
1385 for (uint32_t channels = 9; channels < 16; channels++) {
1386 DWConvMicrokernelTester()
1387 .cr(8)
1388 .kr(9)
1389 .channels(channels)
1390 .qmax(128)
1391 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1392 }
1393 }
1394
1395 TEST(F32_DWCONV_UP8X9__NEON, multipixel) {
1396 TEST_REQUIRES_ARM_NEON;
1397 for (size_t channels = 1; channels <= 40; channels += 7) {
1398 DWConvMicrokernelTester()
1399 .cr(8)
1400 .kr(9)
1401 .channels(channels)
1402 .width(3)
1403 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1404 }
1405 }
1406
1407 TEST(F32_DWCONV_UP8X9__NEON, multipixel_with_step) {
1408 TEST_REQUIRES_ARM_NEON;
1409 for (size_t channels = 1; channels <= 40; channels += 7) {
1410 for (size_t step = 2; step <= 9; step++) {
1411 DWConvMicrokernelTester()
1412 .cr(8)
1413 .kr(9)
1414 .channels(channels)
1415 .width(3)
1416 .step(step)
1417 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1418 }
1419 }
1420 }
1421
1422 TEST(F32_DWCONV_UP8X9__NEON, multipixel_with_output_stride) {
1423 TEST_REQUIRES_ARM_NEON;
1424 for (size_t channels = 1; channels <= 40; channels += 7) {
1425 DWConvMicrokernelTester()
1426 .cr(8)
1427 .kr(9)
1428 .channels(8)
1429 .width(5)
1430 .output_stride(43)
1431 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1432 }
1433 }
1434
1435 TEST(F32_DWCONV_UP8X9__NEON, multipixel_with_qmin) {
1436 TEST_REQUIRES_ARM_NEON;
1437 for (size_t channels = 1; channels <= 40; channels += 7) {
1438 DWConvMicrokernelTester()
1439 .cr(8)
1440 .kr(9)
1441 .channels(channels)
1442 .width(3)
1443 .qmin(128)
1444 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1445 }
1446 }
1447
1448 TEST(F32_DWCONV_UP8X9__NEON, multipixel_with_qmax) {
1449 TEST_REQUIRES_ARM_NEON;
1450 for (size_t channels = 1; channels <= 40; channels += 7) {
1451 DWConvMicrokernelTester()
1452 .cr(8)
1453 .kr(9)
1454 .channels(channels)
1455 .width(3)
1456 .qmax(128)
1457 .Test(xnn_f32_dwconv_ukernel_up8x9__neon);
1458 }
1459 }
1460#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1461
1462
1463#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1464 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_eq_8) {
1465 TEST_REQUIRES_ARM_NEON;
1466 DWConvMicrokernelTester()
1467 .cr(8)
1468 .kr(9)
1469 .channels(8)
1470 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1471 }
1472
1473 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_div_8) {
1474 TEST_REQUIRES_ARM_NEON;
1475 for (uint32_t channels = 16; channels < 128; channels += 24) {
1476 DWConvMicrokernelTester()
1477 .cr(8)
1478 .kr(9)
1479 .channels(channels)
1480 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1481 }
1482 }
1483
1484 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_div_8_with_qmin) {
1485 TEST_REQUIRES_ARM_NEON;
1486 for (uint32_t channels = 16; channels < 128; channels += 24) {
1487 DWConvMicrokernelTester()
1488 .cr(8)
1489 .kr(9)
1490 .channels(channels)
1491 .qmin(128)
1492 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1493 }
1494 }
1495
1496 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_div_8_with_qmax) {
1497 TEST_REQUIRES_ARM_NEON;
1498 for (uint32_t channels = 16; channels < 128; channels += 24) {
1499 DWConvMicrokernelTester()
1500 .cr(8)
1501 .kr(9)
1502 .channels(channels)
1503 .qmax(128)
1504 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1505 }
1506 }
1507
1508 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_lt_8) {
1509 TEST_REQUIRES_ARM_NEON;
1510 for (uint32_t channels = 1; channels < 8; channels++) {
1511 DWConvMicrokernelTester()
1512 .cr(8)
1513 .kr(9)
1514 .channels(channels)
1515 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1516 }
1517 }
1518
1519 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_gt_8) {
1520 TEST_REQUIRES_ARM_NEON;
1521 for (uint32_t channels = 9; channels < 16; channels++) {
1522 DWConvMicrokernelTester()
1523 .cr(8)
1524 .kr(9)
1525 .channels(channels)
1526 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1527 }
1528 }
1529
1530 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_gt_8_with_qmin) {
1531 TEST_REQUIRES_ARM_NEON;
1532 for (uint32_t channels = 9; channels < 16; channels++) {
1533 DWConvMicrokernelTester()
1534 .cr(8)
1535 .kr(9)
1536 .channels(channels)
1537 .qmin(128)
1538 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1539 }
1540 }
1541
1542 TEST(F32_DWCONV_UP8X9__NEON_ACC2, c_gt_8_with_qmax) {
1543 TEST_REQUIRES_ARM_NEON;
1544 for (uint32_t channels = 9; channels < 16; channels++) {
1545 DWConvMicrokernelTester()
1546 .cr(8)
1547 .kr(9)
1548 .channels(channels)
1549 .qmax(128)
1550 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1551 }
1552 }
1553
1554 TEST(F32_DWCONV_UP8X9__NEON_ACC2, multipixel) {
1555 TEST_REQUIRES_ARM_NEON;
1556 for (size_t channels = 1; channels <= 40; channels += 7) {
1557 DWConvMicrokernelTester()
1558 .cr(8)
1559 .kr(9)
1560 .channels(channels)
1561 .width(3)
1562 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1563 }
1564 }
1565
1566 TEST(F32_DWCONV_UP8X9__NEON_ACC2, multipixel_with_step) {
1567 TEST_REQUIRES_ARM_NEON;
1568 for (size_t channels = 1; channels <= 40; channels += 7) {
1569 for (size_t step = 2; step <= 9; step++) {
1570 DWConvMicrokernelTester()
1571 .cr(8)
1572 .kr(9)
1573 .channels(channels)
1574 .width(3)
1575 .step(step)
1576 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1577 }
1578 }
1579 }
1580
1581 TEST(F32_DWCONV_UP8X9__NEON_ACC2, multipixel_with_output_stride) {
1582 TEST_REQUIRES_ARM_NEON;
1583 for (size_t channels = 1; channels <= 40; channels += 7) {
1584 DWConvMicrokernelTester()
1585 .cr(8)
1586 .kr(9)
1587 .channels(8)
1588 .width(5)
1589 .output_stride(43)
1590 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1591 }
1592 }
1593
1594 TEST(F32_DWCONV_UP8X9__NEON_ACC2, multipixel_with_qmin) {
1595 TEST_REQUIRES_ARM_NEON;
1596 for (size_t channels = 1; channels <= 40; channels += 7) {
1597 DWConvMicrokernelTester()
1598 .cr(8)
1599 .kr(9)
1600 .channels(channels)
1601 .width(3)
1602 .qmin(128)
1603 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1604 }
1605 }
1606
1607 TEST(F32_DWCONV_UP8X9__NEON_ACC2, multipixel_with_qmax) {
1608 TEST_REQUIRES_ARM_NEON;
1609 for (size_t channels = 1; channels <= 40; channels += 7) {
1610 DWConvMicrokernelTester()
1611 .cr(8)
1612 .kr(9)
1613 .channels(channels)
1614 .width(3)
1615 .qmax(128)
1616 .Test(xnn_f32_dwconv_ukernel_up8x9__neon_acc2);
1617 }
1618 }
1619#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1620
1621
1622#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1623 TEST(F32_DWCONV_UP4X25__SSE, c_eq_4) {
1624 TEST_REQUIRES_X86_SSE;
1625 DWConvMicrokernelTester()
1626 .cr(4)
1627 .kr(25)
1628 .channels(4)
1629 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1630 }
1631
1632 TEST(F32_DWCONV_UP4X25__SSE, c_div_4) {
1633 TEST_REQUIRES_X86_SSE;
1634 for (uint32_t channels = 8; channels < 64; channels += 12) {
1635 DWConvMicrokernelTester()
1636 .cr(4)
1637 .kr(25)
1638 .channels(channels)
1639 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1640 }
1641 }
1642
1643 TEST(F32_DWCONV_UP4X25__SSE, c_div_4_with_qmin) {
1644 TEST_REQUIRES_X86_SSE;
1645 for (uint32_t channels = 8; channels < 64; channels += 12) {
1646 DWConvMicrokernelTester()
1647 .cr(4)
1648 .kr(25)
1649 .channels(channels)
1650 .qmin(128)
1651 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1652 }
1653 }
1654
1655 TEST(F32_DWCONV_UP4X25__SSE, c_div_4_with_qmax) {
1656 TEST_REQUIRES_X86_SSE;
1657 for (uint32_t channels = 8; channels < 64; channels += 12) {
1658 DWConvMicrokernelTester()
1659 .cr(4)
1660 .kr(25)
1661 .channels(channels)
1662 .qmax(128)
1663 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1664 }
1665 }
1666
1667 TEST(F32_DWCONV_UP4X25__SSE, c_lt_4) {
1668 TEST_REQUIRES_X86_SSE;
1669 for (uint32_t channels = 1; channels < 4; channels++) {
1670 DWConvMicrokernelTester()
1671 .cr(4)
1672 .kr(25)
1673 .channels(channels)
1674 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1675 }
1676 }
1677
1678 TEST(F32_DWCONV_UP4X25__SSE, c_gt_4) {
1679 TEST_REQUIRES_X86_SSE;
1680 for (uint32_t channels = 5; channels < 8; channels++) {
1681 DWConvMicrokernelTester()
1682 .cr(4)
1683 .kr(25)
1684 .channels(channels)
1685 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1686 }
1687 }
1688
1689 TEST(F32_DWCONV_UP4X25__SSE, c_gt_4_with_qmin) {
1690 TEST_REQUIRES_X86_SSE;
1691 for (uint32_t channels = 5; channels < 8; channels++) {
1692 DWConvMicrokernelTester()
1693 .cr(4)
1694 .kr(25)
1695 .channels(channels)
1696 .qmin(128)
1697 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1698 }
1699 }
1700
1701 TEST(F32_DWCONV_UP4X25__SSE, c_gt_4_with_qmax) {
1702 TEST_REQUIRES_X86_SSE;
1703 for (uint32_t channels = 5; channels < 8; channels++) {
1704 DWConvMicrokernelTester()
1705 .cr(4)
1706 .kr(25)
1707 .channels(channels)
1708 .qmax(128)
1709 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1710 }
1711 }
1712
1713 TEST(F32_DWCONV_UP4X25__SSE, multipixel) {
1714 TEST_REQUIRES_X86_SSE;
1715 for (size_t channels = 1; channels <= 20; channels += 3) {
1716 DWConvMicrokernelTester()
1717 .cr(4)
1718 .kr(25)
1719 .channels(channels)
1720 .width(3)
1721 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1722 }
1723 }
1724
1725 TEST(F32_DWCONV_UP4X25__SSE, multipixel_with_step) {
1726 TEST_REQUIRES_X86_SSE;
1727 for (size_t channels = 1; channels <= 20; channels += 3) {
1728 for (size_t step = 2; step <= 25; step++) {
1729 DWConvMicrokernelTester()
1730 .cr(4)
1731 .kr(25)
1732 .channels(channels)
1733 .width(3)
1734 .step(step)
1735 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1736 }
1737 }
1738 }
1739
1740 TEST(F32_DWCONV_UP4X25__SSE, multipixel_with_output_stride) {
1741 TEST_REQUIRES_X86_SSE;
1742 for (size_t channels = 1; channels <= 20; channels += 3) {
1743 DWConvMicrokernelTester()
1744 .cr(4)
1745 .kr(25)
1746 .channels(4)
1747 .width(5)
1748 .output_stride(23)
1749 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1750 }
1751 }
1752
1753 TEST(F32_DWCONV_UP4X25__SSE, multipixel_with_qmin) {
1754 TEST_REQUIRES_X86_SSE;
1755 for (size_t channels = 1; channels <= 20; channels += 3) {
1756 DWConvMicrokernelTester()
1757 .cr(4)
1758 .kr(25)
1759 .channels(channels)
1760 .width(3)
1761 .qmin(128)
1762 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1763 }
1764 }
1765
1766 TEST(F32_DWCONV_UP4X25__SSE, multipixel_with_qmax) {
1767 TEST_REQUIRES_X86_SSE;
1768 for (size_t channels = 1; channels <= 20; channels += 3) {
1769 DWConvMicrokernelTester()
1770 .cr(4)
1771 .kr(25)
1772 .channels(channels)
1773 .width(3)
1774 .qmax(128)
1775 .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
1776 }
1777 }
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1782 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_eq_4) {
1783 TEST_REQUIRES_X86_SSE;
1784 DWConvMicrokernelTester()
1785 .cr(4)
1786 .kr(25)
1787 .channels(4)
1788 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1789 }
1790
1791 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_div_4) {
1792 TEST_REQUIRES_X86_SSE;
1793 for (uint32_t channels = 8; channels < 64; channels += 12) {
1794 DWConvMicrokernelTester()
1795 .cr(4)
1796 .kr(25)
1797 .channels(channels)
1798 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1799 }
1800 }
1801
1802 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_div_4_with_qmin) {
1803 TEST_REQUIRES_X86_SSE;
1804 for (uint32_t channels = 8; channels < 64; channels += 12) {
1805 DWConvMicrokernelTester()
1806 .cr(4)
1807 .kr(25)
1808 .channels(channels)
1809 .qmin(128)
1810 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1811 }
1812 }
1813
1814 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_div_4_with_qmax) {
1815 TEST_REQUIRES_X86_SSE;
1816 for (uint32_t channels = 8; channels < 64; channels += 12) {
1817 DWConvMicrokernelTester()
1818 .cr(4)
1819 .kr(25)
1820 .channels(channels)
1821 .qmax(128)
1822 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1823 }
1824 }
1825
1826 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_lt_4) {
1827 TEST_REQUIRES_X86_SSE;
1828 for (uint32_t channels = 1; channels < 4; channels++) {
1829 DWConvMicrokernelTester()
1830 .cr(4)
1831 .kr(25)
1832 .channels(channels)
1833 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1834 }
1835 }
1836
1837 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_gt_4) {
1838 TEST_REQUIRES_X86_SSE;
1839 for (uint32_t channels = 5; channels < 8; channels++) {
1840 DWConvMicrokernelTester()
1841 .cr(4)
1842 .kr(25)
1843 .channels(channels)
1844 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1845 }
1846 }
1847
1848 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_gt_4_with_qmin) {
1849 TEST_REQUIRES_X86_SSE;
1850 for (uint32_t channels = 5; channels < 8; channels++) {
1851 DWConvMicrokernelTester()
1852 .cr(4)
1853 .kr(25)
1854 .channels(channels)
1855 .qmin(128)
1856 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1857 }
1858 }
1859
1860 TEST(F32_DWCONV_UP4X25__SSE_ACC2, c_gt_4_with_qmax) {
1861 TEST_REQUIRES_X86_SSE;
1862 for (uint32_t channels = 5; channels < 8; channels++) {
1863 DWConvMicrokernelTester()
1864 .cr(4)
1865 .kr(25)
1866 .channels(channels)
1867 .qmax(128)
1868 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1869 }
1870 }
1871
1872 TEST(F32_DWCONV_UP4X25__SSE_ACC2, multipixel) {
1873 TEST_REQUIRES_X86_SSE;
1874 for (size_t channels = 1; channels <= 20; channels += 3) {
1875 DWConvMicrokernelTester()
1876 .cr(4)
1877 .kr(25)
1878 .channels(channels)
1879 .width(3)
1880 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1881 }
1882 }
1883
1884 TEST(F32_DWCONV_UP4X25__SSE_ACC2, multipixel_with_step) {
1885 TEST_REQUIRES_X86_SSE;
1886 for (size_t channels = 1; channels <= 20; channels += 3) {
1887 for (size_t step = 2; step <= 25; step++) {
1888 DWConvMicrokernelTester()
1889 .cr(4)
1890 .kr(25)
1891 .channels(channels)
1892 .width(3)
1893 .step(step)
1894 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1895 }
1896 }
1897 }
1898
1899 TEST(F32_DWCONV_UP4X25__SSE_ACC2, multipixel_with_output_stride) {
1900 TEST_REQUIRES_X86_SSE;
1901 for (size_t channels = 1; channels <= 20; channels += 3) {
1902 DWConvMicrokernelTester()
1903 .cr(4)
1904 .kr(25)
1905 .channels(4)
1906 .width(5)
1907 .output_stride(23)
1908 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1909 }
1910 }
1911
1912 TEST(F32_DWCONV_UP4X25__SSE_ACC2, multipixel_with_qmin) {
1913 TEST_REQUIRES_X86_SSE;
1914 for (size_t channels = 1; channels <= 20; channels += 3) {
1915 DWConvMicrokernelTester()
1916 .cr(4)
1917 .kr(25)
1918 .channels(channels)
1919 .width(3)
1920 .qmin(128)
1921 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1922 }
1923 }
1924
1925 TEST(F32_DWCONV_UP4X25__SSE_ACC2, multipixel_with_qmax) {
1926 TEST_REQUIRES_X86_SSE;
1927 for (size_t channels = 1; channels <= 20; channels += 3) {
1928 DWConvMicrokernelTester()
1929 .cr(4)
1930 .kr(25)
1931 .channels(channels)
1932 .width(3)
1933 .qmax(128)
1934 .Test(xnn_f32_dwconv_ukernel_up4x25__sse_acc2);
1935 }
1936 }
1937#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1938
1939
1940#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1941 TEST(F32_DWCONV_UP8X25__SSE, c_eq_8) {
1942 TEST_REQUIRES_X86_SSE;
1943 DWConvMicrokernelTester()
1944 .cr(8)
1945 .kr(25)
1946 .channels(8)
1947 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
1948 }
1949
1950 TEST(F32_DWCONV_UP8X25__SSE, c_div_8) {
1951 TEST_REQUIRES_X86_SSE;
1952 for (uint32_t channels = 16; channels < 128; channels += 24) {
1953 DWConvMicrokernelTester()
1954 .cr(8)
1955 .kr(25)
1956 .channels(channels)
1957 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
1958 }
1959 }
1960
1961 TEST(F32_DWCONV_UP8X25__SSE, c_div_8_with_qmin) {
1962 TEST_REQUIRES_X86_SSE;
1963 for (uint32_t channels = 16; channels < 128; channels += 24) {
1964 DWConvMicrokernelTester()
1965 .cr(8)
1966 .kr(25)
1967 .channels(channels)
1968 .qmin(128)
1969 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
1970 }
1971 }
1972
1973 TEST(F32_DWCONV_UP8X25__SSE, c_div_8_with_qmax) {
1974 TEST_REQUIRES_X86_SSE;
1975 for (uint32_t channels = 16; channels < 128; channels += 24) {
1976 DWConvMicrokernelTester()
1977 .cr(8)
1978 .kr(25)
1979 .channels(channels)
1980 .qmax(128)
1981 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
1982 }
1983 }
1984
1985 TEST(F32_DWCONV_UP8X25__SSE, c_lt_8) {
1986 TEST_REQUIRES_X86_SSE;
1987 for (uint32_t channels = 1; channels < 8; channels++) {
1988 DWConvMicrokernelTester()
1989 .cr(8)
1990 .kr(25)
1991 .channels(channels)
1992 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
1993 }
1994 }
1995
1996 TEST(F32_DWCONV_UP8X25__SSE, c_gt_8) {
1997 TEST_REQUIRES_X86_SSE;
1998 for (uint32_t channels = 9; channels < 16; channels++) {
1999 DWConvMicrokernelTester()
2000 .cr(8)
2001 .kr(25)
2002 .channels(channels)
2003 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2004 }
2005 }
2006
2007 TEST(F32_DWCONV_UP8X25__SSE, c_gt_8_with_qmin) {
2008 TEST_REQUIRES_X86_SSE;
2009 for (uint32_t channels = 9; channels < 16; channels++) {
2010 DWConvMicrokernelTester()
2011 .cr(8)
2012 .kr(25)
2013 .channels(channels)
2014 .qmin(128)
2015 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2016 }
2017 }
2018
2019 TEST(F32_DWCONV_UP8X25__SSE, c_gt_8_with_qmax) {
2020 TEST_REQUIRES_X86_SSE;
2021 for (uint32_t channels = 9; channels < 16; channels++) {
2022 DWConvMicrokernelTester()
2023 .cr(8)
2024 .kr(25)
2025 .channels(channels)
2026 .qmax(128)
2027 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2028 }
2029 }
2030
2031 TEST(F32_DWCONV_UP8X25__SSE, multipixel) {
2032 TEST_REQUIRES_X86_SSE;
2033 for (size_t channels = 1; channels <= 40; channels += 7) {
2034 DWConvMicrokernelTester()
2035 .cr(8)
2036 .kr(25)
2037 .channels(channels)
2038 .width(3)
2039 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2040 }
2041 }
2042
2043 TEST(F32_DWCONV_UP8X25__SSE, multipixel_with_step) {
2044 TEST_REQUIRES_X86_SSE;
2045 for (size_t channels = 1; channels <= 40; channels += 7) {
2046 for (size_t step = 2; step <= 25; step++) {
2047 DWConvMicrokernelTester()
2048 .cr(8)
2049 .kr(25)
2050 .channels(channels)
2051 .width(3)
2052 .step(step)
2053 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2054 }
2055 }
2056 }
2057
2058 TEST(F32_DWCONV_UP8X25__SSE, multipixel_with_output_stride) {
2059 TEST_REQUIRES_X86_SSE;
2060 for (size_t channels = 1; channels <= 40; channels += 7) {
2061 DWConvMicrokernelTester()
2062 .cr(8)
2063 .kr(25)
2064 .channels(8)
2065 .width(5)
2066 .output_stride(43)
2067 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2068 }
2069 }
2070
2071 TEST(F32_DWCONV_UP8X25__SSE, multipixel_with_qmin) {
2072 TEST_REQUIRES_X86_SSE;
2073 for (size_t channels = 1; channels <= 40; channels += 7) {
2074 DWConvMicrokernelTester()
2075 .cr(8)
2076 .kr(25)
2077 .channels(channels)
2078 .width(3)
2079 .qmin(128)
2080 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2081 }
2082 }
2083
2084 TEST(F32_DWCONV_UP8X25__SSE, multipixel_with_qmax) {
2085 TEST_REQUIRES_X86_SSE;
2086 for (size_t channels = 1; channels <= 40; channels += 7) {
2087 DWConvMicrokernelTester()
2088 .cr(8)
2089 .kr(25)
2090 .channels(channels)
2091 .width(3)
2092 .qmax(128)
2093 .Test(xnn_f32_dwconv_ukernel_up8x25__sse);
2094 }
2095 }
2096#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2097
2098
2099#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2100 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_eq_8) {
2101 TEST_REQUIRES_X86_SSE;
2102 DWConvMicrokernelTester()
2103 .cr(8)
2104 .kr(25)
2105 .channels(8)
2106 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2107 }
2108
2109 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_div_8) {
2110 TEST_REQUIRES_X86_SSE;
2111 for (uint32_t channels = 16; channels < 128; channels += 24) {
2112 DWConvMicrokernelTester()
2113 .cr(8)
2114 .kr(25)
2115 .channels(channels)
2116 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2117 }
2118 }
2119
2120 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_div_8_with_qmin) {
2121 TEST_REQUIRES_X86_SSE;
2122 for (uint32_t channels = 16; channels < 128; channels += 24) {
2123 DWConvMicrokernelTester()
2124 .cr(8)
2125 .kr(25)
2126 .channels(channels)
2127 .qmin(128)
2128 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2129 }
2130 }
2131
2132 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_div_8_with_qmax) {
2133 TEST_REQUIRES_X86_SSE;
2134 for (uint32_t channels = 16; channels < 128; channels += 24) {
2135 DWConvMicrokernelTester()
2136 .cr(8)
2137 .kr(25)
2138 .channels(channels)
2139 .qmax(128)
2140 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2141 }
2142 }
2143
2144 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_lt_8) {
2145 TEST_REQUIRES_X86_SSE;
2146 for (uint32_t channels = 1; channels < 8; channels++) {
2147 DWConvMicrokernelTester()
2148 .cr(8)
2149 .kr(25)
2150 .channels(channels)
2151 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2152 }
2153 }
2154
2155 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_gt_8) {
2156 TEST_REQUIRES_X86_SSE;
2157 for (uint32_t channels = 9; channels < 16; channels++) {
2158 DWConvMicrokernelTester()
2159 .cr(8)
2160 .kr(25)
2161 .channels(channels)
2162 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2163 }
2164 }
2165
2166 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_gt_8_with_qmin) {
2167 TEST_REQUIRES_X86_SSE;
2168 for (uint32_t channels = 9; channels < 16; channels++) {
2169 DWConvMicrokernelTester()
2170 .cr(8)
2171 .kr(25)
2172 .channels(channels)
2173 .qmin(128)
2174 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2175 }
2176 }
2177
2178 TEST(F32_DWCONV_UP8X25__SSE_ACC2, c_gt_8_with_qmax) {
2179 TEST_REQUIRES_X86_SSE;
2180 for (uint32_t channels = 9; channels < 16; channels++) {
2181 DWConvMicrokernelTester()
2182 .cr(8)
2183 .kr(25)
2184 .channels(channels)
2185 .qmax(128)
2186 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2187 }
2188 }
2189
2190 TEST(F32_DWCONV_UP8X25__SSE_ACC2, multipixel) {
2191 TEST_REQUIRES_X86_SSE;
2192 for (size_t channels = 1; channels <= 40; channels += 7) {
2193 DWConvMicrokernelTester()
2194 .cr(8)
2195 .kr(25)
2196 .channels(channels)
2197 .width(3)
2198 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2199 }
2200 }
2201
2202 TEST(F32_DWCONV_UP8X25__SSE_ACC2, multipixel_with_step) {
2203 TEST_REQUIRES_X86_SSE;
2204 for (size_t channels = 1; channels <= 40; channels += 7) {
2205 for (size_t step = 2; step <= 25; step++) {
2206 DWConvMicrokernelTester()
2207 .cr(8)
2208 .kr(25)
2209 .channels(channels)
2210 .width(3)
2211 .step(step)
2212 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2213 }
2214 }
2215 }
2216
2217 TEST(F32_DWCONV_UP8X25__SSE_ACC2, multipixel_with_output_stride) {
2218 TEST_REQUIRES_X86_SSE;
2219 for (size_t channels = 1; channels <= 40; channels += 7) {
2220 DWConvMicrokernelTester()
2221 .cr(8)
2222 .kr(25)
2223 .channels(8)
2224 .width(5)
2225 .output_stride(43)
2226 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2227 }
2228 }
2229
2230 TEST(F32_DWCONV_UP8X25__SSE_ACC2, multipixel_with_qmin) {
2231 TEST_REQUIRES_X86_SSE;
2232 for (size_t channels = 1; channels <= 40; channels += 7) {
2233 DWConvMicrokernelTester()
2234 .cr(8)
2235 .kr(25)
2236 .channels(channels)
2237 .width(3)
2238 .qmin(128)
2239 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2240 }
2241 }
2242
2243 TEST(F32_DWCONV_UP8X25__SSE_ACC2, multipixel_with_qmax) {
2244 TEST_REQUIRES_X86_SSE;
2245 for (size_t channels = 1; channels <= 40; channels += 7) {
2246 DWConvMicrokernelTester()
2247 .cr(8)
2248 .kr(25)
2249 .channels(channels)
2250 .width(3)
2251 .qmax(128)
2252 .Test(xnn_f32_dwconv_ukernel_up8x25__sse_acc2);
2253 }
2254 }
2255#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2256
2257
2258#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2259 TEST(F32_DWCONV_UP4X9__SSE, c_eq_4) {
2260 TEST_REQUIRES_X86_SSE;
2261 DWConvMicrokernelTester()
2262 .cr(4)
2263 .kr(9)
2264 .channels(4)
2265 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2266 }
2267
2268 TEST(F32_DWCONV_UP4X9__SSE, c_div_4) {
2269 TEST_REQUIRES_X86_SSE;
2270 for (uint32_t channels = 8; channels < 64; channels += 12) {
2271 DWConvMicrokernelTester()
2272 .cr(4)
2273 .kr(9)
2274 .channels(channels)
2275 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2276 }
2277 }
2278
2279 TEST(F32_DWCONV_UP4X9__SSE, c_div_4_with_qmin) {
2280 TEST_REQUIRES_X86_SSE;
2281 for (uint32_t channels = 8; channels < 64; channels += 12) {
2282 DWConvMicrokernelTester()
2283 .cr(4)
2284 .kr(9)
2285 .channels(channels)
2286 .qmin(128)
2287 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2288 }
2289 }
2290
2291 TEST(F32_DWCONV_UP4X9__SSE, c_div_4_with_qmax) {
2292 TEST_REQUIRES_X86_SSE;
2293 for (uint32_t channels = 8; channels < 64; channels += 12) {
2294 DWConvMicrokernelTester()
2295 .cr(4)
2296 .kr(9)
2297 .channels(channels)
2298 .qmax(128)
2299 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2300 }
2301 }
2302
2303 TEST(F32_DWCONV_UP4X9__SSE, c_lt_4) {
2304 TEST_REQUIRES_X86_SSE;
2305 for (uint32_t channels = 1; channels < 4; channels++) {
2306 DWConvMicrokernelTester()
2307 .cr(4)
2308 .kr(9)
2309 .channels(channels)
2310 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2311 }
2312 }
2313
2314 TEST(F32_DWCONV_UP4X9__SSE, c_gt_4) {
2315 TEST_REQUIRES_X86_SSE;
2316 for (uint32_t channels = 5; channels < 8; channels++) {
2317 DWConvMicrokernelTester()
2318 .cr(4)
2319 .kr(9)
2320 .channels(channels)
2321 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2322 }
2323 }
2324
2325 TEST(F32_DWCONV_UP4X9__SSE, c_gt_4_with_qmin) {
2326 TEST_REQUIRES_X86_SSE;
2327 for (uint32_t channels = 5; channels < 8; channels++) {
2328 DWConvMicrokernelTester()
2329 .cr(4)
2330 .kr(9)
2331 .channels(channels)
2332 .qmin(128)
2333 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2334 }
2335 }
2336
2337 TEST(F32_DWCONV_UP4X9__SSE, c_gt_4_with_qmax) {
2338 TEST_REQUIRES_X86_SSE;
2339 for (uint32_t channels = 5; channels < 8; channels++) {
2340 DWConvMicrokernelTester()
2341 .cr(4)
2342 .kr(9)
2343 .channels(channels)
2344 .qmax(128)
2345 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2346 }
2347 }
2348
2349 TEST(F32_DWCONV_UP4X9__SSE, multipixel) {
2350 TEST_REQUIRES_X86_SSE;
2351 for (size_t channels = 1; channels <= 20; channels += 3) {
2352 DWConvMicrokernelTester()
2353 .cr(4)
2354 .kr(9)
2355 .channels(channels)
2356 .width(3)
2357 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2358 }
2359 }
2360
2361 TEST(F32_DWCONV_UP4X9__SSE, multipixel_with_step) {
2362 TEST_REQUIRES_X86_SSE;
2363 for (size_t channels = 1; channels <= 20; channels += 3) {
2364 for (size_t step = 2; step <= 9; step++) {
2365 DWConvMicrokernelTester()
2366 .cr(4)
2367 .kr(9)
2368 .channels(channels)
2369 .width(3)
2370 .step(step)
2371 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2372 }
2373 }
2374 }
2375
2376 TEST(F32_DWCONV_UP4X9__SSE, multipixel_with_output_stride) {
2377 TEST_REQUIRES_X86_SSE;
2378 for (size_t channels = 1; channels <= 20; channels += 3) {
2379 DWConvMicrokernelTester()
2380 .cr(4)
2381 .kr(9)
2382 .channels(4)
2383 .width(5)
2384 .output_stride(23)
2385 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2386 }
2387 }
2388
2389 TEST(F32_DWCONV_UP4X9__SSE, multipixel_with_qmin) {
2390 TEST_REQUIRES_X86_SSE;
2391 for (size_t channels = 1; channels <= 20; channels += 3) {
2392 DWConvMicrokernelTester()
2393 .cr(4)
2394 .kr(9)
2395 .channels(channels)
2396 .width(3)
2397 .qmin(128)
2398 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2399 }
2400 }
2401
2402 TEST(F32_DWCONV_UP4X9__SSE, multipixel_with_qmax) {
2403 TEST_REQUIRES_X86_SSE;
2404 for (size_t channels = 1; channels <= 20; channels += 3) {
2405 DWConvMicrokernelTester()
2406 .cr(4)
2407 .kr(9)
2408 .channels(channels)
2409 .width(3)
2410 .qmax(128)
2411 .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
2412 }
2413 }
2414#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2415
2416
2417#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2418 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_eq_4) {
2419 TEST_REQUIRES_X86_SSE;
2420 DWConvMicrokernelTester()
2421 .cr(4)
2422 .kr(9)
2423 .channels(4)
2424 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2425 }
2426
2427 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_div_4) {
2428 TEST_REQUIRES_X86_SSE;
2429 for (uint32_t channels = 8; channels < 64; channels += 12) {
2430 DWConvMicrokernelTester()
2431 .cr(4)
2432 .kr(9)
2433 .channels(channels)
2434 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2435 }
2436 }
2437
2438 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_div_4_with_qmin) {
2439 TEST_REQUIRES_X86_SSE;
2440 for (uint32_t channels = 8; channels < 64; channels += 12) {
2441 DWConvMicrokernelTester()
2442 .cr(4)
2443 .kr(9)
2444 .channels(channels)
2445 .qmin(128)
2446 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2447 }
2448 }
2449
2450 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_div_4_with_qmax) {
2451 TEST_REQUIRES_X86_SSE;
2452 for (uint32_t channels = 8; channels < 64; channels += 12) {
2453 DWConvMicrokernelTester()
2454 .cr(4)
2455 .kr(9)
2456 .channels(channels)
2457 .qmax(128)
2458 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2459 }
2460 }
2461
2462 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_lt_4) {
2463 TEST_REQUIRES_X86_SSE;
2464 for (uint32_t channels = 1; channels < 4; channels++) {
2465 DWConvMicrokernelTester()
2466 .cr(4)
2467 .kr(9)
2468 .channels(channels)
2469 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2470 }
2471 }
2472
2473 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_gt_4) {
2474 TEST_REQUIRES_X86_SSE;
2475 for (uint32_t channels = 5; channels < 8; channels++) {
2476 DWConvMicrokernelTester()
2477 .cr(4)
2478 .kr(9)
2479 .channels(channels)
2480 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2481 }
2482 }
2483
2484 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_gt_4_with_qmin) {
2485 TEST_REQUIRES_X86_SSE;
2486 for (uint32_t channels = 5; channels < 8; channels++) {
2487 DWConvMicrokernelTester()
2488 .cr(4)
2489 .kr(9)
2490 .channels(channels)
2491 .qmin(128)
2492 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2493 }
2494 }
2495
2496 TEST(F32_DWCONV_UP4X9__SSE_ACC2, c_gt_4_with_qmax) {
2497 TEST_REQUIRES_X86_SSE;
2498 for (uint32_t channels = 5; channels < 8; channels++) {
2499 DWConvMicrokernelTester()
2500 .cr(4)
2501 .kr(9)
2502 .channels(channels)
2503 .qmax(128)
2504 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2505 }
2506 }
2507
2508 TEST(F32_DWCONV_UP4X9__SSE_ACC2, multipixel) {
2509 TEST_REQUIRES_X86_SSE;
2510 for (size_t channels = 1; channels <= 20; channels += 3) {
2511 DWConvMicrokernelTester()
2512 .cr(4)
2513 .kr(9)
2514 .channels(channels)
2515 .width(3)
2516 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2517 }
2518 }
2519
2520 TEST(F32_DWCONV_UP4X9__SSE_ACC2, multipixel_with_step) {
2521 TEST_REQUIRES_X86_SSE;
2522 for (size_t channels = 1; channels <= 20; channels += 3) {
2523 for (size_t step = 2; step <= 9; step++) {
2524 DWConvMicrokernelTester()
2525 .cr(4)
2526 .kr(9)
2527 .channels(channels)
2528 .width(3)
2529 .step(step)
2530 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2531 }
2532 }
2533 }
2534
2535 TEST(F32_DWCONV_UP4X9__SSE_ACC2, multipixel_with_output_stride) {
2536 TEST_REQUIRES_X86_SSE;
2537 for (size_t channels = 1; channels <= 20; channels += 3) {
2538 DWConvMicrokernelTester()
2539 .cr(4)
2540 .kr(9)
2541 .channels(4)
2542 .width(5)
2543 .output_stride(23)
2544 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2545 }
2546 }
2547
2548 TEST(F32_DWCONV_UP4X9__SSE_ACC2, multipixel_with_qmin) {
2549 TEST_REQUIRES_X86_SSE;
2550 for (size_t channels = 1; channels <= 20; channels += 3) {
2551 DWConvMicrokernelTester()
2552 .cr(4)
2553 .kr(9)
2554 .channels(channels)
2555 .width(3)
2556 .qmin(128)
2557 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2558 }
2559 }
2560
2561 TEST(F32_DWCONV_UP4X9__SSE_ACC2, multipixel_with_qmax) {
2562 TEST_REQUIRES_X86_SSE;
2563 for (size_t channels = 1; channels <= 20; channels += 3) {
2564 DWConvMicrokernelTester()
2565 .cr(4)
2566 .kr(9)
2567 .channels(channels)
2568 .width(3)
2569 .qmax(128)
2570 .Test(xnn_f32_dwconv_ukernel_up4x9__sse_acc2);
2571 }
2572 }
2573#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2574
2575
2576#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2577 TEST(F32_DWCONV_UP8X9__SSE, c_eq_8) {
2578 TEST_REQUIRES_X86_SSE;
2579 DWConvMicrokernelTester()
2580 .cr(8)
2581 .kr(9)
2582 .channels(8)
2583 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2584 }
2585
2586 TEST(F32_DWCONV_UP8X9__SSE, c_div_8) {
2587 TEST_REQUIRES_X86_SSE;
2588 for (uint32_t channels = 16; channels < 128; channels += 24) {
2589 DWConvMicrokernelTester()
2590 .cr(8)
2591 .kr(9)
2592 .channels(channels)
2593 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2594 }
2595 }
2596
2597 TEST(F32_DWCONV_UP8X9__SSE, c_div_8_with_qmin) {
2598 TEST_REQUIRES_X86_SSE;
2599 for (uint32_t channels = 16; channels < 128; channels += 24) {
2600 DWConvMicrokernelTester()
2601 .cr(8)
2602 .kr(9)
2603 .channels(channels)
2604 .qmin(128)
2605 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2606 }
2607 }
2608
2609 TEST(F32_DWCONV_UP8X9__SSE, c_div_8_with_qmax) {
2610 TEST_REQUIRES_X86_SSE;
2611 for (uint32_t channels = 16; channels < 128; channels += 24) {
2612 DWConvMicrokernelTester()
2613 .cr(8)
2614 .kr(9)
2615 .channels(channels)
2616 .qmax(128)
2617 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2618 }
2619 }
2620
2621 TEST(F32_DWCONV_UP8X9__SSE, c_lt_8) {
2622 TEST_REQUIRES_X86_SSE;
2623 for (uint32_t channels = 1; channels < 8; channels++) {
2624 DWConvMicrokernelTester()
2625 .cr(8)
2626 .kr(9)
2627 .channels(channels)
2628 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2629 }
2630 }
2631
2632 TEST(F32_DWCONV_UP8X9__SSE, c_gt_8) {
2633 TEST_REQUIRES_X86_SSE;
2634 for (uint32_t channels = 9; channels < 16; channels++) {
2635 DWConvMicrokernelTester()
2636 .cr(8)
2637 .kr(9)
2638 .channels(channels)
2639 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2640 }
2641 }
2642
2643 TEST(F32_DWCONV_UP8X9__SSE, c_gt_8_with_qmin) {
2644 TEST_REQUIRES_X86_SSE;
2645 for (uint32_t channels = 9; channels < 16; channels++) {
2646 DWConvMicrokernelTester()
2647 .cr(8)
2648 .kr(9)
2649 .channels(channels)
2650 .qmin(128)
2651 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2652 }
2653 }
2654
2655 TEST(F32_DWCONV_UP8X9__SSE, c_gt_8_with_qmax) {
2656 TEST_REQUIRES_X86_SSE;
2657 for (uint32_t channels = 9; channels < 16; channels++) {
2658 DWConvMicrokernelTester()
2659 .cr(8)
2660 .kr(9)
2661 .channels(channels)
2662 .qmax(128)
2663 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2664 }
2665 }
2666
2667 TEST(F32_DWCONV_UP8X9__SSE, multipixel) {
2668 TEST_REQUIRES_X86_SSE;
2669 for (size_t channels = 1; channels <= 40; channels += 7) {
2670 DWConvMicrokernelTester()
2671 .cr(8)
2672 .kr(9)
2673 .channels(channels)
2674 .width(3)
2675 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2676 }
2677 }
2678
2679 TEST(F32_DWCONV_UP8X9__SSE, multipixel_with_step) {
2680 TEST_REQUIRES_X86_SSE;
2681 for (size_t channels = 1; channels <= 40; channels += 7) {
2682 for (size_t step = 2; step <= 9; step++) {
2683 DWConvMicrokernelTester()
2684 .cr(8)
2685 .kr(9)
2686 .channels(channels)
2687 .width(3)
2688 .step(step)
2689 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2690 }
2691 }
2692 }
2693
2694 TEST(F32_DWCONV_UP8X9__SSE, multipixel_with_output_stride) {
2695 TEST_REQUIRES_X86_SSE;
2696 for (size_t channels = 1; channels <= 40; channels += 7) {
2697 DWConvMicrokernelTester()
2698 .cr(8)
2699 .kr(9)
2700 .channels(8)
2701 .width(5)
2702 .output_stride(43)
2703 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2704 }
2705 }
2706
2707 TEST(F32_DWCONV_UP8X9__SSE, multipixel_with_qmin) {
2708 TEST_REQUIRES_X86_SSE;
2709 for (size_t channels = 1; channels <= 40; channels += 7) {
2710 DWConvMicrokernelTester()
2711 .cr(8)
2712 .kr(9)
2713 .channels(channels)
2714 .width(3)
2715 .qmin(128)
2716 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2717 }
2718 }
2719
2720 TEST(F32_DWCONV_UP8X9__SSE, multipixel_with_qmax) {
2721 TEST_REQUIRES_X86_SSE;
2722 for (size_t channels = 1; channels <= 40; channels += 7) {
2723 DWConvMicrokernelTester()
2724 .cr(8)
2725 .kr(9)
2726 .channels(channels)
2727 .width(3)
2728 .qmax(128)
2729 .Test(xnn_f32_dwconv_ukernel_up8x9__sse);
2730 }
2731 }
2732#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2733
2734
2735#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2736 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_eq_8) {
2737 TEST_REQUIRES_X86_SSE;
2738 DWConvMicrokernelTester()
2739 .cr(8)
2740 .kr(9)
2741 .channels(8)
2742 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2743 }
2744
2745 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_div_8) {
2746 TEST_REQUIRES_X86_SSE;
2747 for (uint32_t channels = 16; channels < 128; channels += 24) {
2748 DWConvMicrokernelTester()
2749 .cr(8)
2750 .kr(9)
2751 .channels(channels)
2752 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2753 }
2754 }
2755
2756 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_div_8_with_qmin) {
2757 TEST_REQUIRES_X86_SSE;
2758 for (uint32_t channels = 16; channels < 128; channels += 24) {
2759 DWConvMicrokernelTester()
2760 .cr(8)
2761 .kr(9)
2762 .channels(channels)
2763 .qmin(128)
2764 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2765 }
2766 }
2767
2768 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_div_8_with_qmax) {
2769 TEST_REQUIRES_X86_SSE;
2770 for (uint32_t channels = 16; channels < 128; channels += 24) {
2771 DWConvMicrokernelTester()
2772 .cr(8)
2773 .kr(9)
2774 .channels(channels)
2775 .qmax(128)
2776 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2777 }
2778 }
2779
2780 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_lt_8) {
2781 TEST_REQUIRES_X86_SSE;
2782 for (uint32_t channels = 1; channels < 8; channels++) {
2783 DWConvMicrokernelTester()
2784 .cr(8)
2785 .kr(9)
2786 .channels(channels)
2787 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2788 }
2789 }
2790
2791 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_gt_8) {
2792 TEST_REQUIRES_X86_SSE;
2793 for (uint32_t channels = 9; channels < 16; channels++) {
2794 DWConvMicrokernelTester()
2795 .cr(8)
2796 .kr(9)
2797 .channels(channels)
2798 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2799 }
2800 }
2801
2802 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_gt_8_with_qmin) {
2803 TEST_REQUIRES_X86_SSE;
2804 for (uint32_t channels = 9; channels < 16; channels++) {
2805 DWConvMicrokernelTester()
2806 .cr(8)
2807 .kr(9)
2808 .channels(channels)
2809 .qmin(128)
2810 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2811 }
2812 }
2813
2814 TEST(F32_DWCONV_UP8X9__SSE_ACC2, c_gt_8_with_qmax) {
2815 TEST_REQUIRES_X86_SSE;
2816 for (uint32_t channels = 9; channels < 16; channels++) {
2817 DWConvMicrokernelTester()
2818 .cr(8)
2819 .kr(9)
2820 .channels(channels)
2821 .qmax(128)
2822 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2823 }
2824 }
2825
2826 TEST(F32_DWCONV_UP8X9__SSE_ACC2, multipixel) {
2827 TEST_REQUIRES_X86_SSE;
2828 for (size_t channels = 1; channels <= 40; channels += 7) {
2829 DWConvMicrokernelTester()
2830 .cr(8)
2831 .kr(9)
2832 .channels(channels)
2833 .width(3)
2834 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2835 }
2836 }
2837
2838 TEST(F32_DWCONV_UP8X9__SSE_ACC2, multipixel_with_step) {
2839 TEST_REQUIRES_X86_SSE;
2840 for (size_t channels = 1; channels <= 40; channels += 7) {
2841 for (size_t step = 2; step <= 9; step++) {
2842 DWConvMicrokernelTester()
2843 .cr(8)
2844 .kr(9)
2845 .channels(channels)
2846 .width(3)
2847 .step(step)
2848 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2849 }
2850 }
2851 }
2852
2853 TEST(F32_DWCONV_UP8X9__SSE_ACC2, multipixel_with_output_stride) {
2854 TEST_REQUIRES_X86_SSE;
2855 for (size_t channels = 1; channels <= 40; channels += 7) {
2856 DWConvMicrokernelTester()
2857 .cr(8)
2858 .kr(9)
2859 .channels(8)
2860 .width(5)
2861 .output_stride(43)
2862 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2863 }
2864 }
2865
2866 TEST(F32_DWCONV_UP8X9__SSE_ACC2, multipixel_with_qmin) {
2867 TEST_REQUIRES_X86_SSE;
2868 for (size_t channels = 1; channels <= 40; channels += 7) {
2869 DWConvMicrokernelTester()
2870 .cr(8)
2871 .kr(9)
2872 .channels(channels)
2873 .width(3)
2874 .qmin(128)
2875 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2876 }
2877 }
2878
2879 TEST(F32_DWCONV_UP8X9__SSE_ACC2, multipixel_with_qmax) {
2880 TEST_REQUIRES_X86_SSE;
2881 for (size_t channels = 1; channels <= 40; channels += 7) {
2882 DWConvMicrokernelTester()
2883 .cr(8)
2884 .kr(9)
2885 .channels(channels)
2886 .width(3)
2887 .qmax(128)
2888 .Test(xnn_f32_dwconv_ukernel_up8x9__sse_acc2);
2889 }
2890 }
2891#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2892
2893
2894#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2895 TEST(F32_DWCONV_UP4X4__SSE, c_eq_4) {
2896 TEST_REQUIRES_X86_SSE;
2897 DWConvMicrokernelTester()
2898 .cr(4)
2899 .kr(4)
2900 .channels(4)
2901 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2902 }
2903
2904 TEST(F32_DWCONV_UP4X4__SSE, c_div_4) {
2905 TEST_REQUIRES_X86_SSE;
2906 for (uint32_t channels = 8; channels < 64; channels += 12) {
2907 DWConvMicrokernelTester()
2908 .cr(4)
2909 .kr(4)
2910 .channels(channels)
2911 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2912 }
2913 }
2914
2915 TEST(F32_DWCONV_UP4X4__SSE, c_div_4_with_qmin) {
2916 TEST_REQUIRES_X86_SSE;
2917 for (uint32_t channels = 8; channels < 64; channels += 12) {
2918 DWConvMicrokernelTester()
2919 .cr(4)
2920 .kr(4)
2921 .channels(channels)
2922 .qmin(128)
2923 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2924 }
2925 }
2926
2927 TEST(F32_DWCONV_UP4X4__SSE, c_div_4_with_qmax) {
2928 TEST_REQUIRES_X86_SSE;
2929 for (uint32_t channels = 8; channels < 64; channels += 12) {
2930 DWConvMicrokernelTester()
2931 .cr(4)
2932 .kr(4)
2933 .channels(channels)
2934 .qmax(128)
2935 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2936 }
2937 }
2938
2939 TEST(F32_DWCONV_UP4X4__SSE, c_lt_4) {
2940 TEST_REQUIRES_X86_SSE;
2941 for (uint32_t channels = 1; channels < 4; channels++) {
2942 DWConvMicrokernelTester()
2943 .cr(4)
2944 .kr(4)
2945 .channels(channels)
2946 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2947 }
2948 }
2949
2950 TEST(F32_DWCONV_UP4X4__SSE, c_gt_4) {
2951 TEST_REQUIRES_X86_SSE;
2952 for (uint32_t channels = 5; channels < 8; channels++) {
2953 DWConvMicrokernelTester()
2954 .cr(4)
2955 .kr(4)
2956 .channels(channels)
2957 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2958 }
2959 }
2960
2961 TEST(F32_DWCONV_UP4X4__SSE, c_gt_4_with_qmin) {
2962 TEST_REQUIRES_X86_SSE;
2963 for (uint32_t channels = 5; channels < 8; channels++) {
2964 DWConvMicrokernelTester()
2965 .cr(4)
2966 .kr(4)
2967 .channels(channels)
2968 .qmin(128)
2969 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2970 }
2971 }
2972
2973 TEST(F32_DWCONV_UP4X4__SSE, c_gt_4_with_qmax) {
2974 TEST_REQUIRES_X86_SSE;
2975 for (uint32_t channels = 5; channels < 8; channels++) {
2976 DWConvMicrokernelTester()
2977 .cr(4)
2978 .kr(4)
2979 .channels(channels)
2980 .qmax(128)
2981 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2982 }
2983 }
2984
2985 TEST(F32_DWCONV_UP4X4__SSE, multipixel) {
2986 TEST_REQUIRES_X86_SSE;
2987 for (size_t channels = 1; channels <= 20; channels += 3) {
2988 DWConvMicrokernelTester()
2989 .cr(4)
2990 .kr(4)
2991 .channels(channels)
2992 .width(3)
2993 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
2994 }
2995 }
2996
2997 TEST(F32_DWCONV_UP4X4__SSE, multipixel_with_step) {
2998 TEST_REQUIRES_X86_SSE;
2999 for (size_t channels = 1; channels <= 20; channels += 3) {
3000 for (size_t step = 2; step <= 4; step++) {
3001 DWConvMicrokernelTester()
3002 .cr(4)
3003 .kr(4)
3004 .channels(channels)
3005 .width(3)
3006 .step(step)
3007 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
3008 }
3009 }
3010 }
3011
3012 TEST(F32_DWCONV_UP4X4__SSE, multipixel_with_output_stride) {
3013 TEST_REQUIRES_X86_SSE;
3014 for (size_t channels = 1; channels <= 20; channels += 3) {
3015 DWConvMicrokernelTester()
3016 .cr(4)
3017 .kr(4)
3018 .channels(4)
3019 .width(5)
3020 .output_stride(23)
3021 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
3022 }
3023 }
3024
3025 TEST(F32_DWCONV_UP4X4__SSE, multipixel_with_qmin) {
3026 TEST_REQUIRES_X86_SSE;
3027 for (size_t channels = 1; channels <= 20; channels += 3) {
3028 DWConvMicrokernelTester()
3029 .cr(4)
3030 .kr(4)
3031 .channels(channels)
3032 .width(3)
3033 .qmin(128)
3034 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
3035 }
3036 }
3037
3038 TEST(F32_DWCONV_UP4X4__SSE, multipixel_with_qmax) {
3039 TEST_REQUIRES_X86_SSE;
3040 for (size_t channels = 1; channels <= 20; channels += 3) {
3041 DWConvMicrokernelTester()
3042 .cr(4)
3043 .kr(4)
3044 .channels(channels)
3045 .width(3)
3046 .qmax(128)
3047 .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
3048 }
3049 }
3050#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3051
3052
3053#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3054 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_eq_4) {
3055 TEST_REQUIRES_X86_SSE;
3056 DWConvMicrokernelTester()
3057 .cr(4)
3058 .kr(4)
3059 .channels(4)
3060 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3061 }
3062
3063 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_div_4) {
3064 TEST_REQUIRES_X86_SSE;
3065 for (uint32_t channels = 8; channels < 64; channels += 12) {
3066 DWConvMicrokernelTester()
3067 .cr(4)
3068 .kr(4)
3069 .channels(channels)
3070 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3071 }
3072 }
3073
3074 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_div_4_with_qmin) {
3075 TEST_REQUIRES_X86_SSE;
3076 for (uint32_t channels = 8; channels < 64; channels += 12) {
3077 DWConvMicrokernelTester()
3078 .cr(4)
3079 .kr(4)
3080 .channels(channels)
3081 .qmin(128)
3082 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3083 }
3084 }
3085
3086 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_div_4_with_qmax) {
3087 TEST_REQUIRES_X86_SSE;
3088 for (uint32_t channels = 8; channels < 64; channels += 12) {
3089 DWConvMicrokernelTester()
3090 .cr(4)
3091 .kr(4)
3092 .channels(channels)
3093 .qmax(128)
3094 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3095 }
3096 }
3097
3098 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_lt_4) {
3099 TEST_REQUIRES_X86_SSE;
3100 for (uint32_t channels = 1; channels < 4; channels++) {
3101 DWConvMicrokernelTester()
3102 .cr(4)
3103 .kr(4)
3104 .channels(channels)
3105 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3106 }
3107 }
3108
3109 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_gt_4) {
3110 TEST_REQUIRES_X86_SSE;
3111 for (uint32_t channels = 5; channels < 8; channels++) {
3112 DWConvMicrokernelTester()
3113 .cr(4)
3114 .kr(4)
3115 .channels(channels)
3116 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3117 }
3118 }
3119
3120 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_gt_4_with_qmin) {
3121 TEST_REQUIRES_X86_SSE;
3122 for (uint32_t channels = 5; channels < 8; channels++) {
3123 DWConvMicrokernelTester()
3124 .cr(4)
3125 .kr(4)
3126 .channels(channels)
3127 .qmin(128)
3128 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3129 }
3130 }
3131
3132 TEST(F32_DWCONV_UP4X4__SSE_ACC2, c_gt_4_with_qmax) {
3133 TEST_REQUIRES_X86_SSE;
3134 for (uint32_t channels = 5; channels < 8; channels++) {
3135 DWConvMicrokernelTester()
3136 .cr(4)
3137 .kr(4)
3138 .channels(channels)
3139 .qmax(128)
3140 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3141 }
3142 }
3143
3144 TEST(F32_DWCONV_UP4X4__SSE_ACC2, multipixel) {
3145 TEST_REQUIRES_X86_SSE;
3146 for (size_t channels = 1; channels <= 20; channels += 3) {
3147 DWConvMicrokernelTester()
3148 .cr(4)
3149 .kr(4)
3150 .channels(channels)
3151 .width(3)
3152 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3153 }
3154 }
3155
3156 TEST(F32_DWCONV_UP4X4__SSE_ACC2, multipixel_with_step) {
3157 TEST_REQUIRES_X86_SSE;
3158 for (size_t channels = 1; channels <= 20; channels += 3) {
3159 for (size_t step = 2; step <= 4; step++) {
3160 DWConvMicrokernelTester()
3161 .cr(4)
3162 .kr(4)
3163 .channels(channels)
3164 .width(3)
3165 .step(step)
3166 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3167 }
3168 }
3169 }
3170
3171 TEST(F32_DWCONV_UP4X4__SSE_ACC2, multipixel_with_output_stride) {
3172 TEST_REQUIRES_X86_SSE;
3173 for (size_t channels = 1; channels <= 20; channels += 3) {
3174 DWConvMicrokernelTester()
3175 .cr(4)
3176 .kr(4)
3177 .channels(4)
3178 .width(5)
3179 .output_stride(23)
3180 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3181 }
3182 }
3183
3184 TEST(F32_DWCONV_UP4X4__SSE_ACC2, multipixel_with_qmin) {
3185 TEST_REQUIRES_X86_SSE;
3186 for (size_t channels = 1; channels <= 20; channels += 3) {
3187 DWConvMicrokernelTester()
3188 .cr(4)
3189 .kr(4)
3190 .channels(channels)
3191 .width(3)
3192 .qmin(128)
3193 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3194 }
3195 }
3196
3197 TEST(F32_DWCONV_UP4X4__SSE_ACC2, multipixel_with_qmax) {
3198 TEST_REQUIRES_X86_SSE;
3199 for (size_t channels = 1; channels <= 20; channels += 3) {
3200 DWConvMicrokernelTester()
3201 .cr(4)
3202 .kr(4)
3203 .channels(channels)
3204 .width(3)
3205 .qmax(128)
3206 .Test(xnn_f32_dwconv_ukernel_up4x4__sse_acc2);
3207 }
3208 }
3209#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3210
3211
3212#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3213 TEST(F32_DWCONV_UP8X4__SSE, c_eq_8) {
3214 TEST_REQUIRES_X86_SSE;
3215 DWConvMicrokernelTester()
3216 .cr(8)
3217 .kr(4)
3218 .channels(8)
3219 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3220 }
3221
3222 TEST(F32_DWCONV_UP8X4__SSE, c_div_8) {
3223 TEST_REQUIRES_X86_SSE;
3224 for (uint32_t channels = 16; channels < 128; channels += 24) {
3225 DWConvMicrokernelTester()
3226 .cr(8)
3227 .kr(4)
3228 .channels(channels)
3229 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3230 }
3231 }
3232
3233 TEST(F32_DWCONV_UP8X4__SSE, c_div_8_with_qmin) {
3234 TEST_REQUIRES_X86_SSE;
3235 for (uint32_t channels = 16; channels < 128; channels += 24) {
3236 DWConvMicrokernelTester()
3237 .cr(8)
3238 .kr(4)
3239 .channels(channels)
3240 .qmin(128)
3241 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3242 }
3243 }
3244
3245 TEST(F32_DWCONV_UP8X4__SSE, c_div_8_with_qmax) {
3246 TEST_REQUIRES_X86_SSE;
3247 for (uint32_t channels = 16; channels < 128; channels += 24) {
3248 DWConvMicrokernelTester()
3249 .cr(8)
3250 .kr(4)
3251 .channels(channels)
3252 .qmax(128)
3253 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3254 }
3255 }
3256
3257 TEST(F32_DWCONV_UP8X4__SSE, c_lt_8) {
3258 TEST_REQUIRES_X86_SSE;
3259 for (uint32_t channels = 1; channels < 8; channels++) {
3260 DWConvMicrokernelTester()
3261 .cr(8)
3262 .kr(4)
3263 .channels(channels)
3264 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3265 }
3266 }
3267
3268 TEST(F32_DWCONV_UP8X4__SSE, c_gt_8) {
3269 TEST_REQUIRES_X86_SSE;
3270 for (uint32_t channels = 9; channels < 16; channels++) {
3271 DWConvMicrokernelTester()
3272 .cr(8)
3273 .kr(4)
3274 .channels(channels)
3275 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3276 }
3277 }
3278
3279 TEST(F32_DWCONV_UP8X4__SSE, c_gt_8_with_qmin) {
3280 TEST_REQUIRES_X86_SSE;
3281 for (uint32_t channels = 9; channels < 16; channels++) {
3282 DWConvMicrokernelTester()
3283 .cr(8)
3284 .kr(4)
3285 .channels(channels)
3286 .qmin(128)
3287 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3288 }
3289 }
3290
3291 TEST(F32_DWCONV_UP8X4__SSE, c_gt_8_with_qmax) {
3292 TEST_REQUIRES_X86_SSE;
3293 for (uint32_t channels = 9; channels < 16; channels++) {
3294 DWConvMicrokernelTester()
3295 .cr(8)
3296 .kr(4)
3297 .channels(channels)
3298 .qmax(128)
3299 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3300 }
3301 }
3302
3303 TEST(F32_DWCONV_UP8X4__SSE, multipixel) {
3304 TEST_REQUIRES_X86_SSE;
3305 for (size_t channels = 1; channels <= 40; channels += 7) {
3306 DWConvMicrokernelTester()
3307 .cr(8)
3308 .kr(4)
3309 .channels(channels)
3310 .width(3)
3311 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3312 }
3313 }
3314
3315 TEST(F32_DWCONV_UP8X4__SSE, multipixel_with_step) {
3316 TEST_REQUIRES_X86_SSE;
3317 for (size_t channels = 1; channels <= 40; channels += 7) {
3318 for (size_t step = 2; step <= 4; step++) {
3319 DWConvMicrokernelTester()
3320 .cr(8)
3321 .kr(4)
3322 .channels(channels)
3323 .width(3)
3324 .step(step)
3325 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3326 }
3327 }
3328 }
3329
3330 TEST(F32_DWCONV_UP8X4__SSE, multipixel_with_output_stride) {
3331 TEST_REQUIRES_X86_SSE;
3332 for (size_t channels = 1; channels <= 40; channels += 7) {
3333 DWConvMicrokernelTester()
3334 .cr(8)
3335 .kr(4)
3336 .channels(8)
3337 .width(5)
3338 .output_stride(43)
3339 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3340 }
3341 }
3342
3343 TEST(F32_DWCONV_UP8X4__SSE, multipixel_with_qmin) {
3344 TEST_REQUIRES_X86_SSE;
3345 for (size_t channels = 1; channels <= 40; channels += 7) {
3346 DWConvMicrokernelTester()
3347 .cr(8)
3348 .kr(4)
3349 .channels(channels)
3350 .width(3)
3351 .qmin(128)
3352 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3353 }
3354 }
3355
3356 TEST(F32_DWCONV_UP8X4__SSE, multipixel_with_qmax) {
3357 TEST_REQUIRES_X86_SSE;
3358 for (size_t channels = 1; channels <= 40; channels += 7) {
3359 DWConvMicrokernelTester()
3360 .cr(8)
3361 .kr(4)
3362 .channels(channels)
3363 .width(3)
3364 .qmax(128)
3365 .Test(xnn_f32_dwconv_ukernel_up8x4__sse);
3366 }
3367 }
3368#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3369
3370
3371#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3372 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_eq_8) {
3373 TEST_REQUIRES_X86_SSE;
3374 DWConvMicrokernelTester()
3375 .cr(8)
3376 .kr(4)
3377 .channels(8)
3378 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3379 }
3380
3381 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_div_8) {
3382 TEST_REQUIRES_X86_SSE;
3383 for (uint32_t channels = 16; channels < 128; channels += 24) {
3384 DWConvMicrokernelTester()
3385 .cr(8)
3386 .kr(4)
3387 .channels(channels)
3388 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3389 }
3390 }
3391
3392 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_div_8_with_qmin) {
3393 TEST_REQUIRES_X86_SSE;
3394 for (uint32_t channels = 16; channels < 128; channels += 24) {
3395 DWConvMicrokernelTester()
3396 .cr(8)
3397 .kr(4)
3398 .channels(channels)
3399 .qmin(128)
3400 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3401 }
3402 }
3403
3404 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_div_8_with_qmax) {
3405 TEST_REQUIRES_X86_SSE;
3406 for (uint32_t channels = 16; channels < 128; channels += 24) {
3407 DWConvMicrokernelTester()
3408 .cr(8)
3409 .kr(4)
3410 .channels(channels)
3411 .qmax(128)
3412 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3413 }
3414 }
3415
3416 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_lt_8) {
3417 TEST_REQUIRES_X86_SSE;
3418 for (uint32_t channels = 1; channels < 8; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(8)
3421 .kr(4)
3422 .channels(channels)
3423 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3424 }
3425 }
3426
3427 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_gt_8) {
3428 TEST_REQUIRES_X86_SSE;
3429 for (uint32_t channels = 9; channels < 16; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(8)
3432 .kr(4)
3433 .channels(channels)
3434 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3435 }
3436 }
3437
3438 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_gt_8_with_qmin) {
3439 TEST_REQUIRES_X86_SSE;
3440 for (uint32_t channels = 9; channels < 16; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(8)
3443 .kr(4)
3444 .channels(channels)
3445 .qmin(128)
3446 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3447 }
3448 }
3449
3450 TEST(F32_DWCONV_UP8X4__SSE_ACC2, c_gt_8_with_qmax) {
3451 TEST_REQUIRES_X86_SSE;
3452 for (uint32_t channels = 9; channels < 16; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(8)
3455 .kr(4)
3456 .channels(channels)
3457 .qmax(128)
3458 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3459 }
3460 }
3461
3462 TEST(F32_DWCONV_UP8X4__SSE_ACC2, multipixel) {
3463 TEST_REQUIRES_X86_SSE;
3464 for (size_t channels = 1; channels <= 40; channels += 7) {
3465 DWConvMicrokernelTester()
3466 .cr(8)
3467 .kr(4)
3468 .channels(channels)
3469 .width(3)
3470 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3471 }
3472 }
3473
3474 TEST(F32_DWCONV_UP8X4__SSE_ACC2, multipixel_with_step) {
3475 TEST_REQUIRES_X86_SSE;
3476 for (size_t channels = 1; channels <= 40; channels += 7) {
3477 for (size_t step = 2; step <= 4; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(8)
3480 .kr(4)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
3484 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3485 }
3486 }
3487 }
3488
3489 TEST(F32_DWCONV_UP8X4__SSE_ACC2, multipixel_with_output_stride) {
3490 TEST_REQUIRES_X86_SSE;
3491 for (size_t channels = 1; channels <= 40; channels += 7) {
3492 DWConvMicrokernelTester()
3493 .cr(8)
3494 .kr(4)
3495 .channels(8)
3496 .width(5)
3497 .output_stride(43)
3498 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3499 }
3500 }
3501
3502 TEST(F32_DWCONV_UP8X4__SSE_ACC2, multipixel_with_qmin) {
3503 TEST_REQUIRES_X86_SSE;
3504 for (size_t channels = 1; channels <= 40; channels += 7) {
3505 DWConvMicrokernelTester()
3506 .cr(8)
3507 .kr(4)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
3511 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3512 }
3513 }
3514
3515 TEST(F32_DWCONV_UP8X4__SSE_ACC2, multipixel_with_qmax) {
3516 TEST_REQUIRES_X86_SSE;
3517 for (size_t channels = 1; channels <= 40; channels += 7) {
3518 DWConvMicrokernelTester()
3519 .cr(8)
3520 .kr(4)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
3524 .Test(xnn_f32_dwconv_ukernel_up8x4__sse_acc2);
3525 }
3526 }
3527#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3528
3529
3530#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3531 TEST(F32_DWCONV_UP8X25__AVX, c_eq_8) {
3532 TEST_REQUIRES_X86_AVX;
3533 DWConvMicrokernelTester()
3534 .cr(8)
3535 .kr(25)
3536 .channels(8)
3537 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3538 }
3539
3540 TEST(F32_DWCONV_UP8X25__AVX, c_div_8) {
3541 TEST_REQUIRES_X86_AVX;
3542 for (uint32_t channels = 16; channels < 128; channels += 24) {
3543 DWConvMicrokernelTester()
3544 .cr(8)
3545 .kr(25)
3546 .channels(channels)
3547 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3548 }
3549 }
3550
3551 TEST(F32_DWCONV_UP8X25__AVX, c_div_8_with_qmin) {
3552 TEST_REQUIRES_X86_AVX;
3553 for (uint32_t channels = 16; channels < 128; channels += 24) {
3554 DWConvMicrokernelTester()
3555 .cr(8)
3556 .kr(25)
3557 .channels(channels)
3558 .qmin(128)
3559 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3560 }
3561 }
3562
3563 TEST(F32_DWCONV_UP8X25__AVX, c_div_8_with_qmax) {
3564 TEST_REQUIRES_X86_AVX;
3565 for (uint32_t channels = 16; channels < 128; channels += 24) {
3566 DWConvMicrokernelTester()
3567 .cr(8)
3568 .kr(25)
3569 .channels(channels)
3570 .qmax(128)
3571 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3572 }
3573 }
3574
3575 TEST(F32_DWCONV_UP8X25__AVX, c_lt_8) {
3576 TEST_REQUIRES_X86_AVX;
3577 for (uint32_t channels = 1; channels < 8; channels++) {
3578 DWConvMicrokernelTester()
3579 .cr(8)
3580 .kr(25)
3581 .channels(channels)
3582 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3583 }
3584 }
3585
3586 TEST(F32_DWCONV_UP8X25__AVX, c_gt_8) {
3587 TEST_REQUIRES_X86_AVX;
3588 for (uint32_t channels = 9; channels < 16; channels++) {
3589 DWConvMicrokernelTester()
3590 .cr(8)
3591 .kr(25)
3592 .channels(channels)
3593 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3594 }
3595 }
3596
3597 TEST(F32_DWCONV_UP8X25__AVX, c_gt_8_with_qmin) {
3598 TEST_REQUIRES_X86_AVX;
3599 for (uint32_t channels = 9; channels < 16; channels++) {
3600 DWConvMicrokernelTester()
3601 .cr(8)
3602 .kr(25)
3603 .channels(channels)
3604 .qmin(128)
3605 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3606 }
3607 }
3608
3609 TEST(F32_DWCONV_UP8X25__AVX, c_gt_8_with_qmax) {
3610 TEST_REQUIRES_X86_AVX;
3611 for (uint32_t channels = 9; channels < 16; channels++) {
3612 DWConvMicrokernelTester()
3613 .cr(8)
3614 .kr(25)
3615 .channels(channels)
3616 .qmax(128)
3617 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3618 }
3619 }
3620
3621 TEST(F32_DWCONV_UP8X25__AVX, multipixel) {
3622 TEST_REQUIRES_X86_AVX;
3623 for (size_t channels = 1; channels <= 40; channels += 7) {
3624 DWConvMicrokernelTester()
3625 .cr(8)
3626 .kr(25)
3627 .channels(channels)
3628 .width(3)
3629 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3630 }
3631 }
3632
3633 TEST(F32_DWCONV_UP8X25__AVX, multipixel_with_step) {
3634 TEST_REQUIRES_X86_AVX;
3635 for (size_t channels = 1; channels <= 40; channels += 7) {
3636 for (size_t step = 2; step <= 25; step++) {
3637 DWConvMicrokernelTester()
3638 .cr(8)
3639 .kr(25)
3640 .channels(channels)
3641 .width(3)
3642 .step(step)
3643 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3644 }
3645 }
3646 }
3647
3648 TEST(F32_DWCONV_UP8X25__AVX, multipixel_with_output_stride) {
3649 TEST_REQUIRES_X86_AVX;
3650 for (size_t channels = 1; channels <= 40; channels += 7) {
3651 DWConvMicrokernelTester()
3652 .cr(8)
3653 .kr(25)
3654 .channels(8)
3655 .width(5)
3656 .output_stride(43)
3657 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3658 }
3659 }
3660
3661 TEST(F32_DWCONV_UP8X25__AVX, multipixel_with_qmin) {
3662 TEST_REQUIRES_X86_AVX;
3663 for (size_t channels = 1; channels <= 40; channels += 7) {
3664 DWConvMicrokernelTester()
3665 .cr(8)
3666 .kr(25)
3667 .channels(channels)
3668 .width(3)
3669 .qmin(128)
3670 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3671 }
3672 }
3673
3674 TEST(F32_DWCONV_UP8X25__AVX, multipixel_with_qmax) {
3675 TEST_REQUIRES_X86_AVX;
3676 for (size_t channels = 1; channels <= 40; channels += 7) {
3677 DWConvMicrokernelTester()
3678 .cr(8)
3679 .kr(25)
3680 .channels(channels)
3681 .width(3)
3682 .qmax(128)
3683 .Test(xnn_f32_dwconv_ukernel_up8x25__avx);
3684 }
3685 }
3686#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3687
3688
3689#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3690 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_eq_8) {
3691 TEST_REQUIRES_X86_AVX;
3692 DWConvMicrokernelTester()
3693 .cr(8)
3694 .kr(25)
3695 .channels(8)
3696 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3697 }
3698
3699 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_div_8) {
3700 TEST_REQUIRES_X86_AVX;
3701 for (uint32_t channels = 16; channels < 128; channels += 24) {
3702 DWConvMicrokernelTester()
3703 .cr(8)
3704 .kr(25)
3705 .channels(channels)
3706 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3707 }
3708 }
3709
3710 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_div_8_with_qmin) {
3711 TEST_REQUIRES_X86_AVX;
3712 for (uint32_t channels = 16; channels < 128; channels += 24) {
3713 DWConvMicrokernelTester()
3714 .cr(8)
3715 .kr(25)
3716 .channels(channels)
3717 .qmin(128)
3718 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3719 }
3720 }
3721
3722 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_div_8_with_qmax) {
3723 TEST_REQUIRES_X86_AVX;
3724 for (uint32_t channels = 16; channels < 128; channels += 24) {
3725 DWConvMicrokernelTester()
3726 .cr(8)
3727 .kr(25)
3728 .channels(channels)
3729 .qmax(128)
3730 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3731 }
3732 }
3733
3734 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_lt_8) {
3735 TEST_REQUIRES_X86_AVX;
3736 for (uint32_t channels = 1; channels < 8; channels++) {
3737 DWConvMicrokernelTester()
3738 .cr(8)
3739 .kr(25)
3740 .channels(channels)
3741 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3742 }
3743 }
3744
3745 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_gt_8) {
3746 TEST_REQUIRES_X86_AVX;
3747 for (uint32_t channels = 9; channels < 16; channels++) {
3748 DWConvMicrokernelTester()
3749 .cr(8)
3750 .kr(25)
3751 .channels(channels)
3752 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3753 }
3754 }
3755
3756 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_gt_8_with_qmin) {
3757 TEST_REQUIRES_X86_AVX;
3758 for (uint32_t channels = 9; channels < 16; channels++) {
3759 DWConvMicrokernelTester()
3760 .cr(8)
3761 .kr(25)
3762 .channels(channels)
3763 .qmin(128)
3764 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3765 }
3766 }
3767
3768 TEST(F32_DWCONV_UP8X25__AVX_ACC2, c_gt_8_with_qmax) {
3769 TEST_REQUIRES_X86_AVX;
3770 for (uint32_t channels = 9; channels < 16; channels++) {
3771 DWConvMicrokernelTester()
3772 .cr(8)
3773 .kr(25)
3774 .channels(channels)
3775 .qmax(128)
3776 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3777 }
3778 }
3779
3780 TEST(F32_DWCONV_UP8X25__AVX_ACC2, multipixel) {
3781 TEST_REQUIRES_X86_AVX;
3782 for (size_t channels = 1; channels <= 40; channels += 7) {
3783 DWConvMicrokernelTester()
3784 .cr(8)
3785 .kr(25)
3786 .channels(channels)
3787 .width(3)
3788 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3789 }
3790 }
3791
3792 TEST(F32_DWCONV_UP8X25__AVX_ACC2, multipixel_with_step) {
3793 TEST_REQUIRES_X86_AVX;
3794 for (size_t channels = 1; channels <= 40; channels += 7) {
3795 for (size_t step = 2; step <= 25; step++) {
3796 DWConvMicrokernelTester()
3797 .cr(8)
3798 .kr(25)
3799 .channels(channels)
3800 .width(3)
3801 .step(step)
3802 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3803 }
3804 }
3805 }
3806
3807 TEST(F32_DWCONV_UP8X25__AVX_ACC2, multipixel_with_output_stride) {
3808 TEST_REQUIRES_X86_AVX;
3809 for (size_t channels = 1; channels <= 40; channels += 7) {
3810 DWConvMicrokernelTester()
3811 .cr(8)
3812 .kr(25)
3813 .channels(8)
3814 .width(5)
3815 .output_stride(43)
3816 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3817 }
3818 }
3819
3820 TEST(F32_DWCONV_UP8X25__AVX_ACC2, multipixel_with_qmin) {
3821 TEST_REQUIRES_X86_AVX;
3822 for (size_t channels = 1; channels <= 40; channels += 7) {
3823 DWConvMicrokernelTester()
3824 .cr(8)
3825 .kr(25)
3826 .channels(channels)
3827 .width(3)
3828 .qmin(128)
3829 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3830 }
3831 }
3832
3833 TEST(F32_DWCONV_UP8X25__AVX_ACC2, multipixel_with_qmax) {
3834 TEST_REQUIRES_X86_AVX;
3835 for (size_t channels = 1; channels <= 40; channels += 7) {
3836 DWConvMicrokernelTester()
3837 .cr(8)
3838 .kr(25)
3839 .channels(channels)
3840 .width(3)
3841 .qmax(128)
3842 .Test(xnn_f32_dwconv_ukernel_up8x25__avx_acc2);
3843 }
3844 }
3845#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3846
3847
3848#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3849 TEST(F32_DWCONV_UP16X25__AVX, c_eq_16) {
3850 TEST_REQUIRES_X86_AVX;
3851 DWConvMicrokernelTester()
3852 .cr(16)
3853 .kr(25)
3854 .channels(16)
3855 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3856 }
3857
3858 TEST(F32_DWCONV_UP16X25__AVX, c_div_16) {
3859 TEST_REQUIRES_X86_AVX;
3860 for (uint32_t channels = 32; channels < 256; channels += 48) {
3861 DWConvMicrokernelTester()
3862 .cr(16)
3863 .kr(25)
3864 .channels(channels)
3865 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3866 }
3867 }
3868
3869 TEST(F32_DWCONV_UP16X25__AVX, c_div_16_with_qmin) {
3870 TEST_REQUIRES_X86_AVX;
3871 for (uint32_t channels = 32; channels < 256; channels += 48) {
3872 DWConvMicrokernelTester()
3873 .cr(16)
3874 .kr(25)
3875 .channels(channels)
3876 .qmin(128)
3877 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3878 }
3879 }
3880
3881 TEST(F32_DWCONV_UP16X25__AVX, c_div_16_with_qmax) {
3882 TEST_REQUIRES_X86_AVX;
3883 for (uint32_t channels = 32; channels < 256; channels += 48) {
3884 DWConvMicrokernelTester()
3885 .cr(16)
3886 .kr(25)
3887 .channels(channels)
3888 .qmax(128)
3889 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3890 }
3891 }
3892
3893 TEST(F32_DWCONV_UP16X25__AVX, c_lt_16) {
3894 TEST_REQUIRES_X86_AVX;
3895 for (uint32_t channels = 1; channels < 16; channels++) {
3896 DWConvMicrokernelTester()
3897 .cr(16)
3898 .kr(25)
3899 .channels(channels)
3900 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3901 }
3902 }
3903
3904 TEST(F32_DWCONV_UP16X25__AVX, c_gt_16) {
3905 TEST_REQUIRES_X86_AVX;
3906 for (uint32_t channels = 17; channels < 32; channels++) {
3907 DWConvMicrokernelTester()
3908 .cr(16)
3909 .kr(25)
3910 .channels(channels)
3911 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3912 }
3913 }
3914
3915 TEST(F32_DWCONV_UP16X25__AVX, c_gt_16_with_qmin) {
3916 TEST_REQUIRES_X86_AVX;
3917 for (uint32_t channels = 17; channels < 32; channels++) {
3918 DWConvMicrokernelTester()
3919 .cr(16)
3920 .kr(25)
3921 .channels(channels)
3922 .qmin(128)
3923 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3924 }
3925 }
3926
3927 TEST(F32_DWCONV_UP16X25__AVX, c_gt_16_with_qmax) {
3928 TEST_REQUIRES_X86_AVX;
3929 for (uint32_t channels = 17; channels < 32; channels++) {
3930 DWConvMicrokernelTester()
3931 .cr(16)
3932 .kr(25)
3933 .channels(channels)
3934 .qmax(128)
3935 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3936 }
3937 }
3938
3939 TEST(F32_DWCONV_UP16X25__AVX, multipixel) {
3940 TEST_REQUIRES_X86_AVX;
3941 for (size_t channels = 1; channels <= 80; channels += 15) {
3942 DWConvMicrokernelTester()
3943 .cr(16)
3944 .kr(25)
3945 .channels(channels)
3946 .width(3)
3947 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3948 }
3949 }
3950
3951 TEST(F32_DWCONV_UP16X25__AVX, multipixel_with_step) {
3952 TEST_REQUIRES_X86_AVX;
3953 for (size_t channels = 1; channels <= 80; channels += 15) {
3954 for (size_t step = 2; step <= 25; step++) {
3955 DWConvMicrokernelTester()
3956 .cr(16)
3957 .kr(25)
3958 .channels(channels)
3959 .width(3)
3960 .step(step)
3961 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3962 }
3963 }
3964 }
3965
3966 TEST(F32_DWCONV_UP16X25__AVX, multipixel_with_output_stride) {
3967 TEST_REQUIRES_X86_AVX;
3968 for (size_t channels = 1; channels <= 80; channels += 15) {
3969 DWConvMicrokernelTester()
3970 .cr(16)
3971 .kr(25)
3972 .channels(16)
3973 .width(5)
3974 .output_stride(83)
3975 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3976 }
3977 }
3978
3979 TEST(F32_DWCONV_UP16X25__AVX, multipixel_with_qmin) {
3980 TEST_REQUIRES_X86_AVX;
3981 for (size_t channels = 1; channels <= 80; channels += 15) {
3982 DWConvMicrokernelTester()
3983 .cr(16)
3984 .kr(25)
3985 .channels(channels)
3986 .width(3)
3987 .qmin(128)
3988 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
3989 }
3990 }
3991
3992 TEST(F32_DWCONV_UP16X25__AVX, multipixel_with_qmax) {
3993 TEST_REQUIRES_X86_AVX;
3994 for (size_t channels = 1; channels <= 80; channels += 15) {
3995 DWConvMicrokernelTester()
3996 .cr(16)
3997 .kr(25)
3998 .channels(channels)
3999 .width(3)
4000 .qmax(128)
4001 .Test(xnn_f32_dwconv_ukernel_up16x25__avx);
4002 }
4003 }
4004#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4005
4006
4007#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4008 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_eq_16) {
4009 TEST_REQUIRES_X86_AVX;
4010 DWConvMicrokernelTester()
4011 .cr(16)
4012 .kr(25)
4013 .channels(16)
4014 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4015 }
4016
4017 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_div_16) {
4018 TEST_REQUIRES_X86_AVX;
4019 for (uint32_t channels = 32; channels < 256; channels += 48) {
4020 DWConvMicrokernelTester()
4021 .cr(16)
4022 .kr(25)
4023 .channels(channels)
4024 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4025 }
4026 }
4027
4028 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_div_16_with_qmin) {
4029 TEST_REQUIRES_X86_AVX;
4030 for (uint32_t channels = 32; channels < 256; channels += 48) {
4031 DWConvMicrokernelTester()
4032 .cr(16)
4033 .kr(25)
4034 .channels(channels)
4035 .qmin(128)
4036 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4037 }
4038 }
4039
4040 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_div_16_with_qmax) {
4041 TEST_REQUIRES_X86_AVX;
4042 for (uint32_t channels = 32; channels < 256; channels += 48) {
4043 DWConvMicrokernelTester()
4044 .cr(16)
4045 .kr(25)
4046 .channels(channels)
4047 .qmax(128)
4048 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4049 }
4050 }
4051
4052 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_lt_16) {
4053 TEST_REQUIRES_X86_AVX;
4054 for (uint32_t channels = 1; channels < 16; channels++) {
4055 DWConvMicrokernelTester()
4056 .cr(16)
4057 .kr(25)
4058 .channels(channels)
4059 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4060 }
4061 }
4062
4063 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_gt_16) {
4064 TEST_REQUIRES_X86_AVX;
4065 for (uint32_t channels = 17; channels < 32; channels++) {
4066 DWConvMicrokernelTester()
4067 .cr(16)
4068 .kr(25)
4069 .channels(channels)
4070 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4071 }
4072 }
4073
4074 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_gt_16_with_qmin) {
4075 TEST_REQUIRES_X86_AVX;
4076 for (uint32_t channels = 17; channels < 32; channels++) {
4077 DWConvMicrokernelTester()
4078 .cr(16)
4079 .kr(25)
4080 .channels(channels)
4081 .qmin(128)
4082 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4083 }
4084 }
4085
4086 TEST(F32_DWCONV_UP16X25__AVX_ACC2, c_gt_16_with_qmax) {
4087 TEST_REQUIRES_X86_AVX;
4088 for (uint32_t channels = 17; channels < 32; channels++) {
4089 DWConvMicrokernelTester()
4090 .cr(16)
4091 .kr(25)
4092 .channels(channels)
4093 .qmax(128)
4094 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4095 }
4096 }
4097
4098 TEST(F32_DWCONV_UP16X25__AVX_ACC2, multipixel) {
4099 TEST_REQUIRES_X86_AVX;
4100 for (size_t channels = 1; channels <= 80; channels += 15) {
4101 DWConvMicrokernelTester()
4102 .cr(16)
4103 .kr(25)
4104 .channels(channels)
4105 .width(3)
4106 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4107 }
4108 }
4109
4110 TEST(F32_DWCONV_UP16X25__AVX_ACC2, multipixel_with_step) {
4111 TEST_REQUIRES_X86_AVX;
4112 for (size_t channels = 1; channels <= 80; channels += 15) {
4113 for (size_t step = 2; step <= 25; step++) {
4114 DWConvMicrokernelTester()
4115 .cr(16)
4116 .kr(25)
4117 .channels(channels)
4118 .width(3)
4119 .step(step)
4120 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4121 }
4122 }
4123 }
4124
4125 TEST(F32_DWCONV_UP16X25__AVX_ACC2, multipixel_with_output_stride) {
4126 TEST_REQUIRES_X86_AVX;
4127 for (size_t channels = 1; channels <= 80; channels += 15) {
4128 DWConvMicrokernelTester()
4129 .cr(16)
4130 .kr(25)
4131 .channels(16)
4132 .width(5)
4133 .output_stride(83)
4134 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4135 }
4136 }
4137
4138 TEST(F32_DWCONV_UP16X25__AVX_ACC2, multipixel_with_qmin) {
4139 TEST_REQUIRES_X86_AVX;
4140 for (size_t channels = 1; channels <= 80; channels += 15) {
4141 DWConvMicrokernelTester()
4142 .cr(16)
4143 .kr(25)
4144 .channels(channels)
4145 .width(3)
4146 .qmin(128)
4147 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4148 }
4149 }
4150
4151 TEST(F32_DWCONV_UP16X25__AVX_ACC2, multipixel_with_qmax) {
4152 TEST_REQUIRES_X86_AVX;
4153 for (size_t channels = 1; channels <= 80; channels += 15) {
4154 DWConvMicrokernelTester()
4155 .cr(16)
4156 .kr(25)
4157 .channels(channels)
4158 .width(3)
4159 .qmax(128)
4160 .Test(xnn_f32_dwconv_ukernel_up16x25__avx_acc2);
4161 }
4162 }
4163#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4164
4165
4166#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4167 TEST(F32_DWCONV_UP8X9__AVX, c_eq_8) {
4168 TEST_REQUIRES_X86_AVX;
4169 DWConvMicrokernelTester()
4170 .cr(8)
4171 .kr(9)
4172 .channels(8)
4173 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4174 }
4175
4176 TEST(F32_DWCONV_UP8X9__AVX, c_div_8) {
4177 TEST_REQUIRES_X86_AVX;
4178 for (uint32_t channels = 16; channels < 128; channels += 24) {
4179 DWConvMicrokernelTester()
4180 .cr(8)
4181 .kr(9)
4182 .channels(channels)
4183 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4184 }
4185 }
4186
4187 TEST(F32_DWCONV_UP8X9__AVX, c_div_8_with_qmin) {
4188 TEST_REQUIRES_X86_AVX;
4189 for (uint32_t channels = 16; channels < 128; channels += 24) {
4190 DWConvMicrokernelTester()
4191 .cr(8)
4192 .kr(9)
4193 .channels(channels)
4194 .qmin(128)
4195 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4196 }
4197 }
4198
4199 TEST(F32_DWCONV_UP8X9__AVX, c_div_8_with_qmax) {
4200 TEST_REQUIRES_X86_AVX;
4201 for (uint32_t channels = 16; channels < 128; channels += 24) {
4202 DWConvMicrokernelTester()
4203 .cr(8)
4204 .kr(9)
4205 .channels(channels)
4206 .qmax(128)
4207 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4208 }
4209 }
4210
4211 TEST(F32_DWCONV_UP8X9__AVX, c_lt_8) {
4212 TEST_REQUIRES_X86_AVX;
4213 for (uint32_t channels = 1; channels < 8; channels++) {
4214 DWConvMicrokernelTester()
4215 .cr(8)
4216 .kr(9)
4217 .channels(channels)
4218 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4219 }
4220 }
4221
4222 TEST(F32_DWCONV_UP8X9__AVX, c_gt_8) {
4223 TEST_REQUIRES_X86_AVX;
4224 for (uint32_t channels = 9; channels < 16; channels++) {
4225 DWConvMicrokernelTester()
4226 .cr(8)
4227 .kr(9)
4228 .channels(channels)
4229 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4230 }
4231 }
4232
4233 TEST(F32_DWCONV_UP8X9__AVX, c_gt_8_with_qmin) {
4234 TEST_REQUIRES_X86_AVX;
4235 for (uint32_t channels = 9; channels < 16; channels++) {
4236 DWConvMicrokernelTester()
4237 .cr(8)
4238 .kr(9)
4239 .channels(channels)
4240 .qmin(128)
4241 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4242 }
4243 }
4244
4245 TEST(F32_DWCONV_UP8X9__AVX, c_gt_8_with_qmax) {
4246 TEST_REQUIRES_X86_AVX;
4247 for (uint32_t channels = 9; channels < 16; channels++) {
4248 DWConvMicrokernelTester()
4249 .cr(8)
4250 .kr(9)
4251 .channels(channels)
4252 .qmax(128)
4253 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4254 }
4255 }
4256
4257 TEST(F32_DWCONV_UP8X9__AVX, multipixel) {
4258 TEST_REQUIRES_X86_AVX;
4259 for (size_t channels = 1; channels <= 40; channels += 7) {
4260 DWConvMicrokernelTester()
4261 .cr(8)
4262 .kr(9)
4263 .channels(channels)
4264 .width(3)
4265 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4266 }
4267 }
4268
4269 TEST(F32_DWCONV_UP8X9__AVX, multipixel_with_step) {
4270 TEST_REQUIRES_X86_AVX;
4271 for (size_t channels = 1; channels <= 40; channels += 7) {
4272 for (size_t step = 2; step <= 9; step++) {
4273 DWConvMicrokernelTester()
4274 .cr(8)
4275 .kr(9)
4276 .channels(channels)
4277 .width(3)
4278 .step(step)
4279 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4280 }
4281 }
4282 }
4283
4284 TEST(F32_DWCONV_UP8X9__AVX, multipixel_with_output_stride) {
4285 TEST_REQUIRES_X86_AVX;
4286 for (size_t channels = 1; channels <= 40; channels += 7) {
4287 DWConvMicrokernelTester()
4288 .cr(8)
4289 .kr(9)
4290 .channels(8)
4291 .width(5)
4292 .output_stride(43)
4293 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4294 }
4295 }
4296
4297 TEST(F32_DWCONV_UP8X9__AVX, multipixel_with_qmin) {
4298 TEST_REQUIRES_X86_AVX;
4299 for (size_t channels = 1; channels <= 40; channels += 7) {
4300 DWConvMicrokernelTester()
4301 .cr(8)
4302 .kr(9)
4303 .channels(channels)
4304 .width(3)
4305 .qmin(128)
4306 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4307 }
4308 }
4309
4310 TEST(F32_DWCONV_UP8X9__AVX, multipixel_with_qmax) {
4311 TEST_REQUIRES_X86_AVX;
4312 for (size_t channels = 1; channels <= 40; channels += 7) {
4313 DWConvMicrokernelTester()
4314 .cr(8)
4315 .kr(9)
4316 .channels(channels)
4317 .width(3)
4318 .qmax(128)
4319 .Test(xnn_f32_dwconv_ukernel_up8x9__avx);
4320 }
4321 }
4322#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4323
4324
4325#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4326 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_eq_8) {
4327 TEST_REQUIRES_X86_AVX;
4328 DWConvMicrokernelTester()
4329 .cr(8)
4330 .kr(9)
4331 .channels(8)
4332 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4333 }
4334
4335 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_div_8) {
4336 TEST_REQUIRES_X86_AVX;
4337 for (uint32_t channels = 16; channels < 128; channels += 24) {
4338 DWConvMicrokernelTester()
4339 .cr(8)
4340 .kr(9)
4341 .channels(channels)
4342 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4343 }
4344 }
4345
4346 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_div_8_with_qmin) {
4347 TEST_REQUIRES_X86_AVX;
4348 for (uint32_t channels = 16; channels < 128; channels += 24) {
4349 DWConvMicrokernelTester()
4350 .cr(8)
4351 .kr(9)
4352 .channels(channels)
4353 .qmin(128)
4354 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4355 }
4356 }
4357
4358 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_div_8_with_qmax) {
4359 TEST_REQUIRES_X86_AVX;
4360 for (uint32_t channels = 16; channels < 128; channels += 24) {
4361 DWConvMicrokernelTester()
4362 .cr(8)
4363 .kr(9)
4364 .channels(channels)
4365 .qmax(128)
4366 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4367 }
4368 }
4369
4370 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_lt_8) {
4371 TEST_REQUIRES_X86_AVX;
4372 for (uint32_t channels = 1; channels < 8; channels++) {
4373 DWConvMicrokernelTester()
4374 .cr(8)
4375 .kr(9)
4376 .channels(channels)
4377 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4378 }
4379 }
4380
4381 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_gt_8) {
4382 TEST_REQUIRES_X86_AVX;
4383 for (uint32_t channels = 9; channels < 16; channels++) {
4384 DWConvMicrokernelTester()
4385 .cr(8)
4386 .kr(9)
4387 .channels(channels)
4388 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4389 }
4390 }
4391
4392 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_gt_8_with_qmin) {
4393 TEST_REQUIRES_X86_AVX;
4394 for (uint32_t channels = 9; channels < 16; channels++) {
4395 DWConvMicrokernelTester()
4396 .cr(8)
4397 .kr(9)
4398 .channels(channels)
4399 .qmin(128)
4400 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4401 }
4402 }
4403
4404 TEST(F32_DWCONV_UP8X9__AVX_ACC2, c_gt_8_with_qmax) {
4405 TEST_REQUIRES_X86_AVX;
4406 for (uint32_t channels = 9; channels < 16; channels++) {
4407 DWConvMicrokernelTester()
4408 .cr(8)
4409 .kr(9)
4410 .channels(channels)
4411 .qmax(128)
4412 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4413 }
4414 }
4415
4416 TEST(F32_DWCONV_UP8X9__AVX_ACC2, multipixel) {
4417 TEST_REQUIRES_X86_AVX;
4418 for (size_t channels = 1; channels <= 40; channels += 7) {
4419 DWConvMicrokernelTester()
4420 .cr(8)
4421 .kr(9)
4422 .channels(channels)
4423 .width(3)
4424 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4425 }
4426 }
4427
4428 TEST(F32_DWCONV_UP8X9__AVX_ACC2, multipixel_with_step) {
4429 TEST_REQUIRES_X86_AVX;
4430 for (size_t channels = 1; channels <= 40; channels += 7) {
4431 for (size_t step = 2; step <= 9; step++) {
4432 DWConvMicrokernelTester()
4433 .cr(8)
4434 .kr(9)
4435 .channels(channels)
4436 .width(3)
4437 .step(step)
4438 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4439 }
4440 }
4441 }
4442
4443 TEST(F32_DWCONV_UP8X9__AVX_ACC2, multipixel_with_output_stride) {
4444 TEST_REQUIRES_X86_AVX;
4445 for (size_t channels = 1; channels <= 40; channels += 7) {
4446 DWConvMicrokernelTester()
4447 .cr(8)
4448 .kr(9)
4449 .channels(8)
4450 .width(5)
4451 .output_stride(43)
4452 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4453 }
4454 }
4455
4456 TEST(F32_DWCONV_UP8X9__AVX_ACC2, multipixel_with_qmin) {
4457 TEST_REQUIRES_X86_AVX;
4458 for (size_t channels = 1; channels <= 40; channels += 7) {
4459 DWConvMicrokernelTester()
4460 .cr(8)
4461 .kr(9)
4462 .channels(channels)
4463 .width(3)
4464 .qmin(128)
4465 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4466 }
4467 }
4468
4469 TEST(F32_DWCONV_UP8X9__AVX_ACC2, multipixel_with_qmax) {
4470 TEST_REQUIRES_X86_AVX;
4471 for (size_t channels = 1; channels <= 40; channels += 7) {
4472 DWConvMicrokernelTester()
4473 .cr(8)
4474 .kr(9)
4475 .channels(channels)
4476 .width(3)
4477 .qmax(128)
4478 .Test(xnn_f32_dwconv_ukernel_up8x9__avx_acc2);
4479 }
4480 }
4481#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4482
4483
4484#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4485 TEST(F32_DWCONV_UP16X9__AVX, c_eq_16) {
4486 TEST_REQUIRES_X86_AVX;
4487 DWConvMicrokernelTester()
4488 .cr(16)
4489 .kr(9)
4490 .channels(16)
4491 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4492 }
4493
4494 TEST(F32_DWCONV_UP16X9__AVX, c_div_16) {
4495 TEST_REQUIRES_X86_AVX;
4496 for (uint32_t channels = 32; channels < 256; channels += 48) {
4497 DWConvMicrokernelTester()
4498 .cr(16)
4499 .kr(9)
4500 .channels(channels)
4501 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4502 }
4503 }
4504
4505 TEST(F32_DWCONV_UP16X9__AVX, c_div_16_with_qmin) {
4506 TEST_REQUIRES_X86_AVX;
4507 for (uint32_t channels = 32; channels < 256; channels += 48) {
4508 DWConvMicrokernelTester()
4509 .cr(16)
4510 .kr(9)
4511 .channels(channels)
4512 .qmin(128)
4513 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4514 }
4515 }
4516
4517 TEST(F32_DWCONV_UP16X9__AVX, c_div_16_with_qmax) {
4518 TEST_REQUIRES_X86_AVX;
4519 for (uint32_t channels = 32; channels < 256; channels += 48) {
4520 DWConvMicrokernelTester()
4521 .cr(16)
4522 .kr(9)
4523 .channels(channels)
4524 .qmax(128)
4525 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4526 }
4527 }
4528
4529 TEST(F32_DWCONV_UP16X9__AVX, c_lt_16) {
4530 TEST_REQUIRES_X86_AVX;
4531 for (uint32_t channels = 1; channels < 16; channels++) {
4532 DWConvMicrokernelTester()
4533 .cr(16)
4534 .kr(9)
4535 .channels(channels)
4536 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4537 }
4538 }
4539
4540 TEST(F32_DWCONV_UP16X9__AVX, c_gt_16) {
4541 TEST_REQUIRES_X86_AVX;
4542 for (uint32_t channels = 17; channels < 32; channels++) {
4543 DWConvMicrokernelTester()
4544 .cr(16)
4545 .kr(9)
4546 .channels(channels)
4547 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4548 }
4549 }
4550
4551 TEST(F32_DWCONV_UP16X9__AVX, c_gt_16_with_qmin) {
4552 TEST_REQUIRES_X86_AVX;
4553 for (uint32_t channels = 17; channels < 32; channels++) {
4554 DWConvMicrokernelTester()
4555 .cr(16)
4556 .kr(9)
4557 .channels(channels)
4558 .qmin(128)
4559 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4560 }
4561 }
4562
4563 TEST(F32_DWCONV_UP16X9__AVX, c_gt_16_with_qmax) {
4564 TEST_REQUIRES_X86_AVX;
4565 for (uint32_t channels = 17; channels < 32; channels++) {
4566 DWConvMicrokernelTester()
4567 .cr(16)
4568 .kr(9)
4569 .channels(channels)
4570 .qmax(128)
4571 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4572 }
4573 }
4574
4575 TEST(F32_DWCONV_UP16X9__AVX, multipixel) {
4576 TEST_REQUIRES_X86_AVX;
4577 for (size_t channels = 1; channels <= 80; channels += 15) {
4578 DWConvMicrokernelTester()
4579 .cr(16)
4580 .kr(9)
4581 .channels(channels)
4582 .width(3)
4583 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4584 }
4585 }
4586
4587 TEST(F32_DWCONV_UP16X9__AVX, multipixel_with_step) {
4588 TEST_REQUIRES_X86_AVX;
4589 for (size_t channels = 1; channels <= 80; channels += 15) {
4590 for (size_t step = 2; step <= 9; step++) {
4591 DWConvMicrokernelTester()
4592 .cr(16)
4593 .kr(9)
4594 .channels(channels)
4595 .width(3)
4596 .step(step)
4597 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4598 }
4599 }
4600 }
4601
4602 TEST(F32_DWCONV_UP16X9__AVX, multipixel_with_output_stride) {
4603 TEST_REQUIRES_X86_AVX;
4604 for (size_t channels = 1; channels <= 80; channels += 15) {
4605 DWConvMicrokernelTester()
4606 .cr(16)
4607 .kr(9)
4608 .channels(16)
4609 .width(5)
4610 .output_stride(83)
4611 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4612 }
4613 }
4614
4615 TEST(F32_DWCONV_UP16X9__AVX, multipixel_with_qmin) {
4616 TEST_REQUIRES_X86_AVX;
4617 for (size_t channels = 1; channels <= 80; channels += 15) {
4618 DWConvMicrokernelTester()
4619 .cr(16)
4620 .kr(9)
4621 .channels(channels)
4622 .width(3)
4623 .qmin(128)
4624 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4625 }
4626 }
4627
4628 TEST(F32_DWCONV_UP16X9__AVX, multipixel_with_qmax) {
4629 TEST_REQUIRES_X86_AVX;
4630 for (size_t channels = 1; channels <= 80; channels += 15) {
4631 DWConvMicrokernelTester()
4632 .cr(16)
4633 .kr(9)
4634 .channels(channels)
4635 .width(3)
4636 .qmax(128)
4637 .Test(xnn_f32_dwconv_ukernel_up16x9__avx);
4638 }
4639 }
4640#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4641
4642
4643#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4644 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_eq_16) {
4645 TEST_REQUIRES_X86_AVX;
4646 DWConvMicrokernelTester()
4647 .cr(16)
4648 .kr(9)
4649 .channels(16)
4650 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4651 }
4652
4653 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_div_16) {
4654 TEST_REQUIRES_X86_AVX;
4655 for (uint32_t channels = 32; channels < 256; channels += 48) {
4656 DWConvMicrokernelTester()
4657 .cr(16)
4658 .kr(9)
4659 .channels(channels)
4660 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4661 }
4662 }
4663
4664 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_div_16_with_qmin) {
4665 TEST_REQUIRES_X86_AVX;
4666 for (uint32_t channels = 32; channels < 256; channels += 48) {
4667 DWConvMicrokernelTester()
4668 .cr(16)
4669 .kr(9)
4670 .channels(channels)
4671 .qmin(128)
4672 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4673 }
4674 }
4675
4676 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_div_16_with_qmax) {
4677 TEST_REQUIRES_X86_AVX;
4678 for (uint32_t channels = 32; channels < 256; channels += 48) {
4679 DWConvMicrokernelTester()
4680 .cr(16)
4681 .kr(9)
4682 .channels(channels)
4683 .qmax(128)
4684 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4685 }
4686 }
4687
4688 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_lt_16) {
4689 TEST_REQUIRES_X86_AVX;
4690 for (uint32_t channels = 1; channels < 16; channels++) {
4691 DWConvMicrokernelTester()
4692 .cr(16)
4693 .kr(9)
4694 .channels(channels)
4695 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4696 }
4697 }
4698
4699 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_gt_16) {
4700 TEST_REQUIRES_X86_AVX;
4701 for (uint32_t channels = 17; channels < 32; channels++) {
4702 DWConvMicrokernelTester()
4703 .cr(16)
4704 .kr(9)
4705 .channels(channels)
4706 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4707 }
4708 }
4709
4710 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_gt_16_with_qmin) {
4711 TEST_REQUIRES_X86_AVX;
4712 for (uint32_t channels = 17; channels < 32; channels++) {
4713 DWConvMicrokernelTester()
4714 .cr(16)
4715 .kr(9)
4716 .channels(channels)
4717 .qmin(128)
4718 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4719 }
4720 }
4721
4722 TEST(F32_DWCONV_UP16X9__AVX_ACC2, c_gt_16_with_qmax) {
4723 TEST_REQUIRES_X86_AVX;
4724 for (uint32_t channels = 17; channels < 32; channels++) {
4725 DWConvMicrokernelTester()
4726 .cr(16)
4727 .kr(9)
4728 .channels(channels)
4729 .qmax(128)
4730 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4731 }
4732 }
4733
4734 TEST(F32_DWCONV_UP16X9__AVX_ACC2, multipixel) {
4735 TEST_REQUIRES_X86_AVX;
4736 for (size_t channels = 1; channels <= 80; channels += 15) {
4737 DWConvMicrokernelTester()
4738 .cr(16)
4739 .kr(9)
4740 .channels(channels)
4741 .width(3)
4742 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4743 }
4744 }
4745
4746 TEST(F32_DWCONV_UP16X9__AVX_ACC2, multipixel_with_step) {
4747 TEST_REQUIRES_X86_AVX;
4748 for (size_t channels = 1; channels <= 80; channels += 15) {
4749 for (size_t step = 2; step <= 9; step++) {
4750 DWConvMicrokernelTester()
4751 .cr(16)
4752 .kr(9)
4753 .channels(channels)
4754 .width(3)
4755 .step(step)
4756 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4757 }
4758 }
4759 }
4760
4761 TEST(F32_DWCONV_UP16X9__AVX_ACC2, multipixel_with_output_stride) {
4762 TEST_REQUIRES_X86_AVX;
4763 for (size_t channels = 1; channels <= 80; channels += 15) {
4764 DWConvMicrokernelTester()
4765 .cr(16)
4766 .kr(9)
4767 .channels(16)
4768 .width(5)
4769 .output_stride(83)
4770 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4771 }
4772 }
4773
4774 TEST(F32_DWCONV_UP16X9__AVX_ACC2, multipixel_with_qmin) {
4775 TEST_REQUIRES_X86_AVX;
4776 for (size_t channels = 1; channels <= 80; channels += 15) {
4777 DWConvMicrokernelTester()
4778 .cr(16)
4779 .kr(9)
4780 .channels(channels)
4781 .width(3)
4782 .qmin(128)
4783 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4784 }
4785 }
4786
4787 TEST(F32_DWCONV_UP16X9__AVX_ACC2, multipixel_with_qmax) {
4788 TEST_REQUIRES_X86_AVX;
4789 for (size_t channels = 1; channels <= 80; channels += 15) {
4790 DWConvMicrokernelTester()
4791 .cr(16)
4792 .kr(9)
4793 .channels(channels)
4794 .width(3)
4795 .qmax(128)
4796 .Test(xnn_f32_dwconv_ukernel_up16x9__avx_acc2);
4797 }
4798 }
4799#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4800
4801
4802#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4803 TEST(F32_DWCONV_UP8X4__AVX, c_eq_8) {
4804 TEST_REQUIRES_X86_AVX;
4805 DWConvMicrokernelTester()
4806 .cr(8)
4807 .kr(4)
4808 .channels(8)
4809 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4810 }
4811
4812 TEST(F32_DWCONV_UP8X4__AVX, c_div_8) {
4813 TEST_REQUIRES_X86_AVX;
4814 for (uint32_t channels = 16; channels < 128; channels += 24) {
4815 DWConvMicrokernelTester()
4816 .cr(8)
4817 .kr(4)
4818 .channels(channels)
4819 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4820 }
4821 }
4822
4823 TEST(F32_DWCONV_UP8X4__AVX, c_div_8_with_qmin) {
4824 TEST_REQUIRES_X86_AVX;
4825 for (uint32_t channels = 16; channels < 128; channels += 24) {
4826 DWConvMicrokernelTester()
4827 .cr(8)
4828 .kr(4)
4829 .channels(channels)
4830 .qmin(128)
4831 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4832 }
4833 }
4834
4835 TEST(F32_DWCONV_UP8X4__AVX, c_div_8_with_qmax) {
4836 TEST_REQUIRES_X86_AVX;
4837 for (uint32_t channels = 16; channels < 128; channels += 24) {
4838 DWConvMicrokernelTester()
4839 .cr(8)
4840 .kr(4)
4841 .channels(channels)
4842 .qmax(128)
4843 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4844 }
4845 }
4846
4847 TEST(F32_DWCONV_UP8X4__AVX, c_lt_8) {
4848 TEST_REQUIRES_X86_AVX;
4849 for (uint32_t channels = 1; channels < 8; channels++) {
4850 DWConvMicrokernelTester()
4851 .cr(8)
4852 .kr(4)
4853 .channels(channels)
4854 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4855 }
4856 }
4857
4858 TEST(F32_DWCONV_UP8X4__AVX, c_gt_8) {
4859 TEST_REQUIRES_X86_AVX;
4860 for (uint32_t channels = 9; channels < 16; channels++) {
4861 DWConvMicrokernelTester()
4862 .cr(8)
4863 .kr(4)
4864 .channels(channels)
4865 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4866 }
4867 }
4868
4869 TEST(F32_DWCONV_UP8X4__AVX, c_gt_8_with_qmin) {
4870 TEST_REQUIRES_X86_AVX;
4871 for (uint32_t channels = 9; channels < 16; channels++) {
4872 DWConvMicrokernelTester()
4873 .cr(8)
4874 .kr(4)
4875 .channels(channels)
4876 .qmin(128)
4877 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4878 }
4879 }
4880
4881 TEST(F32_DWCONV_UP8X4__AVX, c_gt_8_with_qmax) {
4882 TEST_REQUIRES_X86_AVX;
4883 for (uint32_t channels = 9; channels < 16; channels++) {
4884 DWConvMicrokernelTester()
4885 .cr(8)
4886 .kr(4)
4887 .channels(channels)
4888 .qmax(128)
4889 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4890 }
4891 }
4892
4893 TEST(F32_DWCONV_UP8X4__AVX, multipixel) {
4894 TEST_REQUIRES_X86_AVX;
4895 for (size_t channels = 1; channels <= 40; channels += 7) {
4896 DWConvMicrokernelTester()
4897 .cr(8)
4898 .kr(4)
4899 .channels(channels)
4900 .width(3)
4901 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4902 }
4903 }
4904
4905 TEST(F32_DWCONV_UP8X4__AVX, multipixel_with_step) {
4906 TEST_REQUIRES_X86_AVX;
4907 for (size_t channels = 1; channels <= 40; channels += 7) {
4908 for (size_t step = 2; step <= 4; step++) {
4909 DWConvMicrokernelTester()
4910 .cr(8)
4911 .kr(4)
4912 .channels(channels)
4913 .width(3)
4914 .step(step)
4915 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4916 }
4917 }
4918 }
4919
4920 TEST(F32_DWCONV_UP8X4__AVX, multipixel_with_output_stride) {
4921 TEST_REQUIRES_X86_AVX;
4922 for (size_t channels = 1; channels <= 40; channels += 7) {
4923 DWConvMicrokernelTester()
4924 .cr(8)
4925 .kr(4)
4926 .channels(8)
4927 .width(5)
4928 .output_stride(43)
4929 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4930 }
4931 }
4932
4933 TEST(F32_DWCONV_UP8X4__AVX, multipixel_with_qmin) {
4934 TEST_REQUIRES_X86_AVX;
4935 for (size_t channels = 1; channels <= 40; channels += 7) {
4936 DWConvMicrokernelTester()
4937 .cr(8)
4938 .kr(4)
4939 .channels(channels)
4940 .width(3)
4941 .qmin(128)
4942 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4943 }
4944 }
4945
4946 TEST(F32_DWCONV_UP8X4__AVX, multipixel_with_qmax) {
4947 TEST_REQUIRES_X86_AVX;
4948 for (size_t channels = 1; channels <= 40; channels += 7) {
4949 DWConvMicrokernelTester()
4950 .cr(8)
4951 .kr(4)
4952 .channels(channels)
4953 .width(3)
4954 .qmax(128)
4955 .Test(xnn_f32_dwconv_ukernel_up8x4__avx);
4956 }
4957 }
4958#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4959
4960
4961#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4962 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_eq_8) {
4963 TEST_REQUIRES_X86_AVX;
4964 DWConvMicrokernelTester()
4965 .cr(8)
4966 .kr(4)
4967 .channels(8)
4968 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
4969 }
4970
4971 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_div_8) {
4972 TEST_REQUIRES_X86_AVX;
4973 for (uint32_t channels = 16; channels < 128; channels += 24) {
4974 DWConvMicrokernelTester()
4975 .cr(8)
4976 .kr(4)
4977 .channels(channels)
4978 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
4979 }
4980 }
4981
4982 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_div_8_with_qmin) {
4983 TEST_REQUIRES_X86_AVX;
4984 for (uint32_t channels = 16; channels < 128; channels += 24) {
4985 DWConvMicrokernelTester()
4986 .cr(8)
4987 .kr(4)
4988 .channels(channels)
4989 .qmin(128)
4990 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
4991 }
4992 }
4993
4994 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_div_8_with_qmax) {
4995 TEST_REQUIRES_X86_AVX;
4996 for (uint32_t channels = 16; channels < 128; channels += 24) {
4997 DWConvMicrokernelTester()
4998 .cr(8)
4999 .kr(4)
5000 .channels(channels)
5001 .qmax(128)
5002 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5003 }
5004 }
5005
5006 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_lt_8) {
5007 TEST_REQUIRES_X86_AVX;
5008 for (uint32_t channels = 1; channels < 8; channels++) {
5009 DWConvMicrokernelTester()
5010 .cr(8)
5011 .kr(4)
5012 .channels(channels)
5013 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5014 }
5015 }
5016
5017 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_gt_8) {
5018 TEST_REQUIRES_X86_AVX;
5019 for (uint32_t channels = 9; channels < 16; channels++) {
5020 DWConvMicrokernelTester()
5021 .cr(8)
5022 .kr(4)
5023 .channels(channels)
5024 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5025 }
5026 }
5027
5028 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_gt_8_with_qmin) {
5029 TEST_REQUIRES_X86_AVX;
5030 for (uint32_t channels = 9; channels < 16; channels++) {
5031 DWConvMicrokernelTester()
5032 .cr(8)
5033 .kr(4)
5034 .channels(channels)
5035 .qmin(128)
5036 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5037 }
5038 }
5039
5040 TEST(F32_DWCONV_UP8X4__AVX_ACC2, c_gt_8_with_qmax) {
5041 TEST_REQUIRES_X86_AVX;
5042 for (uint32_t channels = 9; channels < 16; channels++) {
5043 DWConvMicrokernelTester()
5044 .cr(8)
5045 .kr(4)
5046 .channels(channels)
5047 .qmax(128)
5048 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5049 }
5050 }
5051
5052 TEST(F32_DWCONV_UP8X4__AVX_ACC2, multipixel) {
5053 TEST_REQUIRES_X86_AVX;
5054 for (size_t channels = 1; channels <= 40; channels += 7) {
5055 DWConvMicrokernelTester()
5056 .cr(8)
5057 .kr(4)
5058 .channels(channels)
5059 .width(3)
5060 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5061 }
5062 }
5063
5064 TEST(F32_DWCONV_UP8X4__AVX_ACC2, multipixel_with_step) {
5065 TEST_REQUIRES_X86_AVX;
5066 for (size_t channels = 1; channels <= 40; channels += 7) {
5067 for (size_t step = 2; step <= 4; step++) {
5068 DWConvMicrokernelTester()
5069 .cr(8)
5070 .kr(4)
5071 .channels(channels)
5072 .width(3)
5073 .step(step)
5074 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5075 }
5076 }
5077 }
5078
5079 TEST(F32_DWCONV_UP8X4__AVX_ACC2, multipixel_with_output_stride) {
5080 TEST_REQUIRES_X86_AVX;
5081 for (size_t channels = 1; channels <= 40; channels += 7) {
5082 DWConvMicrokernelTester()
5083 .cr(8)
5084 .kr(4)
5085 .channels(8)
5086 .width(5)
5087 .output_stride(43)
5088 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5089 }
5090 }
5091
5092 TEST(F32_DWCONV_UP8X4__AVX_ACC2, multipixel_with_qmin) {
5093 TEST_REQUIRES_X86_AVX;
5094 for (size_t channels = 1; channels <= 40; channels += 7) {
5095 DWConvMicrokernelTester()
5096 .cr(8)
5097 .kr(4)
5098 .channels(channels)
5099 .width(3)
5100 .qmin(128)
5101 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5102 }
5103 }
5104
5105 TEST(F32_DWCONV_UP8X4__AVX_ACC2, multipixel_with_qmax) {
5106 TEST_REQUIRES_X86_AVX;
5107 for (size_t channels = 1; channels <= 40; channels += 7) {
5108 DWConvMicrokernelTester()
5109 .cr(8)
5110 .kr(4)
5111 .channels(channels)
5112 .width(3)
5113 .qmax(128)
5114 .Test(xnn_f32_dwconv_ukernel_up8x4__avx_acc2);
5115 }
5116 }
5117#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5118
5119
5120#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5121 TEST(F32_DWCONV_UP16X4__AVX, c_eq_16) {
5122 TEST_REQUIRES_X86_AVX;
5123 DWConvMicrokernelTester()
5124 .cr(16)
5125 .kr(4)
5126 .channels(16)
5127 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5128 }
5129
5130 TEST(F32_DWCONV_UP16X4__AVX, c_div_16) {
5131 TEST_REQUIRES_X86_AVX;
5132 for (uint32_t channels = 32; channels < 256; channels += 48) {
5133 DWConvMicrokernelTester()
5134 .cr(16)
5135 .kr(4)
5136 .channels(channels)
5137 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5138 }
5139 }
5140
5141 TEST(F32_DWCONV_UP16X4__AVX, c_div_16_with_qmin) {
5142 TEST_REQUIRES_X86_AVX;
5143 for (uint32_t channels = 32; channels < 256; channels += 48) {
5144 DWConvMicrokernelTester()
5145 .cr(16)
5146 .kr(4)
5147 .channels(channels)
5148 .qmin(128)
5149 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5150 }
5151 }
5152
5153 TEST(F32_DWCONV_UP16X4__AVX, c_div_16_with_qmax) {
5154 TEST_REQUIRES_X86_AVX;
5155 for (uint32_t channels = 32; channels < 256; channels += 48) {
5156 DWConvMicrokernelTester()
5157 .cr(16)
5158 .kr(4)
5159 .channels(channels)
5160 .qmax(128)
5161 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5162 }
5163 }
5164
5165 TEST(F32_DWCONV_UP16X4__AVX, c_lt_16) {
5166 TEST_REQUIRES_X86_AVX;
5167 for (uint32_t channels = 1; channels < 16; channels++) {
5168 DWConvMicrokernelTester()
5169 .cr(16)
5170 .kr(4)
5171 .channels(channels)
5172 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5173 }
5174 }
5175
5176 TEST(F32_DWCONV_UP16X4__AVX, c_gt_16) {
5177 TEST_REQUIRES_X86_AVX;
5178 for (uint32_t channels = 17; channels < 32; channels++) {
5179 DWConvMicrokernelTester()
5180 .cr(16)
5181 .kr(4)
5182 .channels(channels)
5183 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5184 }
5185 }
5186
5187 TEST(F32_DWCONV_UP16X4__AVX, c_gt_16_with_qmin) {
5188 TEST_REQUIRES_X86_AVX;
5189 for (uint32_t channels = 17; channels < 32; channels++) {
5190 DWConvMicrokernelTester()
5191 .cr(16)
5192 .kr(4)
5193 .channels(channels)
5194 .qmin(128)
5195 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5196 }
5197 }
5198
5199 TEST(F32_DWCONV_UP16X4__AVX, c_gt_16_with_qmax) {
5200 TEST_REQUIRES_X86_AVX;
5201 for (uint32_t channels = 17; channels < 32; channels++) {
5202 DWConvMicrokernelTester()
5203 .cr(16)
5204 .kr(4)
5205 .channels(channels)
5206 .qmax(128)
5207 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5208 }
5209 }
5210
5211 TEST(F32_DWCONV_UP16X4__AVX, multipixel) {
5212 TEST_REQUIRES_X86_AVX;
5213 for (size_t channels = 1; channels <= 80; channels += 15) {
5214 DWConvMicrokernelTester()
5215 .cr(16)
5216 .kr(4)
5217 .channels(channels)
5218 .width(3)
5219 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5220 }
5221 }
5222
5223 TEST(F32_DWCONV_UP16X4__AVX, multipixel_with_step) {
5224 TEST_REQUIRES_X86_AVX;
5225 for (size_t channels = 1; channels <= 80; channels += 15) {
5226 for (size_t step = 2; step <= 4; step++) {
5227 DWConvMicrokernelTester()
5228 .cr(16)
5229 .kr(4)
5230 .channels(channels)
5231 .width(3)
5232 .step(step)
5233 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5234 }
5235 }
5236 }
5237
5238 TEST(F32_DWCONV_UP16X4__AVX, multipixel_with_output_stride) {
5239 TEST_REQUIRES_X86_AVX;
5240 for (size_t channels = 1; channels <= 80; channels += 15) {
5241 DWConvMicrokernelTester()
5242 .cr(16)
5243 .kr(4)
5244 .channels(16)
5245 .width(5)
5246 .output_stride(83)
5247 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5248 }
5249 }
5250
5251 TEST(F32_DWCONV_UP16X4__AVX, multipixel_with_qmin) {
5252 TEST_REQUIRES_X86_AVX;
5253 for (size_t channels = 1; channels <= 80; channels += 15) {
5254 DWConvMicrokernelTester()
5255 .cr(16)
5256 .kr(4)
5257 .channels(channels)
5258 .width(3)
5259 .qmin(128)
5260 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5261 }
5262 }
5263
5264 TEST(F32_DWCONV_UP16X4__AVX, multipixel_with_qmax) {
5265 TEST_REQUIRES_X86_AVX;
5266 for (size_t channels = 1; channels <= 80; channels += 15) {
5267 DWConvMicrokernelTester()
5268 .cr(16)
5269 .kr(4)
5270 .channels(channels)
5271 .width(3)
5272 .qmax(128)
5273 .Test(xnn_f32_dwconv_ukernel_up16x4__avx);
5274 }
5275 }
5276#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5277
5278
5279#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5280 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_eq_16) {
5281 TEST_REQUIRES_X86_AVX;
5282 DWConvMicrokernelTester()
5283 .cr(16)
5284 .kr(4)
5285 .channels(16)
5286 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5287 }
5288
5289 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_div_16) {
5290 TEST_REQUIRES_X86_AVX;
5291 for (uint32_t channels = 32; channels < 256; channels += 48) {
5292 DWConvMicrokernelTester()
5293 .cr(16)
5294 .kr(4)
5295 .channels(channels)
5296 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5297 }
5298 }
5299
5300 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_div_16_with_qmin) {
5301 TEST_REQUIRES_X86_AVX;
5302 for (uint32_t channels = 32; channels < 256; channels += 48) {
5303 DWConvMicrokernelTester()
5304 .cr(16)
5305 .kr(4)
5306 .channels(channels)
5307 .qmin(128)
5308 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5309 }
5310 }
5311
5312 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_div_16_with_qmax) {
5313 TEST_REQUIRES_X86_AVX;
5314 for (uint32_t channels = 32; channels < 256; channels += 48) {
5315 DWConvMicrokernelTester()
5316 .cr(16)
5317 .kr(4)
5318 .channels(channels)
5319 .qmax(128)
5320 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5321 }
5322 }
5323
5324 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_lt_16) {
5325 TEST_REQUIRES_X86_AVX;
5326 for (uint32_t channels = 1; channels < 16; channels++) {
5327 DWConvMicrokernelTester()
5328 .cr(16)
5329 .kr(4)
5330 .channels(channels)
5331 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5332 }
5333 }
5334
5335 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_gt_16) {
5336 TEST_REQUIRES_X86_AVX;
5337 for (uint32_t channels = 17; channels < 32; channels++) {
5338 DWConvMicrokernelTester()
5339 .cr(16)
5340 .kr(4)
5341 .channels(channels)
5342 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5343 }
5344 }
5345
5346 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_gt_16_with_qmin) {
5347 TEST_REQUIRES_X86_AVX;
5348 for (uint32_t channels = 17; channels < 32; channels++) {
5349 DWConvMicrokernelTester()
5350 .cr(16)
5351 .kr(4)
5352 .channels(channels)
5353 .qmin(128)
5354 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5355 }
5356 }
5357
5358 TEST(F32_DWCONV_UP16X4__AVX_ACC2, c_gt_16_with_qmax) {
5359 TEST_REQUIRES_X86_AVX;
5360 for (uint32_t channels = 17; channels < 32; channels++) {
5361 DWConvMicrokernelTester()
5362 .cr(16)
5363 .kr(4)
5364 .channels(channels)
5365 .qmax(128)
5366 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5367 }
5368 }
5369
5370 TEST(F32_DWCONV_UP16X4__AVX_ACC2, multipixel) {
5371 TEST_REQUIRES_X86_AVX;
5372 for (size_t channels = 1; channels <= 80; channels += 15) {
5373 DWConvMicrokernelTester()
5374 .cr(16)
5375 .kr(4)
5376 .channels(channels)
5377 .width(3)
5378 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5379 }
5380 }
5381
5382 TEST(F32_DWCONV_UP16X4__AVX_ACC2, multipixel_with_step) {
5383 TEST_REQUIRES_X86_AVX;
5384 for (size_t channels = 1; channels <= 80; channels += 15) {
5385 for (size_t step = 2; step <= 4; step++) {
5386 DWConvMicrokernelTester()
5387 .cr(16)
5388 .kr(4)
5389 .channels(channels)
5390 .width(3)
5391 .step(step)
5392 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5393 }
5394 }
5395 }
5396
5397 TEST(F32_DWCONV_UP16X4__AVX_ACC2, multipixel_with_output_stride) {
5398 TEST_REQUIRES_X86_AVX;
5399 for (size_t channels = 1; channels <= 80; channels += 15) {
5400 DWConvMicrokernelTester()
5401 .cr(16)
5402 .kr(4)
5403 .channels(16)
5404 .width(5)
5405 .output_stride(83)
5406 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5407 }
5408 }
5409
5410 TEST(F32_DWCONV_UP16X4__AVX_ACC2, multipixel_with_qmin) {
5411 TEST_REQUIRES_X86_AVX;
5412 for (size_t channels = 1; channels <= 80; channels += 15) {
5413 DWConvMicrokernelTester()
5414 .cr(16)
5415 .kr(4)
5416 .channels(channels)
5417 .width(3)
5418 .qmin(128)
5419 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5420 }
5421 }
5422
5423 TEST(F32_DWCONV_UP16X4__AVX_ACC2, multipixel_with_qmax) {
5424 TEST_REQUIRES_X86_AVX;
5425 for (size_t channels = 1; channels <= 80; channels += 15) {
5426 DWConvMicrokernelTester()
5427 .cr(16)
5428 .kr(4)
5429 .channels(channels)
5430 .width(3)
5431 .qmax(128)
5432 .Test(xnn_f32_dwconv_ukernel_up16x4__avx_acc2);
5433 }
5434 }
5435#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5436
5437
5438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5439 TEST(F32_DWCONV_UP8X25__FMA3, c_eq_8) {
5440 TEST_REQUIRES_X86_FMA3;
5441 DWConvMicrokernelTester()
5442 .cr(8)
5443 .kr(25)
5444 .channels(8)
5445 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5446 }
5447
5448 TEST(F32_DWCONV_UP8X25__FMA3, c_div_8) {
5449 TEST_REQUIRES_X86_FMA3;
5450 for (uint32_t channels = 16; channels < 128; channels += 24) {
5451 DWConvMicrokernelTester()
5452 .cr(8)
5453 .kr(25)
5454 .channels(channels)
5455 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5456 }
5457 }
5458
5459 TEST(F32_DWCONV_UP8X25__FMA3, c_div_8_with_qmin) {
5460 TEST_REQUIRES_X86_FMA3;
5461 for (uint32_t channels = 16; channels < 128; channels += 24) {
5462 DWConvMicrokernelTester()
5463 .cr(8)
5464 .kr(25)
5465 .channels(channels)
5466 .qmin(128)
5467 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5468 }
5469 }
5470
5471 TEST(F32_DWCONV_UP8X25__FMA3, c_div_8_with_qmax) {
5472 TEST_REQUIRES_X86_FMA3;
5473 for (uint32_t channels = 16; channels < 128; channels += 24) {
5474 DWConvMicrokernelTester()
5475 .cr(8)
5476 .kr(25)
5477 .channels(channels)
5478 .qmax(128)
5479 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5480 }
5481 }
5482
5483 TEST(F32_DWCONV_UP8X25__FMA3, c_lt_8) {
5484 TEST_REQUIRES_X86_FMA3;
5485 for (uint32_t channels = 1; channels < 8; channels++) {
5486 DWConvMicrokernelTester()
5487 .cr(8)
5488 .kr(25)
5489 .channels(channels)
5490 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5491 }
5492 }
5493
5494 TEST(F32_DWCONV_UP8X25__FMA3, c_gt_8) {
5495 TEST_REQUIRES_X86_FMA3;
5496 for (uint32_t channels = 9; channels < 16; channels++) {
5497 DWConvMicrokernelTester()
5498 .cr(8)
5499 .kr(25)
5500 .channels(channels)
5501 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5502 }
5503 }
5504
5505 TEST(F32_DWCONV_UP8X25__FMA3, c_gt_8_with_qmin) {
5506 TEST_REQUIRES_X86_FMA3;
5507 for (uint32_t channels = 9; channels < 16; channels++) {
5508 DWConvMicrokernelTester()
5509 .cr(8)
5510 .kr(25)
5511 .channels(channels)
5512 .qmin(128)
5513 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5514 }
5515 }
5516
5517 TEST(F32_DWCONV_UP8X25__FMA3, c_gt_8_with_qmax) {
5518 TEST_REQUIRES_X86_FMA3;
5519 for (uint32_t channels = 9; channels < 16; channels++) {
5520 DWConvMicrokernelTester()
5521 .cr(8)
5522 .kr(25)
5523 .channels(channels)
5524 .qmax(128)
5525 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5526 }
5527 }
5528
5529 TEST(F32_DWCONV_UP8X25__FMA3, multipixel) {
5530 TEST_REQUIRES_X86_FMA3;
5531 for (size_t channels = 1; channels <= 40; channels += 7) {
5532 DWConvMicrokernelTester()
5533 .cr(8)
5534 .kr(25)
5535 .channels(channels)
5536 .width(3)
5537 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5538 }
5539 }
5540
5541 TEST(F32_DWCONV_UP8X25__FMA3, multipixel_with_step) {
5542 TEST_REQUIRES_X86_FMA3;
5543 for (size_t channels = 1; channels <= 40; channels += 7) {
5544 for (size_t step = 2; step <= 25; step++) {
5545 DWConvMicrokernelTester()
5546 .cr(8)
5547 .kr(25)
5548 .channels(channels)
5549 .width(3)
5550 .step(step)
5551 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5552 }
5553 }
5554 }
5555
5556 TEST(F32_DWCONV_UP8X25__FMA3, multipixel_with_output_stride) {
5557 TEST_REQUIRES_X86_FMA3;
5558 for (size_t channels = 1; channels <= 40; channels += 7) {
5559 DWConvMicrokernelTester()
5560 .cr(8)
5561 .kr(25)
5562 .channels(8)
5563 .width(5)
5564 .output_stride(43)
5565 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5566 }
5567 }
5568
5569 TEST(F32_DWCONV_UP8X25__FMA3, multipixel_with_qmin) {
5570 TEST_REQUIRES_X86_FMA3;
5571 for (size_t channels = 1; channels <= 40; channels += 7) {
5572 DWConvMicrokernelTester()
5573 .cr(8)
5574 .kr(25)
5575 .channels(channels)
5576 .width(3)
5577 .qmin(128)
5578 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5579 }
5580 }
5581
5582 TEST(F32_DWCONV_UP8X25__FMA3, multipixel_with_qmax) {
5583 TEST_REQUIRES_X86_FMA3;
5584 for (size_t channels = 1; channels <= 40; channels += 7) {
5585 DWConvMicrokernelTester()
5586 .cr(8)
5587 .kr(25)
5588 .channels(channels)
5589 .width(3)
5590 .qmax(128)
5591 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3);
5592 }
5593 }
5594#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5595
5596
5597#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5598 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_eq_8) {
5599 TEST_REQUIRES_X86_FMA3;
5600 DWConvMicrokernelTester()
5601 .cr(8)
5602 .kr(25)
5603 .channels(8)
5604 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5605 }
5606
5607 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_div_8) {
5608 TEST_REQUIRES_X86_FMA3;
5609 for (uint32_t channels = 16; channels < 128; channels += 24) {
5610 DWConvMicrokernelTester()
5611 .cr(8)
5612 .kr(25)
5613 .channels(channels)
5614 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5615 }
5616 }
5617
5618 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_div_8_with_qmin) {
5619 TEST_REQUIRES_X86_FMA3;
5620 for (uint32_t channels = 16; channels < 128; channels += 24) {
5621 DWConvMicrokernelTester()
5622 .cr(8)
5623 .kr(25)
5624 .channels(channels)
5625 .qmin(128)
5626 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5627 }
5628 }
5629
5630 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_div_8_with_qmax) {
5631 TEST_REQUIRES_X86_FMA3;
5632 for (uint32_t channels = 16; channels < 128; channels += 24) {
5633 DWConvMicrokernelTester()
5634 .cr(8)
5635 .kr(25)
5636 .channels(channels)
5637 .qmax(128)
5638 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5639 }
5640 }
5641
5642 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_lt_8) {
5643 TEST_REQUIRES_X86_FMA3;
5644 for (uint32_t channels = 1; channels < 8; channels++) {
5645 DWConvMicrokernelTester()
5646 .cr(8)
5647 .kr(25)
5648 .channels(channels)
5649 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5650 }
5651 }
5652
5653 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_gt_8) {
5654 TEST_REQUIRES_X86_FMA3;
5655 for (uint32_t channels = 9; channels < 16; channels++) {
5656 DWConvMicrokernelTester()
5657 .cr(8)
5658 .kr(25)
5659 .channels(channels)
5660 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5661 }
5662 }
5663
5664 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_gt_8_with_qmin) {
5665 TEST_REQUIRES_X86_FMA3;
5666 for (uint32_t channels = 9; channels < 16; channels++) {
5667 DWConvMicrokernelTester()
5668 .cr(8)
5669 .kr(25)
5670 .channels(channels)
5671 .qmin(128)
5672 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5673 }
5674 }
5675
5676 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, c_gt_8_with_qmax) {
5677 TEST_REQUIRES_X86_FMA3;
5678 for (uint32_t channels = 9; channels < 16; channels++) {
5679 DWConvMicrokernelTester()
5680 .cr(8)
5681 .kr(25)
5682 .channels(channels)
5683 .qmax(128)
5684 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5685 }
5686 }
5687
5688 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, multipixel) {
5689 TEST_REQUIRES_X86_FMA3;
5690 for (size_t channels = 1; channels <= 40; channels += 7) {
5691 DWConvMicrokernelTester()
5692 .cr(8)
5693 .kr(25)
5694 .channels(channels)
5695 .width(3)
5696 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5697 }
5698 }
5699
5700 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, multipixel_with_step) {
5701 TEST_REQUIRES_X86_FMA3;
5702 for (size_t channels = 1; channels <= 40; channels += 7) {
5703 for (size_t step = 2; step <= 25; step++) {
5704 DWConvMicrokernelTester()
5705 .cr(8)
5706 .kr(25)
5707 .channels(channels)
5708 .width(3)
5709 .step(step)
5710 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5711 }
5712 }
5713 }
5714
5715 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, multipixel_with_output_stride) {
5716 TEST_REQUIRES_X86_FMA3;
5717 for (size_t channels = 1; channels <= 40; channels += 7) {
5718 DWConvMicrokernelTester()
5719 .cr(8)
5720 .kr(25)
5721 .channels(8)
5722 .width(5)
5723 .output_stride(43)
5724 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5725 }
5726 }
5727
5728 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, multipixel_with_qmin) {
5729 TEST_REQUIRES_X86_FMA3;
5730 for (size_t channels = 1; channels <= 40; channels += 7) {
5731 DWConvMicrokernelTester()
5732 .cr(8)
5733 .kr(25)
5734 .channels(channels)
5735 .width(3)
5736 .qmin(128)
5737 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5738 }
5739 }
5740
5741 TEST(F32_DWCONV_UP8X25__FMA3_ACC2, multipixel_with_qmax) {
5742 TEST_REQUIRES_X86_FMA3;
5743 for (size_t channels = 1; channels <= 40; channels += 7) {
5744 DWConvMicrokernelTester()
5745 .cr(8)
5746 .kr(25)
5747 .channels(channels)
5748 .width(3)
5749 .qmax(128)
5750 .Test(xnn_f32_dwconv_ukernel_up8x25__fma3_acc2);
5751 }
5752 }
5753#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5754
5755
5756#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5757 TEST(F32_DWCONV_UP16X25__FMA3, c_eq_16) {
5758 TEST_REQUIRES_X86_FMA3;
5759 DWConvMicrokernelTester()
5760 .cr(16)
5761 .kr(25)
5762 .channels(16)
5763 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5764 }
5765
5766 TEST(F32_DWCONV_UP16X25__FMA3, c_div_16) {
5767 TEST_REQUIRES_X86_FMA3;
5768 for (uint32_t channels = 32; channels < 256; channels += 48) {
5769 DWConvMicrokernelTester()
5770 .cr(16)
5771 .kr(25)
5772 .channels(channels)
5773 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5774 }
5775 }
5776
5777 TEST(F32_DWCONV_UP16X25__FMA3, c_div_16_with_qmin) {
5778 TEST_REQUIRES_X86_FMA3;
5779 for (uint32_t channels = 32; channels < 256; channels += 48) {
5780 DWConvMicrokernelTester()
5781 .cr(16)
5782 .kr(25)
5783 .channels(channels)
5784 .qmin(128)
5785 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5786 }
5787 }
5788
5789 TEST(F32_DWCONV_UP16X25__FMA3, c_div_16_with_qmax) {
5790 TEST_REQUIRES_X86_FMA3;
5791 for (uint32_t channels = 32; channels < 256; channels += 48) {
5792 DWConvMicrokernelTester()
5793 .cr(16)
5794 .kr(25)
5795 .channels(channels)
5796 .qmax(128)
5797 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5798 }
5799 }
5800
5801 TEST(F32_DWCONV_UP16X25__FMA3, c_lt_16) {
5802 TEST_REQUIRES_X86_FMA3;
5803 for (uint32_t channels = 1; channels < 16; channels++) {
5804 DWConvMicrokernelTester()
5805 .cr(16)
5806 .kr(25)
5807 .channels(channels)
5808 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5809 }
5810 }
5811
5812 TEST(F32_DWCONV_UP16X25__FMA3, c_gt_16) {
5813 TEST_REQUIRES_X86_FMA3;
5814 for (uint32_t channels = 17; channels < 32; channels++) {
5815 DWConvMicrokernelTester()
5816 .cr(16)
5817 .kr(25)
5818 .channels(channels)
5819 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5820 }
5821 }
5822
5823 TEST(F32_DWCONV_UP16X25__FMA3, c_gt_16_with_qmin) {
5824 TEST_REQUIRES_X86_FMA3;
5825 for (uint32_t channels = 17; channels < 32; channels++) {
5826 DWConvMicrokernelTester()
5827 .cr(16)
5828 .kr(25)
5829 .channels(channels)
5830 .qmin(128)
5831 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5832 }
5833 }
5834
5835 TEST(F32_DWCONV_UP16X25__FMA3, c_gt_16_with_qmax) {
5836 TEST_REQUIRES_X86_FMA3;
5837 for (uint32_t channels = 17; channels < 32; channels++) {
5838 DWConvMicrokernelTester()
5839 .cr(16)
5840 .kr(25)
5841 .channels(channels)
5842 .qmax(128)
5843 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5844 }
5845 }
5846
5847 TEST(F32_DWCONV_UP16X25__FMA3, multipixel) {
5848 TEST_REQUIRES_X86_FMA3;
5849 for (size_t channels = 1; channels <= 80; channels += 15) {
5850 DWConvMicrokernelTester()
5851 .cr(16)
5852 .kr(25)
5853 .channels(channels)
5854 .width(3)
5855 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5856 }
5857 }
5858
5859 TEST(F32_DWCONV_UP16X25__FMA3, multipixel_with_step) {
5860 TEST_REQUIRES_X86_FMA3;
5861 for (size_t channels = 1; channels <= 80; channels += 15) {
5862 for (size_t step = 2; step <= 25; step++) {
5863 DWConvMicrokernelTester()
5864 .cr(16)
5865 .kr(25)
5866 .channels(channels)
5867 .width(3)
5868 .step(step)
5869 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5870 }
5871 }
5872 }
5873
5874 TEST(F32_DWCONV_UP16X25__FMA3, multipixel_with_output_stride) {
5875 TEST_REQUIRES_X86_FMA3;
5876 for (size_t channels = 1; channels <= 80; channels += 15) {
5877 DWConvMicrokernelTester()
5878 .cr(16)
5879 .kr(25)
5880 .channels(16)
5881 .width(5)
5882 .output_stride(83)
5883 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5884 }
5885 }
5886
5887 TEST(F32_DWCONV_UP16X25__FMA3, multipixel_with_qmin) {
5888 TEST_REQUIRES_X86_FMA3;
5889 for (size_t channels = 1; channels <= 80; channels += 15) {
5890 DWConvMicrokernelTester()
5891 .cr(16)
5892 .kr(25)
5893 .channels(channels)
5894 .width(3)
5895 .qmin(128)
5896 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5897 }
5898 }
5899
5900 TEST(F32_DWCONV_UP16X25__FMA3, multipixel_with_qmax) {
5901 TEST_REQUIRES_X86_FMA3;
5902 for (size_t channels = 1; channels <= 80; channels += 15) {
5903 DWConvMicrokernelTester()
5904 .cr(16)
5905 .kr(25)
5906 .channels(channels)
5907 .width(3)
5908 .qmax(128)
5909 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3);
5910 }
5911 }
5912#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5913
5914
5915#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5916 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_eq_16) {
5917 TEST_REQUIRES_X86_FMA3;
5918 DWConvMicrokernelTester()
5919 .cr(16)
5920 .kr(25)
5921 .channels(16)
5922 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5923 }
5924
5925 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_div_16) {
5926 TEST_REQUIRES_X86_FMA3;
5927 for (uint32_t channels = 32; channels < 256; channels += 48) {
5928 DWConvMicrokernelTester()
5929 .cr(16)
5930 .kr(25)
5931 .channels(channels)
5932 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5933 }
5934 }
5935
5936 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_div_16_with_qmin) {
5937 TEST_REQUIRES_X86_FMA3;
5938 for (uint32_t channels = 32; channels < 256; channels += 48) {
5939 DWConvMicrokernelTester()
5940 .cr(16)
5941 .kr(25)
5942 .channels(channels)
5943 .qmin(128)
5944 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5945 }
5946 }
5947
5948 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_div_16_with_qmax) {
5949 TEST_REQUIRES_X86_FMA3;
5950 for (uint32_t channels = 32; channels < 256; channels += 48) {
5951 DWConvMicrokernelTester()
5952 .cr(16)
5953 .kr(25)
5954 .channels(channels)
5955 .qmax(128)
5956 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5957 }
5958 }
5959
5960 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_lt_16) {
5961 TEST_REQUIRES_X86_FMA3;
5962 for (uint32_t channels = 1; channels < 16; channels++) {
5963 DWConvMicrokernelTester()
5964 .cr(16)
5965 .kr(25)
5966 .channels(channels)
5967 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5968 }
5969 }
5970
5971 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_gt_16) {
5972 TEST_REQUIRES_X86_FMA3;
5973 for (uint32_t channels = 17; channels < 32; channels++) {
5974 DWConvMicrokernelTester()
5975 .cr(16)
5976 .kr(25)
5977 .channels(channels)
5978 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5979 }
5980 }
5981
5982 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_gt_16_with_qmin) {
5983 TEST_REQUIRES_X86_FMA3;
5984 for (uint32_t channels = 17; channels < 32; channels++) {
5985 DWConvMicrokernelTester()
5986 .cr(16)
5987 .kr(25)
5988 .channels(channels)
5989 .qmin(128)
5990 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
5991 }
5992 }
5993
5994 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, c_gt_16_with_qmax) {
5995 TEST_REQUIRES_X86_FMA3;
5996 for (uint32_t channels = 17; channels < 32; channels++) {
5997 DWConvMicrokernelTester()
5998 .cr(16)
5999 .kr(25)
6000 .channels(channels)
6001 .qmax(128)
6002 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6003 }
6004 }
6005
6006 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, multipixel) {
6007 TEST_REQUIRES_X86_FMA3;
6008 for (size_t channels = 1; channels <= 80; channels += 15) {
6009 DWConvMicrokernelTester()
6010 .cr(16)
6011 .kr(25)
6012 .channels(channels)
6013 .width(3)
6014 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6015 }
6016 }
6017
6018 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, multipixel_with_step) {
6019 TEST_REQUIRES_X86_FMA3;
6020 for (size_t channels = 1; channels <= 80; channels += 15) {
6021 for (size_t step = 2; step <= 25; step++) {
6022 DWConvMicrokernelTester()
6023 .cr(16)
6024 .kr(25)
6025 .channels(channels)
6026 .width(3)
6027 .step(step)
6028 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6029 }
6030 }
6031 }
6032
6033 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, multipixel_with_output_stride) {
6034 TEST_REQUIRES_X86_FMA3;
6035 for (size_t channels = 1; channels <= 80; channels += 15) {
6036 DWConvMicrokernelTester()
6037 .cr(16)
6038 .kr(25)
6039 .channels(16)
6040 .width(5)
6041 .output_stride(83)
6042 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6043 }
6044 }
6045
6046 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, multipixel_with_qmin) {
6047 TEST_REQUIRES_X86_FMA3;
6048 for (size_t channels = 1; channels <= 80; channels += 15) {
6049 DWConvMicrokernelTester()
6050 .cr(16)
6051 .kr(25)
6052 .channels(channels)
6053 .width(3)
6054 .qmin(128)
6055 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6056 }
6057 }
6058
6059 TEST(F32_DWCONV_UP16X25__FMA3_ACC2, multipixel_with_qmax) {
6060 TEST_REQUIRES_X86_FMA3;
6061 for (size_t channels = 1; channels <= 80; channels += 15) {
6062 DWConvMicrokernelTester()
6063 .cr(16)
6064 .kr(25)
6065 .channels(channels)
6066 .width(3)
6067 .qmax(128)
6068 .Test(xnn_f32_dwconv_ukernel_up16x25__fma3_acc2);
6069 }
6070 }
6071#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6072
6073
6074#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6075 TEST(F32_DWCONV_UP8X9__FMA3, c_eq_8) {
6076 TEST_REQUIRES_X86_FMA3;
6077 DWConvMicrokernelTester()
6078 .cr(8)
6079 .kr(9)
6080 .channels(8)
6081 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6082 }
6083
6084 TEST(F32_DWCONV_UP8X9__FMA3, c_div_8) {
6085 TEST_REQUIRES_X86_FMA3;
6086 for (uint32_t channels = 16; channels < 128; channels += 24) {
6087 DWConvMicrokernelTester()
6088 .cr(8)
6089 .kr(9)
6090 .channels(channels)
6091 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6092 }
6093 }
6094
6095 TEST(F32_DWCONV_UP8X9__FMA3, c_div_8_with_qmin) {
6096 TEST_REQUIRES_X86_FMA3;
6097 for (uint32_t channels = 16; channels < 128; channels += 24) {
6098 DWConvMicrokernelTester()
6099 .cr(8)
6100 .kr(9)
6101 .channels(channels)
6102 .qmin(128)
6103 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6104 }
6105 }
6106
6107 TEST(F32_DWCONV_UP8X9__FMA3, c_div_8_with_qmax) {
6108 TEST_REQUIRES_X86_FMA3;
6109 for (uint32_t channels = 16; channels < 128; channels += 24) {
6110 DWConvMicrokernelTester()
6111 .cr(8)
6112 .kr(9)
6113 .channels(channels)
6114 .qmax(128)
6115 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6116 }
6117 }
6118
6119 TEST(F32_DWCONV_UP8X9__FMA3, c_lt_8) {
6120 TEST_REQUIRES_X86_FMA3;
6121 for (uint32_t channels = 1; channels < 8; channels++) {
6122 DWConvMicrokernelTester()
6123 .cr(8)
6124 .kr(9)
6125 .channels(channels)
6126 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6127 }
6128 }
6129
6130 TEST(F32_DWCONV_UP8X9__FMA3, c_gt_8) {
6131 TEST_REQUIRES_X86_FMA3;
6132 for (uint32_t channels = 9; channels < 16; channels++) {
6133 DWConvMicrokernelTester()
6134 .cr(8)
6135 .kr(9)
6136 .channels(channels)
6137 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6138 }
6139 }
6140
6141 TEST(F32_DWCONV_UP8X9__FMA3, c_gt_8_with_qmin) {
6142 TEST_REQUIRES_X86_FMA3;
6143 for (uint32_t channels = 9; channels < 16; channels++) {
6144 DWConvMicrokernelTester()
6145 .cr(8)
6146 .kr(9)
6147 .channels(channels)
6148 .qmin(128)
6149 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6150 }
6151 }
6152
6153 TEST(F32_DWCONV_UP8X9__FMA3, c_gt_8_with_qmax) {
6154 TEST_REQUIRES_X86_FMA3;
6155 for (uint32_t channels = 9; channels < 16; channels++) {
6156 DWConvMicrokernelTester()
6157 .cr(8)
6158 .kr(9)
6159 .channels(channels)
6160 .qmax(128)
6161 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6162 }
6163 }
6164
6165 TEST(F32_DWCONV_UP8X9__FMA3, multipixel) {
6166 TEST_REQUIRES_X86_FMA3;
6167 for (size_t channels = 1; channels <= 40; channels += 7) {
6168 DWConvMicrokernelTester()
6169 .cr(8)
6170 .kr(9)
6171 .channels(channels)
6172 .width(3)
6173 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6174 }
6175 }
6176
6177 TEST(F32_DWCONV_UP8X9__FMA3, multipixel_with_step) {
6178 TEST_REQUIRES_X86_FMA3;
6179 for (size_t channels = 1; channels <= 40; channels += 7) {
6180 for (size_t step = 2; step <= 9; step++) {
6181 DWConvMicrokernelTester()
6182 .cr(8)
6183 .kr(9)
6184 .channels(channels)
6185 .width(3)
6186 .step(step)
6187 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6188 }
6189 }
6190 }
6191
6192 TEST(F32_DWCONV_UP8X9__FMA3, multipixel_with_output_stride) {
6193 TEST_REQUIRES_X86_FMA3;
6194 for (size_t channels = 1; channels <= 40; channels += 7) {
6195 DWConvMicrokernelTester()
6196 .cr(8)
6197 .kr(9)
6198 .channels(8)
6199 .width(5)
6200 .output_stride(43)
6201 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6202 }
6203 }
6204
6205 TEST(F32_DWCONV_UP8X9__FMA3, multipixel_with_qmin) {
6206 TEST_REQUIRES_X86_FMA3;
6207 for (size_t channels = 1; channels <= 40; channels += 7) {
6208 DWConvMicrokernelTester()
6209 .cr(8)
6210 .kr(9)
6211 .channels(channels)
6212 .width(3)
6213 .qmin(128)
6214 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6215 }
6216 }
6217
6218 TEST(F32_DWCONV_UP8X9__FMA3, multipixel_with_qmax) {
6219 TEST_REQUIRES_X86_FMA3;
6220 for (size_t channels = 1; channels <= 40; channels += 7) {
6221 DWConvMicrokernelTester()
6222 .cr(8)
6223 .kr(9)
6224 .channels(channels)
6225 .width(3)
6226 .qmax(128)
6227 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3);
6228 }
6229 }
6230#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6231
6232
6233#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6234 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_eq_8) {
6235 TEST_REQUIRES_X86_FMA3;
6236 DWConvMicrokernelTester()
6237 .cr(8)
6238 .kr(9)
6239 .channels(8)
6240 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6241 }
6242
6243 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_div_8) {
6244 TEST_REQUIRES_X86_FMA3;
6245 for (uint32_t channels = 16; channels < 128; channels += 24) {
6246 DWConvMicrokernelTester()
6247 .cr(8)
6248 .kr(9)
6249 .channels(channels)
6250 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6251 }
6252 }
6253
6254 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_div_8_with_qmin) {
6255 TEST_REQUIRES_X86_FMA3;
6256 for (uint32_t channels = 16; channels < 128; channels += 24) {
6257 DWConvMicrokernelTester()
6258 .cr(8)
6259 .kr(9)
6260 .channels(channels)
6261 .qmin(128)
6262 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6263 }
6264 }
6265
6266 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_div_8_with_qmax) {
6267 TEST_REQUIRES_X86_FMA3;
6268 for (uint32_t channels = 16; channels < 128; channels += 24) {
6269 DWConvMicrokernelTester()
6270 .cr(8)
6271 .kr(9)
6272 .channels(channels)
6273 .qmax(128)
6274 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6275 }
6276 }
6277
6278 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_lt_8) {
6279 TEST_REQUIRES_X86_FMA3;
6280 for (uint32_t channels = 1; channels < 8; channels++) {
6281 DWConvMicrokernelTester()
6282 .cr(8)
6283 .kr(9)
6284 .channels(channels)
6285 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6286 }
6287 }
6288
6289 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_gt_8) {
6290 TEST_REQUIRES_X86_FMA3;
6291 for (uint32_t channels = 9; channels < 16; channels++) {
6292 DWConvMicrokernelTester()
6293 .cr(8)
6294 .kr(9)
6295 .channels(channels)
6296 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6297 }
6298 }
6299
6300 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_gt_8_with_qmin) {
6301 TEST_REQUIRES_X86_FMA3;
6302 for (uint32_t channels = 9; channels < 16; channels++) {
6303 DWConvMicrokernelTester()
6304 .cr(8)
6305 .kr(9)
6306 .channels(channels)
6307 .qmin(128)
6308 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6309 }
6310 }
6311
6312 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, c_gt_8_with_qmax) {
6313 TEST_REQUIRES_X86_FMA3;
6314 for (uint32_t channels = 9; channels < 16; channels++) {
6315 DWConvMicrokernelTester()
6316 .cr(8)
6317 .kr(9)
6318 .channels(channels)
6319 .qmax(128)
6320 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6321 }
6322 }
6323
6324 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, multipixel) {
6325 TEST_REQUIRES_X86_FMA3;
6326 for (size_t channels = 1; channels <= 40; channels += 7) {
6327 DWConvMicrokernelTester()
6328 .cr(8)
6329 .kr(9)
6330 .channels(channels)
6331 .width(3)
6332 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6333 }
6334 }
6335
6336 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, multipixel_with_step) {
6337 TEST_REQUIRES_X86_FMA3;
6338 for (size_t channels = 1; channels <= 40; channels += 7) {
6339 for (size_t step = 2; step <= 9; step++) {
6340 DWConvMicrokernelTester()
6341 .cr(8)
6342 .kr(9)
6343 .channels(channels)
6344 .width(3)
6345 .step(step)
6346 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6347 }
6348 }
6349 }
6350
6351 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, multipixel_with_output_stride) {
6352 TEST_REQUIRES_X86_FMA3;
6353 for (size_t channels = 1; channels <= 40; channels += 7) {
6354 DWConvMicrokernelTester()
6355 .cr(8)
6356 .kr(9)
6357 .channels(8)
6358 .width(5)
6359 .output_stride(43)
6360 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6361 }
6362 }
6363
6364 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, multipixel_with_qmin) {
6365 TEST_REQUIRES_X86_FMA3;
6366 for (size_t channels = 1; channels <= 40; channels += 7) {
6367 DWConvMicrokernelTester()
6368 .cr(8)
6369 .kr(9)
6370 .channels(channels)
6371 .width(3)
6372 .qmin(128)
6373 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6374 }
6375 }
6376
6377 TEST(F32_DWCONV_UP8X9__FMA3_ACC2, multipixel_with_qmax) {
6378 TEST_REQUIRES_X86_FMA3;
6379 for (size_t channels = 1; channels <= 40; channels += 7) {
6380 DWConvMicrokernelTester()
6381 .cr(8)
6382 .kr(9)
6383 .channels(channels)
6384 .width(3)
6385 .qmax(128)
6386 .Test(xnn_f32_dwconv_ukernel_up8x9__fma3_acc2);
6387 }
6388 }
6389#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6390
6391
6392#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6393 TEST(F32_DWCONV_UP16X9__FMA3, c_eq_16) {
6394 TEST_REQUIRES_X86_FMA3;
6395 DWConvMicrokernelTester()
6396 .cr(16)
6397 .kr(9)
6398 .channels(16)
6399 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6400 }
6401
6402 TEST(F32_DWCONV_UP16X9__FMA3, c_div_16) {
6403 TEST_REQUIRES_X86_FMA3;
6404 for (uint32_t channels = 32; channels < 256; channels += 48) {
6405 DWConvMicrokernelTester()
6406 .cr(16)
6407 .kr(9)
6408 .channels(channels)
6409 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6410 }
6411 }
6412
6413 TEST(F32_DWCONV_UP16X9__FMA3, c_div_16_with_qmin) {
6414 TEST_REQUIRES_X86_FMA3;
6415 for (uint32_t channels = 32; channels < 256; channels += 48) {
6416 DWConvMicrokernelTester()
6417 .cr(16)
6418 .kr(9)
6419 .channels(channels)
6420 .qmin(128)
6421 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6422 }
6423 }
6424
6425 TEST(F32_DWCONV_UP16X9__FMA3, c_div_16_with_qmax) {
6426 TEST_REQUIRES_X86_FMA3;
6427 for (uint32_t channels = 32; channels < 256; channels += 48) {
6428 DWConvMicrokernelTester()
6429 .cr(16)
6430 .kr(9)
6431 .channels(channels)
6432 .qmax(128)
6433 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6434 }
6435 }
6436
6437 TEST(F32_DWCONV_UP16X9__FMA3, c_lt_16) {
6438 TEST_REQUIRES_X86_FMA3;
6439 for (uint32_t channels = 1; channels < 16; channels++) {
6440 DWConvMicrokernelTester()
6441 .cr(16)
6442 .kr(9)
6443 .channels(channels)
6444 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6445 }
6446 }
6447
6448 TEST(F32_DWCONV_UP16X9__FMA3, c_gt_16) {
6449 TEST_REQUIRES_X86_FMA3;
6450 for (uint32_t channels = 17; channels < 32; channels++) {
6451 DWConvMicrokernelTester()
6452 .cr(16)
6453 .kr(9)
6454 .channels(channels)
6455 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6456 }
6457 }
6458
6459 TEST(F32_DWCONV_UP16X9__FMA3, c_gt_16_with_qmin) {
6460 TEST_REQUIRES_X86_FMA3;
6461 for (uint32_t channels = 17; channels < 32; channels++) {
6462 DWConvMicrokernelTester()
6463 .cr(16)
6464 .kr(9)
6465 .channels(channels)
6466 .qmin(128)
6467 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6468 }
6469 }
6470
6471 TEST(F32_DWCONV_UP16X9__FMA3, c_gt_16_with_qmax) {
6472 TEST_REQUIRES_X86_FMA3;
6473 for (uint32_t channels = 17; channels < 32; channels++) {
6474 DWConvMicrokernelTester()
6475 .cr(16)
6476 .kr(9)
6477 .channels(channels)
6478 .qmax(128)
6479 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6480 }
6481 }
6482
6483 TEST(F32_DWCONV_UP16X9__FMA3, multipixel) {
6484 TEST_REQUIRES_X86_FMA3;
6485 for (size_t channels = 1; channels <= 80; channels += 15) {
6486 DWConvMicrokernelTester()
6487 .cr(16)
6488 .kr(9)
6489 .channels(channels)
6490 .width(3)
6491 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6492 }
6493 }
6494
6495 TEST(F32_DWCONV_UP16X9__FMA3, multipixel_with_step) {
6496 TEST_REQUIRES_X86_FMA3;
6497 for (size_t channels = 1; channels <= 80; channels += 15) {
6498 for (size_t step = 2; step <= 9; step++) {
6499 DWConvMicrokernelTester()
6500 .cr(16)
6501 .kr(9)
6502 .channels(channels)
6503 .width(3)
6504 .step(step)
6505 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6506 }
6507 }
6508 }
6509
6510 TEST(F32_DWCONV_UP16X9__FMA3, multipixel_with_output_stride) {
6511 TEST_REQUIRES_X86_FMA3;
6512 for (size_t channels = 1; channels <= 80; channels += 15) {
6513 DWConvMicrokernelTester()
6514 .cr(16)
6515 .kr(9)
6516 .channels(16)
6517 .width(5)
6518 .output_stride(83)
6519 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6520 }
6521 }
6522
6523 TEST(F32_DWCONV_UP16X9__FMA3, multipixel_with_qmin) {
6524 TEST_REQUIRES_X86_FMA3;
6525 for (size_t channels = 1; channels <= 80; channels += 15) {
6526 DWConvMicrokernelTester()
6527 .cr(16)
6528 .kr(9)
6529 .channels(channels)
6530 .width(3)
6531 .qmin(128)
6532 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6533 }
6534 }
6535
6536 TEST(F32_DWCONV_UP16X9__FMA3, multipixel_with_qmax) {
6537 TEST_REQUIRES_X86_FMA3;
6538 for (size_t channels = 1; channels <= 80; channels += 15) {
6539 DWConvMicrokernelTester()
6540 .cr(16)
6541 .kr(9)
6542 .channels(channels)
6543 .width(3)
6544 .qmax(128)
6545 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3);
6546 }
6547 }
6548#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6549
6550
6551#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6552 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_eq_16) {
6553 TEST_REQUIRES_X86_FMA3;
6554 DWConvMicrokernelTester()
6555 .cr(16)
6556 .kr(9)
6557 .channels(16)
6558 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6559 }
6560
6561 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_div_16) {
6562 TEST_REQUIRES_X86_FMA3;
6563 for (uint32_t channels = 32; channels < 256; channels += 48) {
6564 DWConvMicrokernelTester()
6565 .cr(16)
6566 .kr(9)
6567 .channels(channels)
6568 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6569 }
6570 }
6571
6572 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_div_16_with_qmin) {
6573 TEST_REQUIRES_X86_FMA3;
6574 for (uint32_t channels = 32; channels < 256; channels += 48) {
6575 DWConvMicrokernelTester()
6576 .cr(16)
6577 .kr(9)
6578 .channels(channels)
6579 .qmin(128)
6580 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6581 }
6582 }
6583
6584 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_div_16_with_qmax) {
6585 TEST_REQUIRES_X86_FMA3;
6586 for (uint32_t channels = 32; channels < 256; channels += 48) {
6587 DWConvMicrokernelTester()
6588 .cr(16)
6589 .kr(9)
6590 .channels(channels)
6591 .qmax(128)
6592 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6593 }
6594 }
6595
6596 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_lt_16) {
6597 TEST_REQUIRES_X86_FMA3;
6598 for (uint32_t channels = 1; channels < 16; channels++) {
6599 DWConvMicrokernelTester()
6600 .cr(16)
6601 .kr(9)
6602 .channels(channels)
6603 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6604 }
6605 }
6606
6607 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_gt_16) {
6608 TEST_REQUIRES_X86_FMA3;
6609 for (uint32_t channels = 17; channels < 32; channels++) {
6610 DWConvMicrokernelTester()
6611 .cr(16)
6612 .kr(9)
6613 .channels(channels)
6614 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6615 }
6616 }
6617
6618 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_gt_16_with_qmin) {
6619 TEST_REQUIRES_X86_FMA3;
6620 for (uint32_t channels = 17; channels < 32; channels++) {
6621 DWConvMicrokernelTester()
6622 .cr(16)
6623 .kr(9)
6624 .channels(channels)
6625 .qmin(128)
6626 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6627 }
6628 }
6629
6630 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, c_gt_16_with_qmax) {
6631 TEST_REQUIRES_X86_FMA3;
6632 for (uint32_t channels = 17; channels < 32; channels++) {
6633 DWConvMicrokernelTester()
6634 .cr(16)
6635 .kr(9)
6636 .channels(channels)
6637 .qmax(128)
6638 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6639 }
6640 }
6641
6642 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, multipixel) {
6643 TEST_REQUIRES_X86_FMA3;
6644 for (size_t channels = 1; channels <= 80; channels += 15) {
6645 DWConvMicrokernelTester()
6646 .cr(16)
6647 .kr(9)
6648 .channels(channels)
6649 .width(3)
6650 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6651 }
6652 }
6653
6654 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, multipixel_with_step) {
6655 TEST_REQUIRES_X86_FMA3;
6656 for (size_t channels = 1; channels <= 80; channels += 15) {
6657 for (size_t step = 2; step <= 9; step++) {
6658 DWConvMicrokernelTester()
6659 .cr(16)
6660 .kr(9)
6661 .channels(channels)
6662 .width(3)
6663 .step(step)
6664 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6665 }
6666 }
6667 }
6668
6669 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, multipixel_with_output_stride) {
6670 TEST_REQUIRES_X86_FMA3;
6671 for (size_t channels = 1; channels <= 80; channels += 15) {
6672 DWConvMicrokernelTester()
6673 .cr(16)
6674 .kr(9)
6675 .channels(16)
6676 .width(5)
6677 .output_stride(83)
6678 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6679 }
6680 }
6681
6682 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, multipixel_with_qmin) {
6683 TEST_REQUIRES_X86_FMA3;
6684 for (size_t channels = 1; channels <= 80; channels += 15) {
6685 DWConvMicrokernelTester()
6686 .cr(16)
6687 .kr(9)
6688 .channels(channels)
6689 .width(3)
6690 .qmin(128)
6691 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6692 }
6693 }
6694
6695 TEST(F32_DWCONV_UP16X9__FMA3_ACC2, multipixel_with_qmax) {
6696 TEST_REQUIRES_X86_FMA3;
6697 for (size_t channels = 1; channels <= 80; channels += 15) {
6698 DWConvMicrokernelTester()
6699 .cr(16)
6700 .kr(9)
6701 .channels(channels)
6702 .width(3)
6703 .qmax(128)
6704 .Test(xnn_f32_dwconv_ukernel_up16x9__fma3_acc2);
6705 }
6706 }
6707#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6708
6709
6710#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6711 TEST(F32_DWCONV_UP8X4__FMA3, c_eq_8) {
6712 TEST_REQUIRES_X86_FMA3;
6713 DWConvMicrokernelTester()
6714 .cr(8)
6715 .kr(4)
6716 .channels(8)
6717 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6718 }
6719
6720 TEST(F32_DWCONV_UP8X4__FMA3, c_div_8) {
6721 TEST_REQUIRES_X86_FMA3;
6722 for (uint32_t channels = 16; channels < 128; channels += 24) {
6723 DWConvMicrokernelTester()
6724 .cr(8)
6725 .kr(4)
6726 .channels(channels)
6727 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6728 }
6729 }
6730
6731 TEST(F32_DWCONV_UP8X4__FMA3, c_div_8_with_qmin) {
6732 TEST_REQUIRES_X86_FMA3;
6733 for (uint32_t channels = 16; channels < 128; channels += 24) {
6734 DWConvMicrokernelTester()
6735 .cr(8)
6736 .kr(4)
6737 .channels(channels)
6738 .qmin(128)
6739 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6740 }
6741 }
6742
6743 TEST(F32_DWCONV_UP8X4__FMA3, c_div_8_with_qmax) {
6744 TEST_REQUIRES_X86_FMA3;
6745 for (uint32_t channels = 16; channels < 128; channels += 24) {
6746 DWConvMicrokernelTester()
6747 .cr(8)
6748 .kr(4)
6749 .channels(channels)
6750 .qmax(128)
6751 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6752 }
6753 }
6754
6755 TEST(F32_DWCONV_UP8X4__FMA3, c_lt_8) {
6756 TEST_REQUIRES_X86_FMA3;
6757 for (uint32_t channels = 1; channels < 8; channels++) {
6758 DWConvMicrokernelTester()
6759 .cr(8)
6760 .kr(4)
6761 .channels(channels)
6762 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6763 }
6764 }
6765
6766 TEST(F32_DWCONV_UP8X4__FMA3, c_gt_8) {
6767 TEST_REQUIRES_X86_FMA3;
6768 for (uint32_t channels = 9; channels < 16; channels++) {
6769 DWConvMicrokernelTester()
6770 .cr(8)
6771 .kr(4)
6772 .channels(channels)
6773 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6774 }
6775 }
6776
6777 TEST(F32_DWCONV_UP8X4__FMA3, c_gt_8_with_qmin) {
6778 TEST_REQUIRES_X86_FMA3;
6779 for (uint32_t channels = 9; channels < 16; channels++) {
6780 DWConvMicrokernelTester()
6781 .cr(8)
6782 .kr(4)
6783 .channels(channels)
6784 .qmin(128)
6785 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6786 }
6787 }
6788
6789 TEST(F32_DWCONV_UP8X4__FMA3, c_gt_8_with_qmax) {
6790 TEST_REQUIRES_X86_FMA3;
6791 for (uint32_t channels = 9; channels < 16; channels++) {
6792 DWConvMicrokernelTester()
6793 .cr(8)
6794 .kr(4)
6795 .channels(channels)
6796 .qmax(128)
6797 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6798 }
6799 }
6800
6801 TEST(F32_DWCONV_UP8X4__FMA3, multipixel) {
6802 TEST_REQUIRES_X86_FMA3;
6803 for (size_t channels = 1; channels <= 40; channels += 7) {
6804 DWConvMicrokernelTester()
6805 .cr(8)
6806 .kr(4)
6807 .channels(channels)
6808 .width(3)
6809 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6810 }
6811 }
6812
6813 TEST(F32_DWCONV_UP8X4__FMA3, multipixel_with_step) {
6814 TEST_REQUIRES_X86_FMA3;
6815 for (size_t channels = 1; channels <= 40; channels += 7) {
6816 for (size_t step = 2; step <= 4; step++) {
6817 DWConvMicrokernelTester()
6818 .cr(8)
6819 .kr(4)
6820 .channels(channels)
6821 .width(3)
6822 .step(step)
6823 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6824 }
6825 }
6826 }
6827
6828 TEST(F32_DWCONV_UP8X4__FMA3, multipixel_with_output_stride) {
6829 TEST_REQUIRES_X86_FMA3;
6830 for (size_t channels = 1; channels <= 40; channels += 7) {
6831 DWConvMicrokernelTester()
6832 .cr(8)
6833 .kr(4)
6834 .channels(8)
6835 .width(5)
6836 .output_stride(43)
6837 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6838 }
6839 }
6840
6841 TEST(F32_DWCONV_UP8X4__FMA3, multipixel_with_qmin) {
6842 TEST_REQUIRES_X86_FMA3;
6843 for (size_t channels = 1; channels <= 40; channels += 7) {
6844 DWConvMicrokernelTester()
6845 .cr(8)
6846 .kr(4)
6847 .channels(channels)
6848 .width(3)
6849 .qmin(128)
6850 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6851 }
6852 }
6853
6854 TEST(F32_DWCONV_UP8X4__FMA3, multipixel_with_qmax) {
6855 TEST_REQUIRES_X86_FMA3;
6856 for (size_t channels = 1; channels <= 40; channels += 7) {
6857 DWConvMicrokernelTester()
6858 .cr(8)
6859 .kr(4)
6860 .channels(channels)
6861 .width(3)
6862 .qmax(128)
6863 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3);
6864 }
6865 }
6866#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6867
6868
6869#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6870 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_eq_8) {
6871 TEST_REQUIRES_X86_FMA3;
6872 DWConvMicrokernelTester()
6873 .cr(8)
6874 .kr(4)
6875 .channels(8)
6876 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6877 }
6878
6879 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_div_8) {
6880 TEST_REQUIRES_X86_FMA3;
6881 for (uint32_t channels = 16; channels < 128; channels += 24) {
6882 DWConvMicrokernelTester()
6883 .cr(8)
6884 .kr(4)
6885 .channels(channels)
6886 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6887 }
6888 }
6889
6890 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_div_8_with_qmin) {
6891 TEST_REQUIRES_X86_FMA3;
6892 for (uint32_t channels = 16; channels < 128; channels += 24) {
6893 DWConvMicrokernelTester()
6894 .cr(8)
6895 .kr(4)
6896 .channels(channels)
6897 .qmin(128)
6898 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6899 }
6900 }
6901
6902 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_div_8_with_qmax) {
6903 TEST_REQUIRES_X86_FMA3;
6904 for (uint32_t channels = 16; channels < 128; channels += 24) {
6905 DWConvMicrokernelTester()
6906 .cr(8)
6907 .kr(4)
6908 .channels(channels)
6909 .qmax(128)
6910 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6911 }
6912 }
6913
6914 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_lt_8) {
6915 TEST_REQUIRES_X86_FMA3;
6916 for (uint32_t channels = 1; channels < 8; channels++) {
6917 DWConvMicrokernelTester()
6918 .cr(8)
6919 .kr(4)
6920 .channels(channels)
6921 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6922 }
6923 }
6924
6925 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_gt_8) {
6926 TEST_REQUIRES_X86_FMA3;
6927 for (uint32_t channels = 9; channels < 16; channels++) {
6928 DWConvMicrokernelTester()
6929 .cr(8)
6930 .kr(4)
6931 .channels(channels)
6932 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6933 }
6934 }
6935
6936 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_gt_8_with_qmin) {
6937 TEST_REQUIRES_X86_FMA3;
6938 for (uint32_t channels = 9; channels < 16; channels++) {
6939 DWConvMicrokernelTester()
6940 .cr(8)
6941 .kr(4)
6942 .channels(channels)
6943 .qmin(128)
6944 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6945 }
6946 }
6947
6948 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, c_gt_8_with_qmax) {
6949 TEST_REQUIRES_X86_FMA3;
6950 for (uint32_t channels = 9; channels < 16; channels++) {
6951 DWConvMicrokernelTester()
6952 .cr(8)
6953 .kr(4)
6954 .channels(channels)
6955 .qmax(128)
6956 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6957 }
6958 }
6959
6960 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, multipixel) {
6961 TEST_REQUIRES_X86_FMA3;
6962 for (size_t channels = 1; channels <= 40; channels += 7) {
6963 DWConvMicrokernelTester()
6964 .cr(8)
6965 .kr(4)
6966 .channels(channels)
6967 .width(3)
6968 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6969 }
6970 }
6971
6972 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, multipixel_with_step) {
6973 TEST_REQUIRES_X86_FMA3;
6974 for (size_t channels = 1; channels <= 40; channels += 7) {
6975 for (size_t step = 2; step <= 4; step++) {
6976 DWConvMicrokernelTester()
6977 .cr(8)
6978 .kr(4)
6979 .channels(channels)
6980 .width(3)
6981 .step(step)
6982 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6983 }
6984 }
6985 }
6986
6987 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, multipixel_with_output_stride) {
6988 TEST_REQUIRES_X86_FMA3;
6989 for (size_t channels = 1; channels <= 40; channels += 7) {
6990 DWConvMicrokernelTester()
6991 .cr(8)
6992 .kr(4)
6993 .channels(8)
6994 .width(5)
6995 .output_stride(43)
6996 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
6997 }
6998 }
6999
7000 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, multipixel_with_qmin) {
7001 TEST_REQUIRES_X86_FMA3;
7002 for (size_t channels = 1; channels <= 40; channels += 7) {
7003 DWConvMicrokernelTester()
7004 .cr(8)
7005 .kr(4)
7006 .channels(channels)
7007 .width(3)
7008 .qmin(128)
7009 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
7010 }
7011 }
7012
7013 TEST(F32_DWCONV_UP8X4__FMA3_ACC2, multipixel_with_qmax) {
7014 TEST_REQUIRES_X86_FMA3;
7015 for (size_t channels = 1; channels <= 40; channels += 7) {
7016 DWConvMicrokernelTester()
7017 .cr(8)
7018 .kr(4)
7019 .channels(channels)
7020 .width(3)
7021 .qmax(128)
7022 .Test(xnn_f32_dwconv_ukernel_up8x4__fma3_acc2);
7023 }
7024 }
7025#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7026
7027
7028#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7029 TEST(F32_DWCONV_UP16X4__FMA3, c_eq_16) {
7030 TEST_REQUIRES_X86_FMA3;
7031 DWConvMicrokernelTester()
7032 .cr(16)
7033 .kr(4)
7034 .channels(16)
7035 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7036 }
7037
7038 TEST(F32_DWCONV_UP16X4__FMA3, c_div_16) {
7039 TEST_REQUIRES_X86_FMA3;
7040 for (uint32_t channels = 32; channels < 256; channels += 48) {
7041 DWConvMicrokernelTester()
7042 .cr(16)
7043 .kr(4)
7044 .channels(channels)
7045 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7046 }
7047 }
7048
7049 TEST(F32_DWCONV_UP16X4__FMA3, c_div_16_with_qmin) {
7050 TEST_REQUIRES_X86_FMA3;
7051 for (uint32_t channels = 32; channels < 256; channels += 48) {
7052 DWConvMicrokernelTester()
7053 .cr(16)
7054 .kr(4)
7055 .channels(channels)
7056 .qmin(128)
7057 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7058 }
7059 }
7060
7061 TEST(F32_DWCONV_UP16X4__FMA3, c_div_16_with_qmax) {
7062 TEST_REQUIRES_X86_FMA3;
7063 for (uint32_t channels = 32; channels < 256; channels += 48) {
7064 DWConvMicrokernelTester()
7065 .cr(16)
7066 .kr(4)
7067 .channels(channels)
7068 .qmax(128)
7069 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7070 }
7071 }
7072
7073 TEST(F32_DWCONV_UP16X4__FMA3, c_lt_16) {
7074 TEST_REQUIRES_X86_FMA3;
7075 for (uint32_t channels = 1; channels < 16; channels++) {
7076 DWConvMicrokernelTester()
7077 .cr(16)
7078 .kr(4)
7079 .channels(channels)
7080 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7081 }
7082 }
7083
7084 TEST(F32_DWCONV_UP16X4__FMA3, c_gt_16) {
7085 TEST_REQUIRES_X86_FMA3;
7086 for (uint32_t channels = 17; channels < 32; channels++) {
7087 DWConvMicrokernelTester()
7088 .cr(16)
7089 .kr(4)
7090 .channels(channels)
7091 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7092 }
7093 }
7094
7095 TEST(F32_DWCONV_UP16X4__FMA3, c_gt_16_with_qmin) {
7096 TEST_REQUIRES_X86_FMA3;
7097 for (uint32_t channels = 17; channels < 32; channels++) {
7098 DWConvMicrokernelTester()
7099 .cr(16)
7100 .kr(4)
7101 .channels(channels)
7102 .qmin(128)
7103 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7104 }
7105 }
7106
7107 TEST(F32_DWCONV_UP16X4__FMA3, c_gt_16_with_qmax) {
7108 TEST_REQUIRES_X86_FMA3;
7109 for (uint32_t channels = 17; channels < 32; channels++) {
7110 DWConvMicrokernelTester()
7111 .cr(16)
7112 .kr(4)
7113 .channels(channels)
7114 .qmax(128)
7115 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7116 }
7117 }
7118
7119 TEST(F32_DWCONV_UP16X4__FMA3, multipixel) {
7120 TEST_REQUIRES_X86_FMA3;
7121 for (size_t channels = 1; channels <= 80; channels += 15) {
7122 DWConvMicrokernelTester()
7123 .cr(16)
7124 .kr(4)
7125 .channels(channels)
7126 .width(3)
7127 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7128 }
7129 }
7130
7131 TEST(F32_DWCONV_UP16X4__FMA3, multipixel_with_step) {
7132 TEST_REQUIRES_X86_FMA3;
7133 for (size_t channels = 1; channels <= 80; channels += 15) {
7134 for (size_t step = 2; step <= 4; step++) {
7135 DWConvMicrokernelTester()
7136 .cr(16)
7137 .kr(4)
7138 .channels(channels)
7139 .width(3)
7140 .step(step)
7141 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7142 }
7143 }
7144 }
7145
7146 TEST(F32_DWCONV_UP16X4__FMA3, multipixel_with_output_stride) {
7147 TEST_REQUIRES_X86_FMA3;
7148 for (size_t channels = 1; channels <= 80; channels += 15) {
7149 DWConvMicrokernelTester()
7150 .cr(16)
7151 .kr(4)
7152 .channels(16)
7153 .width(5)
7154 .output_stride(83)
7155 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7156 }
7157 }
7158
7159 TEST(F32_DWCONV_UP16X4__FMA3, multipixel_with_qmin) {
7160 TEST_REQUIRES_X86_FMA3;
7161 for (size_t channels = 1; channels <= 80; channels += 15) {
7162 DWConvMicrokernelTester()
7163 .cr(16)
7164 .kr(4)
7165 .channels(channels)
7166 .width(3)
7167 .qmin(128)
7168 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7169 }
7170 }
7171
7172 TEST(F32_DWCONV_UP16X4__FMA3, multipixel_with_qmax) {
7173 TEST_REQUIRES_X86_FMA3;
7174 for (size_t channels = 1; channels <= 80; channels += 15) {
7175 DWConvMicrokernelTester()
7176 .cr(16)
7177 .kr(4)
7178 .channels(channels)
7179 .width(3)
7180 .qmax(128)
7181 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3);
7182 }
7183 }
7184#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7185
7186
7187#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7188 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_eq_16) {
7189 TEST_REQUIRES_X86_FMA3;
7190 DWConvMicrokernelTester()
7191 .cr(16)
7192 .kr(4)
7193 .channels(16)
7194 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7195 }
7196
7197 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_div_16) {
7198 TEST_REQUIRES_X86_FMA3;
7199 for (uint32_t channels = 32; channels < 256; channels += 48) {
7200 DWConvMicrokernelTester()
7201 .cr(16)
7202 .kr(4)
7203 .channels(channels)
7204 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7205 }
7206 }
7207
7208 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_div_16_with_qmin) {
7209 TEST_REQUIRES_X86_FMA3;
7210 for (uint32_t channels = 32; channels < 256; channels += 48) {
7211 DWConvMicrokernelTester()
7212 .cr(16)
7213 .kr(4)
7214 .channels(channels)
7215 .qmin(128)
7216 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7217 }
7218 }
7219
7220 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_div_16_with_qmax) {
7221 TEST_REQUIRES_X86_FMA3;
7222 for (uint32_t channels = 32; channels < 256; channels += 48) {
7223 DWConvMicrokernelTester()
7224 .cr(16)
7225 .kr(4)
7226 .channels(channels)
7227 .qmax(128)
7228 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7229 }
7230 }
7231
7232 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_lt_16) {
7233 TEST_REQUIRES_X86_FMA3;
7234 for (uint32_t channels = 1; channels < 16; channels++) {
7235 DWConvMicrokernelTester()
7236 .cr(16)
7237 .kr(4)
7238 .channels(channels)
7239 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7240 }
7241 }
7242
7243 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_gt_16) {
7244 TEST_REQUIRES_X86_FMA3;
7245 for (uint32_t channels = 17; channels < 32; channels++) {
7246 DWConvMicrokernelTester()
7247 .cr(16)
7248 .kr(4)
7249 .channels(channels)
7250 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7251 }
7252 }
7253
7254 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_gt_16_with_qmin) {
7255 TEST_REQUIRES_X86_FMA3;
7256 for (uint32_t channels = 17; channels < 32; channels++) {
7257 DWConvMicrokernelTester()
7258 .cr(16)
7259 .kr(4)
7260 .channels(channels)
7261 .qmin(128)
7262 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7263 }
7264 }
7265
7266 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, c_gt_16_with_qmax) {
7267 TEST_REQUIRES_X86_FMA3;
7268 for (uint32_t channels = 17; channels < 32; channels++) {
7269 DWConvMicrokernelTester()
7270 .cr(16)
7271 .kr(4)
7272 .channels(channels)
7273 .qmax(128)
7274 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7275 }
7276 }
7277
7278 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, multipixel) {
7279 TEST_REQUIRES_X86_FMA3;
7280 for (size_t channels = 1; channels <= 80; channels += 15) {
7281 DWConvMicrokernelTester()
7282 .cr(16)
7283 .kr(4)
7284 .channels(channels)
7285 .width(3)
7286 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7287 }
7288 }
7289
7290 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, multipixel_with_step) {
7291 TEST_REQUIRES_X86_FMA3;
7292 for (size_t channels = 1; channels <= 80; channels += 15) {
7293 for (size_t step = 2; step <= 4; step++) {
7294 DWConvMicrokernelTester()
7295 .cr(16)
7296 .kr(4)
7297 .channels(channels)
7298 .width(3)
7299 .step(step)
7300 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7301 }
7302 }
7303 }
7304
7305 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, multipixel_with_output_stride) {
7306 TEST_REQUIRES_X86_FMA3;
7307 for (size_t channels = 1; channels <= 80; channels += 15) {
7308 DWConvMicrokernelTester()
7309 .cr(16)
7310 .kr(4)
7311 .channels(16)
7312 .width(5)
7313 .output_stride(83)
7314 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7315 }
7316 }
7317
7318 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, multipixel_with_qmin) {
7319 TEST_REQUIRES_X86_FMA3;
7320 for (size_t channels = 1; channels <= 80; channels += 15) {
7321 DWConvMicrokernelTester()
7322 .cr(16)
7323 .kr(4)
7324 .channels(channels)
7325 .width(3)
7326 .qmin(128)
7327 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7328 }
7329 }
7330
7331 TEST(F32_DWCONV_UP16X4__FMA3_ACC2, multipixel_with_qmax) {
7332 TEST_REQUIRES_X86_FMA3;
7333 for (size_t channels = 1; channels <= 80; channels += 15) {
7334 DWConvMicrokernelTester()
7335 .cr(16)
7336 .kr(4)
7337 .channels(channels)
7338 .width(3)
7339 .qmax(128)
7340 .Test(xnn_f32_dwconv_ukernel_up16x4__fma3_acc2);
7341 }
7342 }
7343#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7344
7345
7346#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7347 TEST(F32_DWCONV_UP16X25__AVX512F, c_eq_16) {
7348 TEST_REQUIRES_X86_AVX512F;
7349 DWConvMicrokernelTester()
7350 .cr(16)
7351 .kr(25)
7352 .channels(16)
7353 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7354 }
7355
7356 TEST(F32_DWCONV_UP16X25__AVX512F, c_div_16) {
7357 TEST_REQUIRES_X86_AVX512F;
7358 for (uint32_t channels = 32; channels < 256; channels += 48) {
7359 DWConvMicrokernelTester()
7360 .cr(16)
7361 .kr(25)
7362 .channels(channels)
7363 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7364 }
7365 }
7366
7367 TEST(F32_DWCONV_UP16X25__AVX512F, c_div_16_with_qmin) {
7368 TEST_REQUIRES_X86_AVX512F;
7369 for (uint32_t channels = 32; channels < 256; channels += 48) {
7370 DWConvMicrokernelTester()
7371 .cr(16)
7372 .kr(25)
7373 .channels(channels)
7374 .qmin(128)
7375 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7376 }
7377 }
7378
7379 TEST(F32_DWCONV_UP16X25__AVX512F, c_div_16_with_qmax) {
7380 TEST_REQUIRES_X86_AVX512F;
7381 for (uint32_t channels = 32; channels < 256; channels += 48) {
7382 DWConvMicrokernelTester()
7383 .cr(16)
7384 .kr(25)
7385 .channels(channels)
7386 .qmax(128)
7387 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7388 }
7389 }
7390
7391 TEST(F32_DWCONV_UP16X25__AVX512F, c_lt_16) {
7392 TEST_REQUIRES_X86_AVX512F;
7393 for (uint32_t channels = 1; channels < 16; channels++) {
7394 DWConvMicrokernelTester()
7395 .cr(16)
7396 .kr(25)
7397 .channels(channels)
7398 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7399 }
7400 }
7401
7402 TEST(F32_DWCONV_UP16X25__AVX512F, c_gt_16) {
7403 TEST_REQUIRES_X86_AVX512F;
7404 for (uint32_t channels = 17; channels < 32; channels++) {
7405 DWConvMicrokernelTester()
7406 .cr(16)
7407 .kr(25)
7408 .channels(channels)
7409 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7410 }
7411 }
7412
7413 TEST(F32_DWCONV_UP16X25__AVX512F, c_gt_16_with_qmin) {
7414 TEST_REQUIRES_X86_AVX512F;
7415 for (uint32_t channels = 17; channels < 32; channels++) {
7416 DWConvMicrokernelTester()
7417 .cr(16)
7418 .kr(25)
7419 .channels(channels)
7420 .qmin(128)
7421 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7422 }
7423 }
7424
7425 TEST(F32_DWCONV_UP16X25__AVX512F, c_gt_16_with_qmax) {
7426 TEST_REQUIRES_X86_AVX512F;
7427 for (uint32_t channels = 17; channels < 32; channels++) {
7428 DWConvMicrokernelTester()
7429 .cr(16)
7430 .kr(25)
7431 .channels(channels)
7432 .qmax(128)
7433 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7434 }
7435 }
7436
7437 TEST(F32_DWCONV_UP16X25__AVX512F, multipixel) {
7438 TEST_REQUIRES_X86_AVX512F;
7439 for (size_t channels = 1; channels <= 80; channels += 15) {
7440 DWConvMicrokernelTester()
7441 .cr(16)
7442 .kr(25)
7443 .channels(channels)
7444 .width(3)
7445 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7446 }
7447 }
7448
7449 TEST(F32_DWCONV_UP16X25__AVX512F, multipixel_with_step) {
7450 TEST_REQUIRES_X86_AVX512F;
7451 for (size_t channels = 1; channels <= 80; channels += 15) {
7452 for (size_t step = 2; step <= 25; step++) {
7453 DWConvMicrokernelTester()
7454 .cr(16)
7455 .kr(25)
7456 .channels(channels)
7457 .width(3)
7458 .step(step)
7459 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7460 }
7461 }
7462 }
7463
7464 TEST(F32_DWCONV_UP16X25__AVX512F, multipixel_with_output_stride) {
7465 TEST_REQUIRES_X86_AVX512F;
7466 for (size_t channels = 1; channels <= 80; channels += 15) {
7467 DWConvMicrokernelTester()
7468 .cr(16)
7469 .kr(25)
7470 .channels(16)
7471 .width(5)
7472 .output_stride(83)
7473 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7474 }
7475 }
7476
7477 TEST(F32_DWCONV_UP16X25__AVX512F, multipixel_with_qmin) {
7478 TEST_REQUIRES_X86_AVX512F;
7479 for (size_t channels = 1; channels <= 80; channels += 15) {
7480 DWConvMicrokernelTester()
7481 .cr(16)
7482 .kr(25)
7483 .channels(channels)
7484 .width(3)
7485 .qmin(128)
7486 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7487 }
7488 }
7489
7490 TEST(F32_DWCONV_UP16X25__AVX512F, multipixel_with_qmax) {
7491 TEST_REQUIRES_X86_AVX512F;
7492 for (size_t channels = 1; channels <= 80; channels += 15) {
7493 DWConvMicrokernelTester()
7494 .cr(16)
7495 .kr(25)
7496 .channels(channels)
7497 .width(3)
7498 .qmax(128)
7499 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f);
7500 }
7501 }
7502#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7503
7504
7505#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7506 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_eq_16) {
7507 TEST_REQUIRES_X86_AVX512F;
7508 DWConvMicrokernelTester()
7509 .cr(16)
7510 .kr(25)
7511 .channels(16)
7512 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7513 }
7514
7515 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_div_16) {
7516 TEST_REQUIRES_X86_AVX512F;
7517 for (uint32_t channels = 32; channels < 256; channels += 48) {
7518 DWConvMicrokernelTester()
7519 .cr(16)
7520 .kr(25)
7521 .channels(channels)
7522 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7523 }
7524 }
7525
7526 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_div_16_with_qmin) {
7527 TEST_REQUIRES_X86_AVX512F;
7528 for (uint32_t channels = 32; channels < 256; channels += 48) {
7529 DWConvMicrokernelTester()
7530 .cr(16)
7531 .kr(25)
7532 .channels(channels)
7533 .qmin(128)
7534 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7535 }
7536 }
7537
7538 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_div_16_with_qmax) {
7539 TEST_REQUIRES_X86_AVX512F;
7540 for (uint32_t channels = 32; channels < 256; channels += 48) {
7541 DWConvMicrokernelTester()
7542 .cr(16)
7543 .kr(25)
7544 .channels(channels)
7545 .qmax(128)
7546 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7547 }
7548 }
7549
7550 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_lt_16) {
7551 TEST_REQUIRES_X86_AVX512F;
7552 for (uint32_t channels = 1; channels < 16; channels++) {
7553 DWConvMicrokernelTester()
7554 .cr(16)
7555 .kr(25)
7556 .channels(channels)
7557 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7558 }
7559 }
7560
7561 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_gt_16) {
7562 TEST_REQUIRES_X86_AVX512F;
7563 for (uint32_t channels = 17; channels < 32; channels++) {
7564 DWConvMicrokernelTester()
7565 .cr(16)
7566 .kr(25)
7567 .channels(channels)
7568 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7569 }
7570 }
7571
7572 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_gt_16_with_qmin) {
7573 TEST_REQUIRES_X86_AVX512F;
7574 for (uint32_t channels = 17; channels < 32; channels++) {
7575 DWConvMicrokernelTester()
7576 .cr(16)
7577 .kr(25)
7578 .channels(channels)
7579 .qmin(128)
7580 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7581 }
7582 }
7583
7584 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, c_gt_16_with_qmax) {
7585 TEST_REQUIRES_X86_AVX512F;
7586 for (uint32_t channels = 17; channels < 32; channels++) {
7587 DWConvMicrokernelTester()
7588 .cr(16)
7589 .kr(25)
7590 .channels(channels)
7591 .qmax(128)
7592 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7593 }
7594 }
7595
7596 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, multipixel) {
7597 TEST_REQUIRES_X86_AVX512F;
7598 for (size_t channels = 1; channels <= 80; channels += 15) {
7599 DWConvMicrokernelTester()
7600 .cr(16)
7601 .kr(25)
7602 .channels(channels)
7603 .width(3)
7604 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7605 }
7606 }
7607
7608 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, multipixel_with_step) {
7609 TEST_REQUIRES_X86_AVX512F;
7610 for (size_t channels = 1; channels <= 80; channels += 15) {
7611 for (size_t step = 2; step <= 25; step++) {
7612 DWConvMicrokernelTester()
7613 .cr(16)
7614 .kr(25)
7615 .channels(channels)
7616 .width(3)
7617 .step(step)
7618 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7619 }
7620 }
7621 }
7622
7623 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, multipixel_with_output_stride) {
7624 TEST_REQUIRES_X86_AVX512F;
7625 for (size_t channels = 1; channels <= 80; channels += 15) {
7626 DWConvMicrokernelTester()
7627 .cr(16)
7628 .kr(25)
7629 .channels(16)
7630 .width(5)
7631 .output_stride(83)
7632 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7633 }
7634 }
7635
7636 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, multipixel_with_qmin) {
7637 TEST_REQUIRES_X86_AVX512F;
7638 for (size_t channels = 1; channels <= 80; channels += 15) {
7639 DWConvMicrokernelTester()
7640 .cr(16)
7641 .kr(25)
7642 .channels(channels)
7643 .width(3)
7644 .qmin(128)
7645 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7646 }
7647 }
7648
7649 TEST(F32_DWCONV_UP16X25__AVX512F_ACC2, multipixel_with_qmax) {
7650 TEST_REQUIRES_X86_AVX512F;
7651 for (size_t channels = 1; channels <= 80; channels += 15) {
7652 DWConvMicrokernelTester()
7653 .cr(16)
7654 .kr(25)
7655 .channels(channels)
7656 .width(3)
7657 .qmax(128)
7658 .Test(xnn_f32_dwconv_ukernel_up16x25__avx512f_acc2);
7659 }
7660 }
7661#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7662
7663
7664#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7665 TEST(F32_DWCONV_UP32X25__AVX512F, c_eq_32) {
7666 TEST_REQUIRES_X86_AVX512F;
7667 DWConvMicrokernelTester()
7668 .cr(32)
7669 .kr(25)
7670 .channels(32)
7671 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7672 }
7673
7674 TEST(F32_DWCONV_UP32X25__AVX512F, c_div_32) {
7675 TEST_REQUIRES_X86_AVX512F;
7676 for (uint32_t channels = 64; channels < 512; channels += 96) {
7677 DWConvMicrokernelTester()
7678 .cr(32)
7679 .kr(25)
7680 .channels(channels)
7681 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7682 }
7683 }
7684
7685 TEST(F32_DWCONV_UP32X25__AVX512F, c_div_32_with_qmin) {
7686 TEST_REQUIRES_X86_AVX512F;
7687 for (uint32_t channels = 64; channels < 512; channels += 96) {
7688 DWConvMicrokernelTester()
7689 .cr(32)
7690 .kr(25)
7691 .channels(channels)
7692 .qmin(128)
7693 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7694 }
7695 }
7696
7697 TEST(F32_DWCONV_UP32X25__AVX512F, c_div_32_with_qmax) {
7698 TEST_REQUIRES_X86_AVX512F;
7699 for (uint32_t channels = 64; channels < 512; channels += 96) {
7700 DWConvMicrokernelTester()
7701 .cr(32)
7702 .kr(25)
7703 .channels(channels)
7704 .qmax(128)
7705 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7706 }
7707 }
7708
7709 TEST(F32_DWCONV_UP32X25__AVX512F, c_lt_32) {
7710 TEST_REQUIRES_X86_AVX512F;
7711 for (uint32_t channels = 1; channels < 32; channels++) {
7712 DWConvMicrokernelTester()
7713 .cr(32)
7714 .kr(25)
7715 .channels(channels)
7716 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7717 }
7718 }
7719
7720 TEST(F32_DWCONV_UP32X25__AVX512F, c_gt_32) {
7721 TEST_REQUIRES_X86_AVX512F;
7722 for (uint32_t channels = 33; channels < 64; channels++) {
7723 DWConvMicrokernelTester()
7724 .cr(32)
7725 .kr(25)
7726 .channels(channels)
7727 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7728 }
7729 }
7730
7731 TEST(F32_DWCONV_UP32X25__AVX512F, c_gt_32_with_qmin) {
7732 TEST_REQUIRES_X86_AVX512F;
7733 for (uint32_t channels = 33; channels < 64; channels++) {
7734 DWConvMicrokernelTester()
7735 .cr(32)
7736 .kr(25)
7737 .channels(channels)
7738 .qmin(128)
7739 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7740 }
7741 }
7742
7743 TEST(F32_DWCONV_UP32X25__AVX512F, c_gt_32_with_qmax) {
7744 TEST_REQUIRES_X86_AVX512F;
7745 for (uint32_t channels = 33; channels < 64; channels++) {
7746 DWConvMicrokernelTester()
7747 .cr(32)
7748 .kr(25)
7749 .channels(channels)
7750 .qmax(128)
7751 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7752 }
7753 }
7754
7755 TEST(F32_DWCONV_UP32X25__AVX512F, multipixel) {
7756 TEST_REQUIRES_X86_AVX512F;
7757 for (size_t channels = 1; channels <= 160; channels += 31) {
7758 DWConvMicrokernelTester()
7759 .cr(32)
7760 .kr(25)
7761 .channels(channels)
7762 .width(3)
7763 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7764 }
7765 }
7766
7767 TEST(F32_DWCONV_UP32X25__AVX512F, multipixel_with_step) {
7768 TEST_REQUIRES_X86_AVX512F;
7769 for (size_t channels = 1; channels <= 160; channels += 31) {
7770 for (size_t step = 2; step <= 25; step++) {
7771 DWConvMicrokernelTester()
7772 .cr(32)
7773 .kr(25)
7774 .channels(channels)
7775 .width(3)
7776 .step(step)
7777 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7778 }
7779 }
7780 }
7781
7782 TEST(F32_DWCONV_UP32X25__AVX512F, multipixel_with_output_stride) {
7783 TEST_REQUIRES_X86_AVX512F;
7784 for (size_t channels = 1; channels <= 160; channels += 31) {
7785 DWConvMicrokernelTester()
7786 .cr(32)
7787 .kr(25)
7788 .channels(32)
7789 .width(5)
7790 .output_stride(163)
7791 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7792 }
7793 }
7794
7795 TEST(F32_DWCONV_UP32X25__AVX512F, multipixel_with_qmin) {
7796 TEST_REQUIRES_X86_AVX512F;
7797 for (size_t channels = 1; channels <= 160; channels += 31) {
7798 DWConvMicrokernelTester()
7799 .cr(32)
7800 .kr(25)
7801 .channels(channels)
7802 .width(3)
7803 .qmin(128)
7804 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7805 }
7806 }
7807
7808 TEST(F32_DWCONV_UP32X25__AVX512F, multipixel_with_qmax) {
7809 TEST_REQUIRES_X86_AVX512F;
7810 for (size_t channels = 1; channels <= 160; channels += 31) {
7811 DWConvMicrokernelTester()
7812 .cr(32)
7813 .kr(25)
7814 .channels(channels)
7815 .width(3)
7816 .qmax(128)
7817 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f);
7818 }
7819 }
7820#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7821
7822
7823#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7824 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_eq_32) {
7825 TEST_REQUIRES_X86_AVX512F;
7826 DWConvMicrokernelTester()
7827 .cr(32)
7828 .kr(25)
7829 .channels(32)
7830 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7831 }
7832
7833 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_div_32) {
7834 TEST_REQUIRES_X86_AVX512F;
7835 for (uint32_t channels = 64; channels < 512; channels += 96) {
7836 DWConvMicrokernelTester()
7837 .cr(32)
7838 .kr(25)
7839 .channels(channels)
7840 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7841 }
7842 }
7843
7844 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_div_32_with_qmin) {
7845 TEST_REQUIRES_X86_AVX512F;
7846 for (uint32_t channels = 64; channels < 512; channels += 96) {
7847 DWConvMicrokernelTester()
7848 .cr(32)
7849 .kr(25)
7850 .channels(channels)
7851 .qmin(128)
7852 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7853 }
7854 }
7855
7856 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_div_32_with_qmax) {
7857 TEST_REQUIRES_X86_AVX512F;
7858 for (uint32_t channels = 64; channels < 512; channels += 96) {
7859 DWConvMicrokernelTester()
7860 .cr(32)
7861 .kr(25)
7862 .channels(channels)
7863 .qmax(128)
7864 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7865 }
7866 }
7867
7868 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_lt_32) {
7869 TEST_REQUIRES_X86_AVX512F;
7870 for (uint32_t channels = 1; channels < 32; channels++) {
7871 DWConvMicrokernelTester()
7872 .cr(32)
7873 .kr(25)
7874 .channels(channels)
7875 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7876 }
7877 }
7878
7879 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_gt_32) {
7880 TEST_REQUIRES_X86_AVX512F;
7881 for (uint32_t channels = 33; channels < 64; channels++) {
7882 DWConvMicrokernelTester()
7883 .cr(32)
7884 .kr(25)
7885 .channels(channels)
7886 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7887 }
7888 }
7889
7890 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_gt_32_with_qmin) {
7891 TEST_REQUIRES_X86_AVX512F;
7892 for (uint32_t channels = 33; channels < 64; channels++) {
7893 DWConvMicrokernelTester()
7894 .cr(32)
7895 .kr(25)
7896 .channels(channels)
7897 .qmin(128)
7898 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7899 }
7900 }
7901
7902 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, c_gt_32_with_qmax) {
7903 TEST_REQUIRES_X86_AVX512F;
7904 for (uint32_t channels = 33; channels < 64; channels++) {
7905 DWConvMicrokernelTester()
7906 .cr(32)
7907 .kr(25)
7908 .channels(channels)
7909 .qmax(128)
7910 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7911 }
7912 }
7913
7914 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, multipixel) {
7915 TEST_REQUIRES_X86_AVX512F;
7916 for (size_t channels = 1; channels <= 160; channels += 31) {
7917 DWConvMicrokernelTester()
7918 .cr(32)
7919 .kr(25)
7920 .channels(channels)
7921 .width(3)
7922 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7923 }
7924 }
7925
7926 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, multipixel_with_step) {
7927 TEST_REQUIRES_X86_AVX512F;
7928 for (size_t channels = 1; channels <= 160; channels += 31) {
7929 for (size_t step = 2; step <= 25; step++) {
7930 DWConvMicrokernelTester()
7931 .cr(32)
7932 .kr(25)
7933 .channels(channels)
7934 .width(3)
7935 .step(step)
7936 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7937 }
7938 }
7939 }
7940
7941 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, multipixel_with_output_stride) {
7942 TEST_REQUIRES_X86_AVX512F;
7943 for (size_t channels = 1; channels <= 160; channels += 31) {
7944 DWConvMicrokernelTester()
7945 .cr(32)
7946 .kr(25)
7947 .channels(32)
7948 .width(5)
7949 .output_stride(163)
7950 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7951 }
7952 }
7953
7954 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, multipixel_with_qmin) {
7955 TEST_REQUIRES_X86_AVX512F;
7956 for (size_t channels = 1; channels <= 160; channels += 31) {
7957 DWConvMicrokernelTester()
7958 .cr(32)
7959 .kr(25)
7960 .channels(channels)
7961 .width(3)
7962 .qmin(128)
7963 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7964 }
7965 }
7966
7967 TEST(F32_DWCONV_UP32X25__AVX512F_ACC2, multipixel_with_qmax) {
7968 TEST_REQUIRES_X86_AVX512F;
7969 for (size_t channels = 1; channels <= 160; channels += 31) {
7970 DWConvMicrokernelTester()
7971 .cr(32)
7972 .kr(25)
7973 .channels(channels)
7974 .width(3)
7975 .qmax(128)
7976 .Test(xnn_f32_dwconv_ukernel_up32x25__avx512f_acc2);
7977 }
7978 }
7979#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7980
7981
7982#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7983 TEST(F32_DWCONV_UP16X9__AVX512F, c_eq_16) {
7984 TEST_REQUIRES_X86_AVX512F;
7985 DWConvMicrokernelTester()
7986 .cr(16)
7987 .kr(9)
7988 .channels(16)
7989 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
7990 }
7991
7992 TEST(F32_DWCONV_UP16X9__AVX512F, c_div_16) {
7993 TEST_REQUIRES_X86_AVX512F;
7994 for (uint32_t channels = 32; channels < 256; channels += 48) {
7995 DWConvMicrokernelTester()
7996 .cr(16)
7997 .kr(9)
7998 .channels(channels)
7999 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8000 }
8001 }
8002
8003 TEST(F32_DWCONV_UP16X9__AVX512F, c_div_16_with_qmin) {
8004 TEST_REQUIRES_X86_AVX512F;
8005 for (uint32_t channels = 32; channels < 256; channels += 48) {
8006 DWConvMicrokernelTester()
8007 .cr(16)
8008 .kr(9)
8009 .channels(channels)
8010 .qmin(128)
8011 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8012 }
8013 }
8014
8015 TEST(F32_DWCONV_UP16X9__AVX512F, c_div_16_with_qmax) {
8016 TEST_REQUIRES_X86_AVX512F;
8017 for (uint32_t channels = 32; channels < 256; channels += 48) {
8018 DWConvMicrokernelTester()
8019 .cr(16)
8020 .kr(9)
8021 .channels(channels)
8022 .qmax(128)
8023 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8024 }
8025 }
8026
8027 TEST(F32_DWCONV_UP16X9__AVX512F, c_lt_16) {
8028 TEST_REQUIRES_X86_AVX512F;
8029 for (uint32_t channels = 1; channels < 16; channels++) {
8030 DWConvMicrokernelTester()
8031 .cr(16)
8032 .kr(9)
8033 .channels(channels)
8034 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8035 }
8036 }
8037
8038 TEST(F32_DWCONV_UP16X9__AVX512F, c_gt_16) {
8039 TEST_REQUIRES_X86_AVX512F;
8040 for (uint32_t channels = 17; channels < 32; channels++) {
8041 DWConvMicrokernelTester()
8042 .cr(16)
8043 .kr(9)
8044 .channels(channels)
8045 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8046 }
8047 }
8048
8049 TEST(F32_DWCONV_UP16X9__AVX512F, c_gt_16_with_qmin) {
8050 TEST_REQUIRES_X86_AVX512F;
8051 for (uint32_t channels = 17; channels < 32; channels++) {
8052 DWConvMicrokernelTester()
8053 .cr(16)
8054 .kr(9)
8055 .channels(channels)
8056 .qmin(128)
8057 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8058 }
8059 }
8060
8061 TEST(F32_DWCONV_UP16X9__AVX512F, c_gt_16_with_qmax) {
8062 TEST_REQUIRES_X86_AVX512F;
8063 for (uint32_t channels = 17; channels < 32; channels++) {
8064 DWConvMicrokernelTester()
8065 .cr(16)
8066 .kr(9)
8067 .channels(channels)
8068 .qmax(128)
8069 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8070 }
8071 }
8072
8073 TEST(F32_DWCONV_UP16X9__AVX512F, multipixel) {
8074 TEST_REQUIRES_X86_AVX512F;
8075 for (size_t channels = 1; channels <= 80; channels += 15) {
8076 DWConvMicrokernelTester()
8077 .cr(16)
8078 .kr(9)
8079 .channels(channels)
8080 .width(3)
8081 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8082 }
8083 }
8084
8085 TEST(F32_DWCONV_UP16X9__AVX512F, multipixel_with_step) {
8086 TEST_REQUIRES_X86_AVX512F;
8087 for (size_t channels = 1; channels <= 80; channels += 15) {
8088 for (size_t step = 2; step <= 9; step++) {
8089 DWConvMicrokernelTester()
8090 .cr(16)
8091 .kr(9)
8092 .channels(channels)
8093 .width(3)
8094 .step(step)
8095 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8096 }
8097 }
8098 }
8099
8100 TEST(F32_DWCONV_UP16X9__AVX512F, multipixel_with_output_stride) {
8101 TEST_REQUIRES_X86_AVX512F;
8102 for (size_t channels = 1; channels <= 80; channels += 15) {
8103 DWConvMicrokernelTester()
8104 .cr(16)
8105 .kr(9)
8106 .channels(16)
8107 .width(5)
8108 .output_stride(83)
8109 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8110 }
8111 }
8112
8113 TEST(F32_DWCONV_UP16X9__AVX512F, multipixel_with_qmin) {
8114 TEST_REQUIRES_X86_AVX512F;
8115 for (size_t channels = 1; channels <= 80; channels += 15) {
8116 DWConvMicrokernelTester()
8117 .cr(16)
8118 .kr(9)
8119 .channels(channels)
8120 .width(3)
8121 .qmin(128)
8122 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8123 }
8124 }
8125
8126 TEST(F32_DWCONV_UP16X9__AVX512F, multipixel_with_qmax) {
8127 TEST_REQUIRES_X86_AVX512F;
8128 for (size_t channels = 1; channels <= 80; channels += 15) {
8129 DWConvMicrokernelTester()
8130 .cr(16)
8131 .kr(9)
8132 .channels(channels)
8133 .width(3)
8134 .qmax(128)
8135 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f);
8136 }
8137 }
8138#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8139
8140
8141#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8142 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_eq_16) {
8143 TEST_REQUIRES_X86_AVX512F;
8144 DWConvMicrokernelTester()
8145 .cr(16)
8146 .kr(9)
8147 .channels(16)
8148 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8149 }
8150
8151 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_div_16) {
8152 TEST_REQUIRES_X86_AVX512F;
8153 for (uint32_t channels = 32; channels < 256; channels += 48) {
8154 DWConvMicrokernelTester()
8155 .cr(16)
8156 .kr(9)
8157 .channels(channels)
8158 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8159 }
8160 }
8161
8162 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_div_16_with_qmin) {
8163 TEST_REQUIRES_X86_AVX512F;
8164 for (uint32_t channels = 32; channels < 256; channels += 48) {
8165 DWConvMicrokernelTester()
8166 .cr(16)
8167 .kr(9)
8168 .channels(channels)
8169 .qmin(128)
8170 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8171 }
8172 }
8173
8174 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_div_16_with_qmax) {
8175 TEST_REQUIRES_X86_AVX512F;
8176 for (uint32_t channels = 32; channels < 256; channels += 48) {
8177 DWConvMicrokernelTester()
8178 .cr(16)
8179 .kr(9)
8180 .channels(channels)
8181 .qmax(128)
8182 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8183 }
8184 }
8185
8186 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_lt_16) {
8187 TEST_REQUIRES_X86_AVX512F;
8188 for (uint32_t channels = 1; channels < 16; channels++) {
8189 DWConvMicrokernelTester()
8190 .cr(16)
8191 .kr(9)
8192 .channels(channels)
8193 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8194 }
8195 }
8196
8197 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_gt_16) {
8198 TEST_REQUIRES_X86_AVX512F;
8199 for (uint32_t channels = 17; channels < 32; channels++) {
8200 DWConvMicrokernelTester()
8201 .cr(16)
8202 .kr(9)
8203 .channels(channels)
8204 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8205 }
8206 }
8207
8208 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_gt_16_with_qmin) {
8209 TEST_REQUIRES_X86_AVX512F;
8210 for (uint32_t channels = 17; channels < 32; channels++) {
8211 DWConvMicrokernelTester()
8212 .cr(16)
8213 .kr(9)
8214 .channels(channels)
8215 .qmin(128)
8216 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8217 }
8218 }
8219
8220 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, c_gt_16_with_qmax) {
8221 TEST_REQUIRES_X86_AVX512F;
8222 for (uint32_t channels = 17; channels < 32; channels++) {
8223 DWConvMicrokernelTester()
8224 .cr(16)
8225 .kr(9)
8226 .channels(channels)
8227 .qmax(128)
8228 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8229 }
8230 }
8231
8232 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, multipixel) {
8233 TEST_REQUIRES_X86_AVX512F;
8234 for (size_t channels = 1; channels <= 80; channels += 15) {
8235 DWConvMicrokernelTester()
8236 .cr(16)
8237 .kr(9)
8238 .channels(channels)
8239 .width(3)
8240 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8241 }
8242 }
8243
8244 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, multipixel_with_step) {
8245 TEST_REQUIRES_X86_AVX512F;
8246 for (size_t channels = 1; channels <= 80; channels += 15) {
8247 for (size_t step = 2; step <= 9; step++) {
8248 DWConvMicrokernelTester()
8249 .cr(16)
8250 .kr(9)
8251 .channels(channels)
8252 .width(3)
8253 .step(step)
8254 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8255 }
8256 }
8257 }
8258
8259 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, multipixel_with_output_stride) {
8260 TEST_REQUIRES_X86_AVX512F;
8261 for (size_t channels = 1; channels <= 80; channels += 15) {
8262 DWConvMicrokernelTester()
8263 .cr(16)
8264 .kr(9)
8265 .channels(16)
8266 .width(5)
8267 .output_stride(83)
8268 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8269 }
8270 }
8271
8272 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, multipixel_with_qmin) {
8273 TEST_REQUIRES_X86_AVX512F;
8274 for (size_t channels = 1; channels <= 80; channels += 15) {
8275 DWConvMicrokernelTester()
8276 .cr(16)
8277 .kr(9)
8278 .channels(channels)
8279 .width(3)
8280 .qmin(128)
8281 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8282 }
8283 }
8284
8285 TEST(F32_DWCONV_UP16X9__AVX512F_ACC2, multipixel_with_qmax) {
8286 TEST_REQUIRES_X86_AVX512F;
8287 for (size_t channels = 1; channels <= 80; channels += 15) {
8288 DWConvMicrokernelTester()
8289 .cr(16)
8290 .kr(9)
8291 .channels(channels)
8292 .width(3)
8293 .qmax(128)
8294 .Test(xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2);
8295 }
8296 }
8297#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8298
8299
8300#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8301 TEST(F32_DWCONV_UP32X9__AVX512F, c_eq_32) {
8302 TEST_REQUIRES_X86_AVX512F;
8303 DWConvMicrokernelTester()
8304 .cr(32)
8305 .kr(9)
8306 .channels(32)
8307 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8308 }
8309
8310 TEST(F32_DWCONV_UP32X9__AVX512F, c_div_32) {
8311 TEST_REQUIRES_X86_AVX512F;
8312 for (uint32_t channels = 64; channels < 512; channels += 96) {
8313 DWConvMicrokernelTester()
8314 .cr(32)
8315 .kr(9)
8316 .channels(channels)
8317 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8318 }
8319 }
8320
8321 TEST(F32_DWCONV_UP32X9__AVX512F, c_div_32_with_qmin) {
8322 TEST_REQUIRES_X86_AVX512F;
8323 for (uint32_t channels = 64; channels < 512; channels += 96) {
8324 DWConvMicrokernelTester()
8325 .cr(32)
8326 .kr(9)
8327 .channels(channels)
8328 .qmin(128)
8329 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8330 }
8331 }
8332
8333 TEST(F32_DWCONV_UP32X9__AVX512F, c_div_32_with_qmax) {
8334 TEST_REQUIRES_X86_AVX512F;
8335 for (uint32_t channels = 64; channels < 512; channels += 96) {
8336 DWConvMicrokernelTester()
8337 .cr(32)
8338 .kr(9)
8339 .channels(channels)
8340 .qmax(128)
8341 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8342 }
8343 }
8344
8345 TEST(F32_DWCONV_UP32X9__AVX512F, c_lt_32) {
8346 TEST_REQUIRES_X86_AVX512F;
8347 for (uint32_t channels = 1; channels < 32; channels++) {
8348 DWConvMicrokernelTester()
8349 .cr(32)
8350 .kr(9)
8351 .channels(channels)
8352 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8353 }
8354 }
8355
8356 TEST(F32_DWCONV_UP32X9__AVX512F, c_gt_32) {
8357 TEST_REQUIRES_X86_AVX512F;
8358 for (uint32_t channels = 33; channels < 64; channels++) {
8359 DWConvMicrokernelTester()
8360 .cr(32)
8361 .kr(9)
8362 .channels(channels)
8363 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8364 }
8365 }
8366
8367 TEST(F32_DWCONV_UP32X9__AVX512F, c_gt_32_with_qmin) {
8368 TEST_REQUIRES_X86_AVX512F;
8369 for (uint32_t channels = 33; channels < 64; channels++) {
8370 DWConvMicrokernelTester()
8371 .cr(32)
8372 .kr(9)
8373 .channels(channels)
8374 .qmin(128)
8375 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8376 }
8377 }
8378
8379 TEST(F32_DWCONV_UP32X9__AVX512F, c_gt_32_with_qmax) {
8380 TEST_REQUIRES_X86_AVX512F;
8381 for (uint32_t channels = 33; channels < 64; channels++) {
8382 DWConvMicrokernelTester()
8383 .cr(32)
8384 .kr(9)
8385 .channels(channels)
8386 .qmax(128)
8387 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8388 }
8389 }
8390
8391 TEST(F32_DWCONV_UP32X9__AVX512F, multipixel) {
8392 TEST_REQUIRES_X86_AVX512F;
8393 for (size_t channels = 1; channels <= 160; channels += 31) {
8394 DWConvMicrokernelTester()
8395 .cr(32)
8396 .kr(9)
8397 .channels(channels)
8398 .width(3)
8399 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8400 }
8401 }
8402
8403 TEST(F32_DWCONV_UP32X9__AVX512F, multipixel_with_step) {
8404 TEST_REQUIRES_X86_AVX512F;
8405 for (size_t channels = 1; channels <= 160; channels += 31) {
8406 for (size_t step = 2; step <= 9; step++) {
8407 DWConvMicrokernelTester()
8408 .cr(32)
8409 .kr(9)
8410 .channels(channels)
8411 .width(3)
8412 .step(step)
8413 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8414 }
8415 }
8416 }
8417
8418 TEST(F32_DWCONV_UP32X9__AVX512F, multipixel_with_output_stride) {
8419 TEST_REQUIRES_X86_AVX512F;
8420 for (size_t channels = 1; channels <= 160; channels += 31) {
8421 DWConvMicrokernelTester()
8422 .cr(32)
8423 .kr(9)
8424 .channels(32)
8425 .width(5)
8426 .output_stride(163)
8427 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8428 }
8429 }
8430
8431 TEST(F32_DWCONV_UP32X9__AVX512F, multipixel_with_qmin) {
8432 TEST_REQUIRES_X86_AVX512F;
8433 for (size_t channels = 1; channels <= 160; channels += 31) {
8434 DWConvMicrokernelTester()
8435 .cr(32)
8436 .kr(9)
8437 .channels(channels)
8438 .width(3)
8439 .qmin(128)
8440 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8441 }
8442 }
8443
8444 TEST(F32_DWCONV_UP32X9__AVX512F, multipixel_with_qmax) {
8445 TEST_REQUIRES_X86_AVX512F;
8446 for (size_t channels = 1; channels <= 160; channels += 31) {
8447 DWConvMicrokernelTester()
8448 .cr(32)
8449 .kr(9)
8450 .channels(channels)
8451 .width(3)
8452 .qmax(128)
8453 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f);
8454 }
8455 }
8456#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8457
8458
8459#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8460 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_eq_32) {
8461 TEST_REQUIRES_X86_AVX512F;
8462 DWConvMicrokernelTester()
8463 .cr(32)
8464 .kr(9)
8465 .channels(32)
8466 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8467 }
8468
8469 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_div_32) {
8470 TEST_REQUIRES_X86_AVX512F;
8471 for (uint32_t channels = 64; channels < 512; channels += 96) {
8472 DWConvMicrokernelTester()
8473 .cr(32)
8474 .kr(9)
8475 .channels(channels)
8476 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8477 }
8478 }
8479
8480 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_div_32_with_qmin) {
8481 TEST_REQUIRES_X86_AVX512F;
8482 for (uint32_t channels = 64; channels < 512; channels += 96) {
8483 DWConvMicrokernelTester()
8484 .cr(32)
8485 .kr(9)
8486 .channels(channels)
8487 .qmin(128)
8488 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8489 }
8490 }
8491
8492 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_div_32_with_qmax) {
8493 TEST_REQUIRES_X86_AVX512F;
8494 for (uint32_t channels = 64; channels < 512; channels += 96) {
8495 DWConvMicrokernelTester()
8496 .cr(32)
8497 .kr(9)
8498 .channels(channels)
8499 .qmax(128)
8500 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8501 }
8502 }
8503
8504 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_lt_32) {
8505 TEST_REQUIRES_X86_AVX512F;
8506 for (uint32_t channels = 1; channels < 32; channels++) {
8507 DWConvMicrokernelTester()
8508 .cr(32)
8509 .kr(9)
8510 .channels(channels)
8511 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8512 }
8513 }
8514
8515 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_gt_32) {
8516 TEST_REQUIRES_X86_AVX512F;
8517 for (uint32_t channels = 33; channels < 64; channels++) {
8518 DWConvMicrokernelTester()
8519 .cr(32)
8520 .kr(9)
8521 .channels(channels)
8522 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8523 }
8524 }
8525
8526 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_gt_32_with_qmin) {
8527 TEST_REQUIRES_X86_AVX512F;
8528 for (uint32_t channels = 33; channels < 64; channels++) {
8529 DWConvMicrokernelTester()
8530 .cr(32)
8531 .kr(9)
8532 .channels(channels)
8533 .qmin(128)
8534 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8535 }
8536 }
8537
8538 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, c_gt_32_with_qmax) {
8539 TEST_REQUIRES_X86_AVX512F;
8540 for (uint32_t channels = 33; channels < 64; channels++) {
8541 DWConvMicrokernelTester()
8542 .cr(32)
8543 .kr(9)
8544 .channels(channels)
8545 .qmax(128)
8546 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8547 }
8548 }
8549
8550 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, multipixel) {
8551 TEST_REQUIRES_X86_AVX512F;
8552 for (size_t channels = 1; channels <= 160; channels += 31) {
8553 DWConvMicrokernelTester()
8554 .cr(32)
8555 .kr(9)
8556 .channels(channels)
8557 .width(3)
8558 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8559 }
8560 }
8561
8562 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, multipixel_with_step) {
8563 TEST_REQUIRES_X86_AVX512F;
8564 for (size_t channels = 1; channels <= 160; channels += 31) {
8565 for (size_t step = 2; step <= 9; step++) {
8566 DWConvMicrokernelTester()
8567 .cr(32)
8568 .kr(9)
8569 .channels(channels)
8570 .width(3)
8571 .step(step)
8572 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8573 }
8574 }
8575 }
8576
8577 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, multipixel_with_output_stride) {
8578 TEST_REQUIRES_X86_AVX512F;
8579 for (size_t channels = 1; channels <= 160; channels += 31) {
8580 DWConvMicrokernelTester()
8581 .cr(32)
8582 .kr(9)
8583 .channels(32)
8584 .width(5)
8585 .output_stride(163)
8586 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8587 }
8588 }
8589
8590 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, multipixel_with_qmin) {
8591 TEST_REQUIRES_X86_AVX512F;
8592 for (size_t channels = 1; channels <= 160; channels += 31) {
8593 DWConvMicrokernelTester()
8594 .cr(32)
8595 .kr(9)
8596 .channels(channels)
8597 .width(3)
8598 .qmin(128)
8599 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8600 }
8601 }
8602
8603 TEST(F32_DWCONV_UP32X9__AVX512F_ACC2, multipixel_with_qmax) {
8604 TEST_REQUIRES_X86_AVX512F;
8605 for (size_t channels = 1; channels <= 160; channels += 31) {
8606 DWConvMicrokernelTester()
8607 .cr(32)
8608 .kr(9)
8609 .channels(channels)
8610 .width(3)
8611 .qmax(128)
8612 .Test(xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2);
8613 }
8614 }
8615#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8616
8617
8618#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8619 TEST(F32_DWCONV_UP16X4__AVX512F, c_eq_16) {
8620 TEST_REQUIRES_X86_AVX512F;
8621 DWConvMicrokernelTester()
8622 .cr(16)
8623 .kr(4)
8624 .channels(16)
8625 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8626 }
8627
8628 TEST(F32_DWCONV_UP16X4__AVX512F, c_div_16) {
8629 TEST_REQUIRES_X86_AVX512F;
8630 for (uint32_t channels = 32; channels < 256; channels += 48) {
8631 DWConvMicrokernelTester()
8632 .cr(16)
8633 .kr(4)
8634 .channels(channels)
8635 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8636 }
8637 }
8638
8639 TEST(F32_DWCONV_UP16X4__AVX512F, c_div_16_with_qmin) {
8640 TEST_REQUIRES_X86_AVX512F;
8641 for (uint32_t channels = 32; channels < 256; channels += 48) {
8642 DWConvMicrokernelTester()
8643 .cr(16)
8644 .kr(4)
8645 .channels(channels)
8646 .qmin(128)
8647 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8648 }
8649 }
8650
8651 TEST(F32_DWCONV_UP16X4__AVX512F, c_div_16_with_qmax) {
8652 TEST_REQUIRES_X86_AVX512F;
8653 for (uint32_t channels = 32; channels < 256; channels += 48) {
8654 DWConvMicrokernelTester()
8655 .cr(16)
8656 .kr(4)
8657 .channels(channels)
8658 .qmax(128)
8659 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8660 }
8661 }
8662
8663 TEST(F32_DWCONV_UP16X4__AVX512F, c_lt_16) {
8664 TEST_REQUIRES_X86_AVX512F;
8665 for (uint32_t channels = 1; channels < 16; channels++) {
8666 DWConvMicrokernelTester()
8667 .cr(16)
8668 .kr(4)
8669 .channels(channels)
8670 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8671 }
8672 }
8673
8674 TEST(F32_DWCONV_UP16X4__AVX512F, c_gt_16) {
8675 TEST_REQUIRES_X86_AVX512F;
8676 for (uint32_t channels = 17; channels < 32; channels++) {
8677 DWConvMicrokernelTester()
8678 .cr(16)
8679 .kr(4)
8680 .channels(channels)
8681 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8682 }
8683 }
8684
8685 TEST(F32_DWCONV_UP16X4__AVX512F, c_gt_16_with_qmin) {
8686 TEST_REQUIRES_X86_AVX512F;
8687 for (uint32_t channels = 17; channels < 32; channels++) {
8688 DWConvMicrokernelTester()
8689 .cr(16)
8690 .kr(4)
8691 .channels(channels)
8692 .qmin(128)
8693 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8694 }
8695 }
8696
8697 TEST(F32_DWCONV_UP16X4__AVX512F, c_gt_16_with_qmax) {
8698 TEST_REQUIRES_X86_AVX512F;
8699 for (uint32_t channels = 17; channels < 32; channels++) {
8700 DWConvMicrokernelTester()
8701 .cr(16)
8702 .kr(4)
8703 .channels(channels)
8704 .qmax(128)
8705 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8706 }
8707 }
8708
8709 TEST(F32_DWCONV_UP16X4__AVX512F, multipixel) {
8710 TEST_REQUIRES_X86_AVX512F;
8711 for (size_t channels = 1; channels <= 80; channels += 15) {
8712 DWConvMicrokernelTester()
8713 .cr(16)
8714 .kr(4)
8715 .channels(channels)
8716 .width(3)
8717 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8718 }
8719 }
8720
8721 TEST(F32_DWCONV_UP16X4__AVX512F, multipixel_with_step) {
8722 TEST_REQUIRES_X86_AVX512F;
8723 for (size_t channels = 1; channels <= 80; channels += 15) {
8724 for (size_t step = 2; step <= 4; step++) {
8725 DWConvMicrokernelTester()
8726 .cr(16)
8727 .kr(4)
8728 .channels(channels)
8729 .width(3)
8730 .step(step)
8731 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8732 }
8733 }
8734 }
8735
8736 TEST(F32_DWCONV_UP16X4__AVX512F, multipixel_with_output_stride) {
8737 TEST_REQUIRES_X86_AVX512F;
8738 for (size_t channels = 1; channels <= 80; channels += 15) {
8739 DWConvMicrokernelTester()
8740 .cr(16)
8741 .kr(4)
8742 .channels(16)
8743 .width(5)
8744 .output_stride(83)
8745 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8746 }
8747 }
8748
8749 TEST(F32_DWCONV_UP16X4__AVX512F, multipixel_with_qmin) {
8750 TEST_REQUIRES_X86_AVX512F;
8751 for (size_t channels = 1; channels <= 80; channels += 15) {
8752 DWConvMicrokernelTester()
8753 .cr(16)
8754 .kr(4)
8755 .channels(channels)
8756 .width(3)
8757 .qmin(128)
8758 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8759 }
8760 }
8761
8762 TEST(F32_DWCONV_UP16X4__AVX512F, multipixel_with_qmax) {
8763 TEST_REQUIRES_X86_AVX512F;
8764 for (size_t channels = 1; channels <= 80; channels += 15) {
8765 DWConvMicrokernelTester()
8766 .cr(16)
8767 .kr(4)
8768 .channels(channels)
8769 .width(3)
8770 .qmax(128)
8771 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f);
8772 }
8773 }
8774#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8775
8776
8777#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8778 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_eq_16) {
8779 TEST_REQUIRES_X86_AVX512F;
8780 DWConvMicrokernelTester()
8781 .cr(16)
8782 .kr(4)
8783 .channels(16)
8784 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8785 }
8786
8787 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_div_16) {
8788 TEST_REQUIRES_X86_AVX512F;
8789 for (uint32_t channels = 32; channels < 256; channels += 48) {
8790 DWConvMicrokernelTester()
8791 .cr(16)
8792 .kr(4)
8793 .channels(channels)
8794 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8795 }
8796 }
8797
8798 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_div_16_with_qmin) {
8799 TEST_REQUIRES_X86_AVX512F;
8800 for (uint32_t channels = 32; channels < 256; channels += 48) {
8801 DWConvMicrokernelTester()
8802 .cr(16)
8803 .kr(4)
8804 .channels(channels)
8805 .qmin(128)
8806 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8807 }
8808 }
8809
8810 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_div_16_with_qmax) {
8811 TEST_REQUIRES_X86_AVX512F;
8812 for (uint32_t channels = 32; channels < 256; channels += 48) {
8813 DWConvMicrokernelTester()
8814 .cr(16)
8815 .kr(4)
8816 .channels(channels)
8817 .qmax(128)
8818 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8819 }
8820 }
8821
8822 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_lt_16) {
8823 TEST_REQUIRES_X86_AVX512F;
8824 for (uint32_t channels = 1; channels < 16; channels++) {
8825 DWConvMicrokernelTester()
8826 .cr(16)
8827 .kr(4)
8828 .channels(channels)
8829 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8830 }
8831 }
8832
8833 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_gt_16) {
8834 TEST_REQUIRES_X86_AVX512F;
8835 for (uint32_t channels = 17; channels < 32; channels++) {
8836 DWConvMicrokernelTester()
8837 .cr(16)
8838 .kr(4)
8839 .channels(channels)
8840 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8841 }
8842 }
8843
8844 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_gt_16_with_qmin) {
8845 TEST_REQUIRES_X86_AVX512F;
8846 for (uint32_t channels = 17; channels < 32; channels++) {
8847 DWConvMicrokernelTester()
8848 .cr(16)
8849 .kr(4)
8850 .channels(channels)
8851 .qmin(128)
8852 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8853 }
8854 }
8855
8856 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, c_gt_16_with_qmax) {
8857 TEST_REQUIRES_X86_AVX512F;
8858 for (uint32_t channels = 17; channels < 32; channels++) {
8859 DWConvMicrokernelTester()
8860 .cr(16)
8861 .kr(4)
8862 .channels(channels)
8863 .qmax(128)
8864 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8865 }
8866 }
8867
8868 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, multipixel) {
8869 TEST_REQUIRES_X86_AVX512F;
8870 for (size_t channels = 1; channels <= 80; channels += 15) {
8871 DWConvMicrokernelTester()
8872 .cr(16)
8873 .kr(4)
8874 .channels(channels)
8875 .width(3)
8876 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8877 }
8878 }
8879
8880 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, multipixel_with_step) {
8881 TEST_REQUIRES_X86_AVX512F;
8882 for (size_t channels = 1; channels <= 80; channels += 15) {
8883 for (size_t step = 2; step <= 4; step++) {
8884 DWConvMicrokernelTester()
8885 .cr(16)
8886 .kr(4)
8887 .channels(channels)
8888 .width(3)
8889 .step(step)
8890 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8891 }
8892 }
8893 }
8894
8895 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, multipixel_with_output_stride) {
8896 TEST_REQUIRES_X86_AVX512F;
8897 for (size_t channels = 1; channels <= 80; channels += 15) {
8898 DWConvMicrokernelTester()
8899 .cr(16)
8900 .kr(4)
8901 .channels(16)
8902 .width(5)
8903 .output_stride(83)
8904 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8905 }
8906 }
8907
8908 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, multipixel_with_qmin) {
8909 TEST_REQUIRES_X86_AVX512F;
8910 for (size_t channels = 1; channels <= 80; channels += 15) {
8911 DWConvMicrokernelTester()
8912 .cr(16)
8913 .kr(4)
8914 .channels(channels)
8915 .width(3)
8916 .qmin(128)
8917 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8918 }
8919 }
8920
8921 TEST(F32_DWCONV_UP16X4__AVX512F_ACC2, multipixel_with_qmax) {
8922 TEST_REQUIRES_X86_AVX512F;
8923 for (size_t channels = 1; channels <= 80; channels += 15) {
8924 DWConvMicrokernelTester()
8925 .cr(16)
8926 .kr(4)
8927 .channels(channels)
8928 .width(3)
8929 .qmax(128)
8930 .Test(xnn_f32_dwconv_ukernel_up16x4__avx512f_acc2);
8931 }
8932 }
8933#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8934
8935
8936#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8937 TEST(F32_DWCONV_UP32X4__AVX512F, c_eq_32) {
8938 TEST_REQUIRES_X86_AVX512F;
8939 DWConvMicrokernelTester()
8940 .cr(32)
8941 .kr(4)
8942 .channels(32)
8943 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
8944 }
8945
8946 TEST(F32_DWCONV_UP32X4__AVX512F, c_div_32) {
8947 TEST_REQUIRES_X86_AVX512F;
8948 for (uint32_t channels = 64; channels < 512; channels += 96) {
8949 DWConvMicrokernelTester()
8950 .cr(32)
8951 .kr(4)
8952 .channels(channels)
8953 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
8954 }
8955 }
8956
8957 TEST(F32_DWCONV_UP32X4__AVX512F, c_div_32_with_qmin) {
8958 TEST_REQUIRES_X86_AVX512F;
8959 for (uint32_t channels = 64; channels < 512; channels += 96) {
8960 DWConvMicrokernelTester()
8961 .cr(32)
8962 .kr(4)
8963 .channels(channels)
8964 .qmin(128)
8965 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
8966 }
8967 }
8968
8969 TEST(F32_DWCONV_UP32X4__AVX512F, c_div_32_with_qmax) {
8970 TEST_REQUIRES_X86_AVX512F;
8971 for (uint32_t channels = 64; channels < 512; channels += 96) {
8972 DWConvMicrokernelTester()
8973 .cr(32)
8974 .kr(4)
8975 .channels(channels)
8976 .qmax(128)
8977 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
8978 }
8979 }
8980
8981 TEST(F32_DWCONV_UP32X4__AVX512F, c_lt_32) {
8982 TEST_REQUIRES_X86_AVX512F;
8983 for (uint32_t channels = 1; channels < 32; channels++) {
8984 DWConvMicrokernelTester()
8985 .cr(32)
8986 .kr(4)
8987 .channels(channels)
8988 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
8989 }
8990 }
8991
8992 TEST(F32_DWCONV_UP32X4__AVX512F, c_gt_32) {
8993 TEST_REQUIRES_X86_AVX512F;
8994 for (uint32_t channels = 33; channels < 64; channels++) {
8995 DWConvMicrokernelTester()
8996 .cr(32)
8997 .kr(4)
8998 .channels(channels)
8999 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9000 }
9001 }
9002
9003 TEST(F32_DWCONV_UP32X4__AVX512F, c_gt_32_with_qmin) {
9004 TEST_REQUIRES_X86_AVX512F;
9005 for (uint32_t channels = 33; channels < 64; channels++) {
9006 DWConvMicrokernelTester()
9007 .cr(32)
9008 .kr(4)
9009 .channels(channels)
9010 .qmin(128)
9011 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9012 }
9013 }
9014
9015 TEST(F32_DWCONV_UP32X4__AVX512F, c_gt_32_with_qmax) {
9016 TEST_REQUIRES_X86_AVX512F;
9017 for (uint32_t channels = 33; channels < 64; channels++) {
9018 DWConvMicrokernelTester()
9019 .cr(32)
9020 .kr(4)
9021 .channels(channels)
9022 .qmax(128)
9023 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9024 }
9025 }
9026
9027 TEST(F32_DWCONV_UP32X4__AVX512F, multipixel) {
9028 TEST_REQUIRES_X86_AVX512F;
9029 for (size_t channels = 1; channels <= 160; channels += 31) {
9030 DWConvMicrokernelTester()
9031 .cr(32)
9032 .kr(4)
9033 .channels(channels)
9034 .width(3)
9035 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9036 }
9037 }
9038
9039 TEST(F32_DWCONV_UP32X4__AVX512F, multipixel_with_step) {
9040 TEST_REQUIRES_X86_AVX512F;
9041 for (size_t channels = 1; channels <= 160; channels += 31) {
9042 for (size_t step = 2; step <= 4; step++) {
9043 DWConvMicrokernelTester()
9044 .cr(32)
9045 .kr(4)
9046 .channels(channels)
9047 .width(3)
9048 .step(step)
9049 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9050 }
9051 }
9052 }
9053
9054 TEST(F32_DWCONV_UP32X4__AVX512F, multipixel_with_output_stride) {
9055 TEST_REQUIRES_X86_AVX512F;
9056 for (size_t channels = 1; channels <= 160; channels += 31) {
9057 DWConvMicrokernelTester()
9058 .cr(32)
9059 .kr(4)
9060 .channels(32)
9061 .width(5)
9062 .output_stride(163)
9063 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9064 }
9065 }
9066
9067 TEST(F32_DWCONV_UP32X4__AVX512F, multipixel_with_qmin) {
9068 TEST_REQUIRES_X86_AVX512F;
9069 for (size_t channels = 1; channels <= 160; channels += 31) {
9070 DWConvMicrokernelTester()
9071 .cr(32)
9072 .kr(4)
9073 .channels(channels)
9074 .width(3)
9075 .qmin(128)
9076 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9077 }
9078 }
9079
9080 TEST(F32_DWCONV_UP32X4__AVX512F, multipixel_with_qmax) {
9081 TEST_REQUIRES_X86_AVX512F;
9082 for (size_t channels = 1; channels <= 160; channels += 31) {
9083 DWConvMicrokernelTester()
9084 .cr(32)
9085 .kr(4)
9086 .channels(channels)
9087 .width(3)
9088 .qmax(128)
9089 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f);
9090 }
9091 }
9092#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9093
9094
9095#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9096 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_eq_32) {
9097 TEST_REQUIRES_X86_AVX512F;
9098 DWConvMicrokernelTester()
9099 .cr(32)
9100 .kr(4)
9101 .channels(32)
9102 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9103 }
9104
9105 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_div_32) {
9106 TEST_REQUIRES_X86_AVX512F;
9107 for (uint32_t channels = 64; channels < 512; channels += 96) {
9108 DWConvMicrokernelTester()
9109 .cr(32)
9110 .kr(4)
9111 .channels(channels)
9112 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9113 }
9114 }
9115
9116 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_div_32_with_qmin) {
9117 TEST_REQUIRES_X86_AVX512F;
9118 for (uint32_t channels = 64; channels < 512; channels += 96) {
9119 DWConvMicrokernelTester()
9120 .cr(32)
9121 .kr(4)
9122 .channels(channels)
9123 .qmin(128)
9124 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9125 }
9126 }
9127
9128 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_div_32_with_qmax) {
9129 TEST_REQUIRES_X86_AVX512F;
9130 for (uint32_t channels = 64; channels < 512; channels += 96) {
9131 DWConvMicrokernelTester()
9132 .cr(32)
9133 .kr(4)
9134 .channels(channels)
9135 .qmax(128)
9136 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9137 }
9138 }
9139
9140 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_lt_32) {
9141 TEST_REQUIRES_X86_AVX512F;
9142 for (uint32_t channels = 1; channels < 32; channels++) {
9143 DWConvMicrokernelTester()
9144 .cr(32)
9145 .kr(4)
9146 .channels(channels)
9147 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9148 }
9149 }
9150
9151 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_gt_32) {
9152 TEST_REQUIRES_X86_AVX512F;
9153 for (uint32_t channels = 33; channels < 64; channels++) {
9154 DWConvMicrokernelTester()
9155 .cr(32)
9156 .kr(4)
9157 .channels(channels)
9158 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9159 }
9160 }
9161
9162 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_gt_32_with_qmin) {
9163 TEST_REQUIRES_X86_AVX512F;
9164 for (uint32_t channels = 33; channels < 64; channels++) {
9165 DWConvMicrokernelTester()
9166 .cr(32)
9167 .kr(4)
9168 .channels(channels)
9169 .qmin(128)
9170 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9171 }
9172 }
9173
9174 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, c_gt_32_with_qmax) {
9175 TEST_REQUIRES_X86_AVX512F;
9176 for (uint32_t channels = 33; channels < 64; channels++) {
9177 DWConvMicrokernelTester()
9178 .cr(32)
9179 .kr(4)
9180 .channels(channels)
9181 .qmax(128)
9182 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9183 }
9184 }
9185
9186 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, multipixel) {
9187 TEST_REQUIRES_X86_AVX512F;
9188 for (size_t channels = 1; channels <= 160; channels += 31) {
9189 DWConvMicrokernelTester()
9190 .cr(32)
9191 .kr(4)
9192 .channels(channels)
9193 .width(3)
9194 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9195 }
9196 }
9197
9198 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, multipixel_with_step) {
9199 TEST_REQUIRES_X86_AVX512F;
9200 for (size_t channels = 1; channels <= 160; channels += 31) {
9201 for (size_t step = 2; step <= 4; step++) {
9202 DWConvMicrokernelTester()
9203 .cr(32)
9204 .kr(4)
9205 .channels(channels)
9206 .width(3)
9207 .step(step)
9208 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9209 }
9210 }
9211 }
9212
9213 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, multipixel_with_output_stride) {
9214 TEST_REQUIRES_X86_AVX512F;
9215 for (size_t channels = 1; channels <= 160; channels += 31) {
9216 DWConvMicrokernelTester()
9217 .cr(32)
9218 .kr(4)
9219 .channels(32)
9220 .width(5)
9221 .output_stride(163)
9222 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9223 }
9224 }
9225
9226 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, multipixel_with_qmin) {
9227 TEST_REQUIRES_X86_AVX512F;
9228 for (size_t channels = 1; channels <= 160; channels += 31) {
9229 DWConvMicrokernelTester()
9230 .cr(32)
9231 .kr(4)
9232 .channels(channels)
9233 .width(3)
9234 .qmin(128)
9235 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9236 }
9237 }
9238
9239 TEST(F32_DWCONV_UP32X4__AVX512F_ACC2, multipixel_with_qmax) {
9240 TEST_REQUIRES_X86_AVX512F;
9241 for (size_t channels = 1; channels <= 160; channels += 31) {
9242 DWConvMicrokernelTester()
9243 .cr(32)
9244 .kr(4)
9245 .channels(channels)
9246 .width(3)
9247 .qmax(128)
9248 .Test(xnn_f32_dwconv_ukernel_up32x4__avx512f_acc2);
9249 }
9250 }
9251#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9252
9253
9254#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9255 TEST(F32_DWCONV_UP4X25__PSIMD, c_eq_4) {
9256 TEST_REQUIRES_PSIMD;
9257 DWConvMicrokernelTester()
9258 .cr(4)
9259 .kr(25)
9260 .channels(4)
9261 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9262 }
9263
9264 TEST(F32_DWCONV_UP4X25__PSIMD, c_div_4) {
9265 TEST_REQUIRES_PSIMD;
9266 for (uint32_t channels = 8; channels < 64; channels += 12) {
9267 DWConvMicrokernelTester()
9268 .cr(4)
9269 .kr(25)
9270 .channels(channels)
9271 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9272 }
9273 }
9274
9275 TEST(F32_DWCONV_UP4X25__PSIMD, c_div_4_with_qmin) {
9276 TEST_REQUIRES_PSIMD;
9277 for (uint32_t channels = 8; channels < 64; channels += 12) {
9278 DWConvMicrokernelTester()
9279 .cr(4)
9280 .kr(25)
9281 .channels(channels)
9282 .qmin(128)
9283 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9284 }
9285 }
9286
9287 TEST(F32_DWCONV_UP4X25__PSIMD, c_div_4_with_qmax) {
9288 TEST_REQUIRES_PSIMD;
9289 for (uint32_t channels = 8; channels < 64; channels += 12) {
9290 DWConvMicrokernelTester()
9291 .cr(4)
9292 .kr(25)
9293 .channels(channels)
9294 .qmax(128)
9295 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9296 }
9297 }
9298
9299 TEST(F32_DWCONV_UP4X25__PSIMD, c_lt_4) {
9300 TEST_REQUIRES_PSIMD;
9301 for (uint32_t channels = 1; channels < 4; channels++) {
9302 DWConvMicrokernelTester()
9303 .cr(4)
9304 .kr(25)
9305 .channels(channels)
9306 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9307 }
9308 }
9309
9310 TEST(F32_DWCONV_UP4X25__PSIMD, c_gt_4) {
9311 TEST_REQUIRES_PSIMD;
9312 for (uint32_t channels = 5; channels < 8; channels++) {
9313 DWConvMicrokernelTester()
9314 .cr(4)
9315 .kr(25)
9316 .channels(channels)
9317 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9318 }
9319 }
9320
9321 TEST(F32_DWCONV_UP4X25__PSIMD, c_gt_4_with_qmin) {
9322 TEST_REQUIRES_PSIMD;
9323 for (uint32_t channels = 5; channels < 8; channels++) {
9324 DWConvMicrokernelTester()
9325 .cr(4)
9326 .kr(25)
9327 .channels(channels)
9328 .qmin(128)
9329 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9330 }
9331 }
9332
9333 TEST(F32_DWCONV_UP4X25__PSIMD, c_gt_4_with_qmax) {
9334 TEST_REQUIRES_PSIMD;
9335 for (uint32_t channels = 5; channels < 8; channels++) {
9336 DWConvMicrokernelTester()
9337 .cr(4)
9338 .kr(25)
9339 .channels(channels)
9340 .qmax(128)
9341 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9342 }
9343 }
9344
9345 TEST(F32_DWCONV_UP4X25__PSIMD, multipixel) {
9346 TEST_REQUIRES_PSIMD;
9347 for (size_t channels = 1; channels <= 20; channels += 3) {
9348 DWConvMicrokernelTester()
9349 .cr(4)
9350 .kr(25)
9351 .channels(channels)
9352 .width(3)
9353 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9354 }
9355 }
9356
9357 TEST(F32_DWCONV_UP4X25__PSIMD, multipixel_with_step) {
9358 TEST_REQUIRES_PSIMD;
9359 for (size_t channels = 1; channels <= 20; channels += 3) {
9360 for (size_t step = 2; step <= 25; step++) {
9361 DWConvMicrokernelTester()
9362 .cr(4)
9363 .kr(25)
9364 .channels(channels)
9365 .width(3)
9366 .step(step)
9367 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9368 }
9369 }
9370 }
9371
9372 TEST(F32_DWCONV_UP4X25__PSIMD, multipixel_with_output_stride) {
9373 TEST_REQUIRES_PSIMD;
9374 for (size_t channels = 1; channels <= 20; channels += 3) {
9375 DWConvMicrokernelTester()
9376 .cr(4)
9377 .kr(25)
9378 .channels(4)
9379 .width(5)
9380 .output_stride(23)
9381 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9382 }
9383 }
9384
9385 TEST(F32_DWCONV_UP4X25__PSIMD, multipixel_with_qmin) {
9386 TEST_REQUIRES_PSIMD;
9387 for (size_t channels = 1; channels <= 20; channels += 3) {
9388 DWConvMicrokernelTester()
9389 .cr(4)
9390 .kr(25)
9391 .channels(channels)
9392 .width(3)
9393 .qmin(128)
9394 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9395 }
9396 }
9397
9398 TEST(F32_DWCONV_UP4X25__PSIMD, multipixel_with_qmax) {
9399 TEST_REQUIRES_PSIMD;
9400 for (size_t channels = 1; channels <= 20; channels += 3) {
9401 DWConvMicrokernelTester()
9402 .cr(4)
9403 .kr(25)
9404 .channels(channels)
9405 .width(3)
9406 .qmax(128)
9407 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9408 }
9409 }
9410#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9411
9412
9413#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9414 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_eq_4) {
9415 TEST_REQUIRES_PSIMD;
9416 DWConvMicrokernelTester()
9417 .cr(4)
9418 .kr(25)
9419 .channels(4)
9420 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9421 }
9422
9423 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_div_4) {
9424 TEST_REQUIRES_PSIMD;
9425 for (uint32_t channels = 8; channels < 64; channels += 12) {
9426 DWConvMicrokernelTester()
9427 .cr(4)
9428 .kr(25)
9429 .channels(channels)
9430 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9431 }
9432 }
9433
9434 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_div_4_with_qmin) {
9435 TEST_REQUIRES_PSIMD;
9436 for (uint32_t channels = 8; channels < 64; channels += 12) {
9437 DWConvMicrokernelTester()
9438 .cr(4)
9439 .kr(25)
9440 .channels(channels)
9441 .qmin(128)
9442 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9443 }
9444 }
9445
9446 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_div_4_with_qmax) {
9447 TEST_REQUIRES_PSIMD;
9448 for (uint32_t channels = 8; channels < 64; channels += 12) {
9449 DWConvMicrokernelTester()
9450 .cr(4)
9451 .kr(25)
9452 .channels(channels)
9453 .qmax(128)
9454 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9455 }
9456 }
9457
9458 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_lt_4) {
9459 TEST_REQUIRES_PSIMD;
9460 for (uint32_t channels = 1; channels < 4; channels++) {
9461 DWConvMicrokernelTester()
9462 .cr(4)
9463 .kr(25)
9464 .channels(channels)
9465 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9466 }
9467 }
9468
9469 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_gt_4) {
9470 TEST_REQUIRES_PSIMD;
9471 for (uint32_t channels = 5; channels < 8; channels++) {
9472 DWConvMicrokernelTester()
9473 .cr(4)
9474 .kr(25)
9475 .channels(channels)
9476 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9477 }
9478 }
9479
9480 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_gt_4_with_qmin) {
9481 TEST_REQUIRES_PSIMD;
9482 for (uint32_t channels = 5; channels < 8; channels++) {
9483 DWConvMicrokernelTester()
9484 .cr(4)
9485 .kr(25)
9486 .channels(channels)
9487 .qmin(128)
9488 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9489 }
9490 }
9491
9492 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, c_gt_4_with_qmax) {
9493 TEST_REQUIRES_PSIMD;
9494 for (uint32_t channels = 5; channels < 8; channels++) {
9495 DWConvMicrokernelTester()
9496 .cr(4)
9497 .kr(25)
9498 .channels(channels)
9499 .qmax(128)
9500 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9501 }
9502 }
9503
9504 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, multipixel) {
9505 TEST_REQUIRES_PSIMD;
9506 for (size_t channels = 1; channels <= 20; channels += 3) {
9507 DWConvMicrokernelTester()
9508 .cr(4)
9509 .kr(25)
9510 .channels(channels)
9511 .width(3)
9512 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9513 }
9514 }
9515
9516 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, multipixel_with_step) {
9517 TEST_REQUIRES_PSIMD;
9518 for (size_t channels = 1; channels <= 20; channels += 3) {
9519 for (size_t step = 2; step <= 25; step++) {
9520 DWConvMicrokernelTester()
9521 .cr(4)
9522 .kr(25)
9523 .channels(channels)
9524 .width(3)
9525 .step(step)
9526 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9527 }
9528 }
9529 }
9530
9531 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, multipixel_with_output_stride) {
9532 TEST_REQUIRES_PSIMD;
9533 for (size_t channels = 1; channels <= 20; channels += 3) {
9534 DWConvMicrokernelTester()
9535 .cr(4)
9536 .kr(25)
9537 .channels(4)
9538 .width(5)
9539 .output_stride(23)
9540 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9541 }
9542 }
9543
9544 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, multipixel_with_qmin) {
9545 TEST_REQUIRES_PSIMD;
9546 for (size_t channels = 1; channels <= 20; channels += 3) {
9547 DWConvMicrokernelTester()
9548 .cr(4)
9549 .kr(25)
9550 .channels(channels)
9551 .width(3)
9552 .qmin(128)
9553 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9554 }
9555 }
9556
9557 TEST(F32_DWCONV_UP4X25__PSIMD_ACC2, multipixel_with_qmax) {
9558 TEST_REQUIRES_PSIMD;
9559 for (size_t channels = 1; channels <= 20; channels += 3) {
9560 DWConvMicrokernelTester()
9561 .cr(4)
9562 .kr(25)
9563 .channels(channels)
9564 .width(3)
9565 .qmax(128)
9566 .Test(xnn_f32_dwconv_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9567 }
9568 }
9569#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9570
9571
9572#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9573 TEST(F32_DWCONV_UP8X25__PSIMD, c_eq_8) {
9574 TEST_REQUIRES_PSIMD;
9575 DWConvMicrokernelTester()
9576 .cr(8)
9577 .kr(25)
9578 .channels(8)
9579 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9580 }
9581
9582 TEST(F32_DWCONV_UP8X25__PSIMD, c_div_8) {
9583 TEST_REQUIRES_PSIMD;
9584 for (uint32_t channels = 16; channels < 128; channels += 24) {
9585 DWConvMicrokernelTester()
9586 .cr(8)
9587 .kr(25)
9588 .channels(channels)
9589 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9590 }
9591 }
9592
9593 TEST(F32_DWCONV_UP8X25__PSIMD, c_div_8_with_qmin) {
9594 TEST_REQUIRES_PSIMD;
9595 for (uint32_t channels = 16; channels < 128; channels += 24) {
9596 DWConvMicrokernelTester()
9597 .cr(8)
9598 .kr(25)
9599 .channels(channels)
9600 .qmin(128)
9601 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9602 }
9603 }
9604
9605 TEST(F32_DWCONV_UP8X25__PSIMD, c_div_8_with_qmax) {
9606 TEST_REQUIRES_PSIMD;
9607 for (uint32_t channels = 16; channels < 128; channels += 24) {
9608 DWConvMicrokernelTester()
9609 .cr(8)
9610 .kr(25)
9611 .channels(channels)
9612 .qmax(128)
9613 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9614 }
9615 }
9616
9617 TEST(F32_DWCONV_UP8X25__PSIMD, c_lt_8) {
9618 TEST_REQUIRES_PSIMD;
9619 for (uint32_t channels = 1; channels < 8; channels++) {
9620 DWConvMicrokernelTester()
9621 .cr(8)
9622 .kr(25)
9623 .channels(channels)
9624 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9625 }
9626 }
9627
9628 TEST(F32_DWCONV_UP8X25__PSIMD, c_gt_8) {
9629 TEST_REQUIRES_PSIMD;
9630 for (uint32_t channels = 9; channels < 16; channels++) {
9631 DWConvMicrokernelTester()
9632 .cr(8)
9633 .kr(25)
9634 .channels(channels)
9635 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9636 }
9637 }
9638
9639 TEST(F32_DWCONV_UP8X25__PSIMD, c_gt_8_with_qmin) {
9640 TEST_REQUIRES_PSIMD;
9641 for (uint32_t channels = 9; channels < 16; channels++) {
9642 DWConvMicrokernelTester()
9643 .cr(8)
9644 .kr(25)
9645 .channels(channels)
9646 .qmin(128)
9647 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9648 }
9649 }
9650
9651 TEST(F32_DWCONV_UP8X25__PSIMD, c_gt_8_with_qmax) {
9652 TEST_REQUIRES_PSIMD;
9653 for (uint32_t channels = 9; channels < 16; channels++) {
9654 DWConvMicrokernelTester()
9655 .cr(8)
9656 .kr(25)
9657 .channels(channels)
9658 .qmax(128)
9659 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9660 }
9661 }
9662
9663 TEST(F32_DWCONV_UP8X25__PSIMD, multipixel) {
9664 TEST_REQUIRES_PSIMD;
9665 for (size_t channels = 1; channels <= 40; channels += 7) {
9666 DWConvMicrokernelTester()
9667 .cr(8)
9668 .kr(25)
9669 .channels(channels)
9670 .width(3)
9671 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9672 }
9673 }
9674
9675 TEST(F32_DWCONV_UP8X25__PSIMD, multipixel_with_step) {
9676 TEST_REQUIRES_PSIMD;
9677 for (size_t channels = 1; channels <= 40; channels += 7) {
9678 for (size_t step = 2; step <= 25; step++) {
9679 DWConvMicrokernelTester()
9680 .cr(8)
9681 .kr(25)
9682 .channels(channels)
9683 .width(3)
9684 .step(step)
9685 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9686 }
9687 }
9688 }
9689
9690 TEST(F32_DWCONV_UP8X25__PSIMD, multipixel_with_output_stride) {
9691 TEST_REQUIRES_PSIMD;
9692 for (size_t channels = 1; channels <= 40; channels += 7) {
9693 DWConvMicrokernelTester()
9694 .cr(8)
9695 .kr(25)
9696 .channels(8)
9697 .width(5)
9698 .output_stride(43)
9699 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9700 }
9701 }
9702
9703 TEST(F32_DWCONV_UP8X25__PSIMD, multipixel_with_qmin) {
9704 TEST_REQUIRES_PSIMD;
9705 for (size_t channels = 1; channels <= 40; channels += 7) {
9706 DWConvMicrokernelTester()
9707 .cr(8)
9708 .kr(25)
9709 .channels(channels)
9710 .width(3)
9711 .qmin(128)
9712 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9713 }
9714 }
9715
9716 TEST(F32_DWCONV_UP8X25__PSIMD, multipixel_with_qmax) {
9717 TEST_REQUIRES_PSIMD;
9718 for (size_t channels = 1; channels <= 40; channels += 7) {
9719 DWConvMicrokernelTester()
9720 .cr(8)
9721 .kr(25)
9722 .channels(channels)
9723 .width(3)
9724 .qmax(128)
9725 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
9726 }
9727 }
9728#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9729
9730
9731#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9732 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_eq_8) {
9733 TEST_REQUIRES_PSIMD;
9734 DWConvMicrokernelTester()
9735 .cr(8)
9736 .kr(25)
9737 .channels(8)
9738 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9739 }
9740
9741 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_div_8) {
9742 TEST_REQUIRES_PSIMD;
9743 for (uint32_t channels = 16; channels < 128; channels += 24) {
9744 DWConvMicrokernelTester()
9745 .cr(8)
9746 .kr(25)
9747 .channels(channels)
9748 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9749 }
9750 }
9751
9752 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_div_8_with_qmin) {
9753 TEST_REQUIRES_PSIMD;
9754 for (uint32_t channels = 16; channels < 128; channels += 24) {
9755 DWConvMicrokernelTester()
9756 .cr(8)
9757 .kr(25)
9758 .channels(channels)
9759 .qmin(128)
9760 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9761 }
9762 }
9763
9764 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_div_8_with_qmax) {
9765 TEST_REQUIRES_PSIMD;
9766 for (uint32_t channels = 16; channels < 128; channels += 24) {
9767 DWConvMicrokernelTester()
9768 .cr(8)
9769 .kr(25)
9770 .channels(channels)
9771 .qmax(128)
9772 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9773 }
9774 }
9775
9776 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_lt_8) {
9777 TEST_REQUIRES_PSIMD;
9778 for (uint32_t channels = 1; channels < 8; channels++) {
9779 DWConvMicrokernelTester()
9780 .cr(8)
9781 .kr(25)
9782 .channels(channels)
9783 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9784 }
9785 }
9786
9787 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_gt_8) {
9788 TEST_REQUIRES_PSIMD;
9789 for (uint32_t channels = 9; channels < 16; channels++) {
9790 DWConvMicrokernelTester()
9791 .cr(8)
9792 .kr(25)
9793 .channels(channels)
9794 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9795 }
9796 }
9797
9798 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_gt_8_with_qmin) {
9799 TEST_REQUIRES_PSIMD;
9800 for (uint32_t channels = 9; channels < 16; channels++) {
9801 DWConvMicrokernelTester()
9802 .cr(8)
9803 .kr(25)
9804 .channels(channels)
9805 .qmin(128)
9806 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9807 }
9808 }
9809
9810 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, c_gt_8_with_qmax) {
9811 TEST_REQUIRES_PSIMD;
9812 for (uint32_t channels = 9; channels < 16; channels++) {
9813 DWConvMicrokernelTester()
9814 .cr(8)
9815 .kr(25)
9816 .channels(channels)
9817 .qmax(128)
9818 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9819 }
9820 }
9821
9822 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, multipixel) {
9823 TEST_REQUIRES_PSIMD;
9824 for (size_t channels = 1; channels <= 40; channels += 7) {
9825 DWConvMicrokernelTester()
9826 .cr(8)
9827 .kr(25)
9828 .channels(channels)
9829 .width(3)
9830 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9831 }
9832 }
9833
9834 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, multipixel_with_step) {
9835 TEST_REQUIRES_PSIMD;
9836 for (size_t channels = 1; channels <= 40; channels += 7) {
9837 for (size_t step = 2; step <= 25; step++) {
9838 DWConvMicrokernelTester()
9839 .cr(8)
9840 .kr(25)
9841 .channels(channels)
9842 .width(3)
9843 .step(step)
9844 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9845 }
9846 }
9847 }
9848
9849 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, multipixel_with_output_stride) {
9850 TEST_REQUIRES_PSIMD;
9851 for (size_t channels = 1; channels <= 40; channels += 7) {
9852 DWConvMicrokernelTester()
9853 .cr(8)
9854 .kr(25)
9855 .channels(8)
9856 .width(5)
9857 .output_stride(43)
9858 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9859 }
9860 }
9861
9862 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, multipixel_with_qmin) {
9863 TEST_REQUIRES_PSIMD;
9864 for (size_t channels = 1; channels <= 40; channels += 7) {
9865 DWConvMicrokernelTester()
9866 .cr(8)
9867 .kr(25)
9868 .channels(channels)
9869 .width(3)
9870 .qmin(128)
9871 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9872 }
9873 }
9874
9875 TEST(F32_DWCONV_UP8X25__PSIMD_ACC2, multipixel_with_qmax) {
9876 TEST_REQUIRES_PSIMD;
9877 for (size_t channels = 1; channels <= 40; channels += 7) {
9878 DWConvMicrokernelTester()
9879 .cr(8)
9880 .kr(25)
9881 .channels(channels)
9882 .width(3)
9883 .qmax(128)
9884 .Test(xnn_f32_dwconv_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
9885 }
9886 }
9887#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9888
9889
9890#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
9891 TEST(F32_DWCONV_UP4X9__PSIMD, c_eq_4) {
9892 TEST_REQUIRES_PSIMD;
9893 DWConvMicrokernelTester()
9894 .cr(4)
9895 .kr(9)
9896 .channels(4)
9897 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9898 }
9899
9900 TEST(F32_DWCONV_UP4X9__PSIMD, c_div_4) {
9901 TEST_REQUIRES_PSIMD;
9902 for (uint32_t channels = 8; channels < 64; channels += 12) {
9903 DWConvMicrokernelTester()
9904 .cr(4)
9905 .kr(9)
9906 .channels(channels)
9907 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9908 }
9909 }
9910
9911 TEST(F32_DWCONV_UP4X9__PSIMD, c_div_4_with_qmin) {
9912 TEST_REQUIRES_PSIMD;
9913 for (uint32_t channels = 8; channels < 64; channels += 12) {
9914 DWConvMicrokernelTester()
9915 .cr(4)
9916 .kr(9)
9917 .channels(channels)
9918 .qmin(128)
9919 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9920 }
9921 }
9922
9923 TEST(F32_DWCONV_UP4X9__PSIMD, c_div_4_with_qmax) {
9924 TEST_REQUIRES_PSIMD;
9925 for (uint32_t channels = 8; channels < 64; channels += 12) {
9926 DWConvMicrokernelTester()
9927 .cr(4)
9928 .kr(9)
9929 .channels(channels)
9930 .qmax(128)
9931 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9932 }
9933 }
9934
9935 TEST(F32_DWCONV_UP4X9__PSIMD, c_lt_4) {
9936 TEST_REQUIRES_PSIMD;
9937 for (uint32_t channels = 1; channels < 4; channels++) {
9938 DWConvMicrokernelTester()
9939 .cr(4)
9940 .kr(9)
9941 .channels(channels)
9942 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9943 }
9944 }
9945
9946 TEST(F32_DWCONV_UP4X9__PSIMD, c_gt_4) {
9947 TEST_REQUIRES_PSIMD;
9948 for (uint32_t channels = 5; channels < 8; channels++) {
9949 DWConvMicrokernelTester()
9950 .cr(4)
9951 .kr(9)
9952 .channels(channels)
9953 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9954 }
9955 }
9956
9957 TEST(F32_DWCONV_UP4X9__PSIMD, c_gt_4_with_qmin) {
9958 TEST_REQUIRES_PSIMD;
9959 for (uint32_t channels = 5; channels < 8; channels++) {
9960 DWConvMicrokernelTester()
9961 .cr(4)
9962 .kr(9)
9963 .channels(channels)
9964 .qmin(128)
9965 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9966 }
9967 }
9968
9969 TEST(F32_DWCONV_UP4X9__PSIMD, c_gt_4_with_qmax) {
9970 TEST_REQUIRES_PSIMD;
9971 for (uint32_t channels = 5; channels < 8; channels++) {
9972 DWConvMicrokernelTester()
9973 .cr(4)
9974 .kr(9)
9975 .channels(channels)
9976 .qmax(128)
9977 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9978 }
9979 }
9980
9981 TEST(F32_DWCONV_UP4X9__PSIMD, multipixel) {
9982 TEST_REQUIRES_PSIMD;
9983 for (size_t channels = 1; channels <= 20; channels += 3) {
9984 DWConvMicrokernelTester()
9985 .cr(4)
9986 .kr(9)
9987 .channels(channels)
9988 .width(3)
9989 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
9990 }
9991 }
9992
9993 TEST(F32_DWCONV_UP4X9__PSIMD, multipixel_with_step) {
9994 TEST_REQUIRES_PSIMD;
9995 for (size_t channels = 1; channels <= 20; channels += 3) {
9996 for (size_t step = 2; step <= 9; step++) {
9997 DWConvMicrokernelTester()
9998 .cr(4)
9999 .kr(9)
10000 .channels(channels)
10001 .width(3)
10002 .step(step)
10003 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10004 }
10005 }
10006 }
10007
10008 TEST(F32_DWCONV_UP4X9__PSIMD, multipixel_with_output_stride) {
10009 TEST_REQUIRES_PSIMD;
10010 for (size_t channels = 1; channels <= 20; channels += 3) {
10011 DWConvMicrokernelTester()
10012 .cr(4)
10013 .kr(9)
10014 .channels(4)
10015 .width(5)
10016 .output_stride(23)
10017 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10018 }
10019 }
10020
10021 TEST(F32_DWCONV_UP4X9__PSIMD, multipixel_with_qmin) {
10022 TEST_REQUIRES_PSIMD;
10023 for (size_t channels = 1; channels <= 20; channels += 3) {
10024 DWConvMicrokernelTester()
10025 .cr(4)
10026 .kr(9)
10027 .channels(channels)
10028 .width(3)
10029 .qmin(128)
10030 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10031 }
10032 }
10033
10034 TEST(F32_DWCONV_UP4X9__PSIMD, multipixel_with_qmax) {
10035 TEST_REQUIRES_PSIMD;
10036 for (size_t channels = 1; channels <= 20; channels += 3) {
10037 DWConvMicrokernelTester()
10038 .cr(4)
10039 .kr(9)
10040 .channels(channels)
10041 .width(3)
10042 .qmax(128)
10043 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10044 }
10045 }
10046#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10047
10048
10049#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10050 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_eq_4) {
10051 TEST_REQUIRES_PSIMD;
10052 DWConvMicrokernelTester()
10053 .cr(4)
10054 .kr(9)
10055 .channels(4)
10056 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10057 }
10058
10059 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_div_4) {
10060 TEST_REQUIRES_PSIMD;
10061 for (uint32_t channels = 8; channels < 64; channels += 12) {
10062 DWConvMicrokernelTester()
10063 .cr(4)
10064 .kr(9)
10065 .channels(channels)
10066 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10067 }
10068 }
10069
10070 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_div_4_with_qmin) {
10071 TEST_REQUIRES_PSIMD;
10072 for (uint32_t channels = 8; channels < 64; channels += 12) {
10073 DWConvMicrokernelTester()
10074 .cr(4)
10075 .kr(9)
10076 .channels(channels)
10077 .qmin(128)
10078 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10079 }
10080 }
10081
10082 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_div_4_with_qmax) {
10083 TEST_REQUIRES_PSIMD;
10084 for (uint32_t channels = 8; channels < 64; channels += 12) {
10085 DWConvMicrokernelTester()
10086 .cr(4)
10087 .kr(9)
10088 .channels(channels)
10089 .qmax(128)
10090 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10091 }
10092 }
10093
10094 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_lt_4) {
10095 TEST_REQUIRES_PSIMD;
10096 for (uint32_t channels = 1; channels < 4; channels++) {
10097 DWConvMicrokernelTester()
10098 .cr(4)
10099 .kr(9)
10100 .channels(channels)
10101 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10102 }
10103 }
10104
10105 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_gt_4) {
10106 TEST_REQUIRES_PSIMD;
10107 for (uint32_t channels = 5; channels < 8; channels++) {
10108 DWConvMicrokernelTester()
10109 .cr(4)
10110 .kr(9)
10111 .channels(channels)
10112 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10113 }
10114 }
10115
10116 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_gt_4_with_qmin) {
10117 TEST_REQUIRES_PSIMD;
10118 for (uint32_t channels = 5; channels < 8; channels++) {
10119 DWConvMicrokernelTester()
10120 .cr(4)
10121 .kr(9)
10122 .channels(channels)
10123 .qmin(128)
10124 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10125 }
10126 }
10127
10128 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, c_gt_4_with_qmax) {
10129 TEST_REQUIRES_PSIMD;
10130 for (uint32_t channels = 5; channels < 8; channels++) {
10131 DWConvMicrokernelTester()
10132 .cr(4)
10133 .kr(9)
10134 .channels(channels)
10135 .qmax(128)
10136 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10137 }
10138 }
10139
10140 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, multipixel) {
10141 TEST_REQUIRES_PSIMD;
10142 for (size_t channels = 1; channels <= 20; channels += 3) {
10143 DWConvMicrokernelTester()
10144 .cr(4)
10145 .kr(9)
10146 .channels(channels)
10147 .width(3)
10148 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10149 }
10150 }
10151
10152 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, multipixel_with_step) {
10153 TEST_REQUIRES_PSIMD;
10154 for (size_t channels = 1; channels <= 20; channels += 3) {
10155 for (size_t step = 2; step <= 9; step++) {
10156 DWConvMicrokernelTester()
10157 .cr(4)
10158 .kr(9)
10159 .channels(channels)
10160 .width(3)
10161 .step(step)
10162 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10163 }
10164 }
10165 }
10166
10167 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, multipixel_with_output_stride) {
10168 TEST_REQUIRES_PSIMD;
10169 for (size_t channels = 1; channels <= 20; channels += 3) {
10170 DWConvMicrokernelTester()
10171 .cr(4)
10172 .kr(9)
10173 .channels(4)
10174 .width(5)
10175 .output_stride(23)
10176 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10177 }
10178 }
10179
10180 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, multipixel_with_qmin) {
10181 TEST_REQUIRES_PSIMD;
10182 for (size_t channels = 1; channels <= 20; channels += 3) {
10183 DWConvMicrokernelTester()
10184 .cr(4)
10185 .kr(9)
10186 .channels(channels)
10187 .width(3)
10188 .qmin(128)
10189 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10190 }
10191 }
10192
10193 TEST(F32_DWCONV_UP4X9__PSIMD_ACC2, multipixel_with_qmax) {
10194 TEST_REQUIRES_PSIMD;
10195 for (size_t channels = 1; channels <= 20; channels += 3) {
10196 DWConvMicrokernelTester()
10197 .cr(4)
10198 .kr(9)
10199 .channels(channels)
10200 .width(3)
10201 .qmax(128)
10202 .Test(xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10203 }
10204 }
10205#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10206
10207
10208#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10209 TEST(F32_DWCONV_UP8X9__PSIMD, c_eq_8) {
10210 TEST_REQUIRES_PSIMD;
10211 DWConvMicrokernelTester()
10212 .cr(8)
10213 .kr(9)
10214 .channels(8)
10215 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10216 }
10217
10218 TEST(F32_DWCONV_UP8X9__PSIMD, c_div_8) {
10219 TEST_REQUIRES_PSIMD;
10220 for (uint32_t channels = 16; channels < 128; channels += 24) {
10221 DWConvMicrokernelTester()
10222 .cr(8)
10223 .kr(9)
10224 .channels(channels)
10225 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10226 }
10227 }
10228
10229 TEST(F32_DWCONV_UP8X9__PSIMD, c_div_8_with_qmin) {
10230 TEST_REQUIRES_PSIMD;
10231 for (uint32_t channels = 16; channels < 128; channels += 24) {
10232 DWConvMicrokernelTester()
10233 .cr(8)
10234 .kr(9)
10235 .channels(channels)
10236 .qmin(128)
10237 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10238 }
10239 }
10240
10241 TEST(F32_DWCONV_UP8X9__PSIMD, c_div_8_with_qmax) {
10242 TEST_REQUIRES_PSIMD;
10243 for (uint32_t channels = 16; channels < 128; channels += 24) {
10244 DWConvMicrokernelTester()
10245 .cr(8)
10246 .kr(9)
10247 .channels(channels)
10248 .qmax(128)
10249 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10250 }
10251 }
10252
10253 TEST(F32_DWCONV_UP8X9__PSIMD, c_lt_8) {
10254 TEST_REQUIRES_PSIMD;
10255 for (uint32_t channels = 1; channels < 8; channels++) {
10256 DWConvMicrokernelTester()
10257 .cr(8)
10258 .kr(9)
10259 .channels(channels)
10260 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10261 }
10262 }
10263
10264 TEST(F32_DWCONV_UP8X9__PSIMD, c_gt_8) {
10265 TEST_REQUIRES_PSIMD;
10266 for (uint32_t channels = 9; channels < 16; channels++) {
10267 DWConvMicrokernelTester()
10268 .cr(8)
10269 .kr(9)
10270 .channels(channels)
10271 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10272 }
10273 }
10274
10275 TEST(F32_DWCONV_UP8X9__PSIMD, c_gt_8_with_qmin) {
10276 TEST_REQUIRES_PSIMD;
10277 for (uint32_t channels = 9; channels < 16; channels++) {
10278 DWConvMicrokernelTester()
10279 .cr(8)
10280 .kr(9)
10281 .channels(channels)
10282 .qmin(128)
10283 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10284 }
10285 }
10286
10287 TEST(F32_DWCONV_UP8X9__PSIMD, c_gt_8_with_qmax) {
10288 TEST_REQUIRES_PSIMD;
10289 for (uint32_t channels = 9; channels < 16; channels++) {
10290 DWConvMicrokernelTester()
10291 .cr(8)
10292 .kr(9)
10293 .channels(channels)
10294 .qmax(128)
10295 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10296 }
10297 }
10298
10299 TEST(F32_DWCONV_UP8X9__PSIMD, multipixel) {
10300 TEST_REQUIRES_PSIMD;
10301 for (size_t channels = 1; channels <= 40; channels += 7) {
10302 DWConvMicrokernelTester()
10303 .cr(8)
10304 .kr(9)
10305 .channels(channels)
10306 .width(3)
10307 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10308 }
10309 }
10310
10311 TEST(F32_DWCONV_UP8X9__PSIMD, multipixel_with_step) {
10312 TEST_REQUIRES_PSIMD;
10313 for (size_t channels = 1; channels <= 40; channels += 7) {
10314 for (size_t step = 2; step <= 9; step++) {
10315 DWConvMicrokernelTester()
10316 .cr(8)
10317 .kr(9)
10318 .channels(channels)
10319 .width(3)
10320 .step(step)
10321 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10322 }
10323 }
10324 }
10325
10326 TEST(F32_DWCONV_UP8X9__PSIMD, multipixel_with_output_stride) {
10327 TEST_REQUIRES_PSIMD;
10328 for (size_t channels = 1; channels <= 40; channels += 7) {
10329 DWConvMicrokernelTester()
10330 .cr(8)
10331 .kr(9)
10332 .channels(8)
10333 .width(5)
10334 .output_stride(43)
10335 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10336 }
10337 }
10338
10339 TEST(F32_DWCONV_UP8X9__PSIMD, multipixel_with_qmin) {
10340 TEST_REQUIRES_PSIMD;
10341 for (size_t channels = 1; channels <= 40; channels += 7) {
10342 DWConvMicrokernelTester()
10343 .cr(8)
10344 .kr(9)
10345 .channels(channels)
10346 .width(3)
10347 .qmin(128)
10348 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10349 }
10350 }
10351
10352 TEST(F32_DWCONV_UP8X9__PSIMD, multipixel_with_qmax) {
10353 TEST_REQUIRES_PSIMD;
10354 for (size_t channels = 1; channels <= 40; channels += 7) {
10355 DWConvMicrokernelTester()
10356 .cr(8)
10357 .kr(9)
10358 .channels(channels)
10359 .width(3)
10360 .qmax(128)
10361 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
10362 }
10363 }
10364#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10365
10366
10367#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10368 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_eq_8) {
10369 TEST_REQUIRES_PSIMD;
10370 DWConvMicrokernelTester()
10371 .cr(8)
10372 .kr(9)
10373 .channels(8)
10374 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10375 }
10376
10377 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_div_8) {
10378 TEST_REQUIRES_PSIMD;
10379 for (uint32_t channels = 16; channels < 128; channels += 24) {
10380 DWConvMicrokernelTester()
10381 .cr(8)
10382 .kr(9)
10383 .channels(channels)
10384 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10385 }
10386 }
10387
10388 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_div_8_with_qmin) {
10389 TEST_REQUIRES_PSIMD;
10390 for (uint32_t channels = 16; channels < 128; channels += 24) {
10391 DWConvMicrokernelTester()
10392 .cr(8)
10393 .kr(9)
10394 .channels(channels)
10395 .qmin(128)
10396 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10397 }
10398 }
10399
10400 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_div_8_with_qmax) {
10401 TEST_REQUIRES_PSIMD;
10402 for (uint32_t channels = 16; channels < 128; channels += 24) {
10403 DWConvMicrokernelTester()
10404 .cr(8)
10405 .kr(9)
10406 .channels(channels)
10407 .qmax(128)
10408 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10409 }
10410 }
10411
10412 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_lt_8) {
10413 TEST_REQUIRES_PSIMD;
10414 for (uint32_t channels = 1; channels < 8; channels++) {
10415 DWConvMicrokernelTester()
10416 .cr(8)
10417 .kr(9)
10418 .channels(channels)
10419 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10420 }
10421 }
10422
10423 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_gt_8) {
10424 TEST_REQUIRES_PSIMD;
10425 for (uint32_t channels = 9; channels < 16; channels++) {
10426 DWConvMicrokernelTester()
10427 .cr(8)
10428 .kr(9)
10429 .channels(channels)
10430 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10431 }
10432 }
10433
10434 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_gt_8_with_qmin) {
10435 TEST_REQUIRES_PSIMD;
10436 for (uint32_t channels = 9; channels < 16; channels++) {
10437 DWConvMicrokernelTester()
10438 .cr(8)
10439 .kr(9)
10440 .channels(channels)
10441 .qmin(128)
10442 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10443 }
10444 }
10445
10446 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, c_gt_8_with_qmax) {
10447 TEST_REQUIRES_PSIMD;
10448 for (uint32_t channels = 9; channels < 16; channels++) {
10449 DWConvMicrokernelTester()
10450 .cr(8)
10451 .kr(9)
10452 .channels(channels)
10453 .qmax(128)
10454 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10455 }
10456 }
10457
10458 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, multipixel) {
10459 TEST_REQUIRES_PSIMD;
10460 for (size_t channels = 1; channels <= 40; channels += 7) {
10461 DWConvMicrokernelTester()
10462 .cr(8)
10463 .kr(9)
10464 .channels(channels)
10465 .width(3)
10466 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10467 }
10468 }
10469
10470 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, multipixel_with_step) {
10471 TEST_REQUIRES_PSIMD;
10472 for (size_t channels = 1; channels <= 40; channels += 7) {
10473 for (size_t step = 2; step <= 9; step++) {
10474 DWConvMicrokernelTester()
10475 .cr(8)
10476 .kr(9)
10477 .channels(channels)
10478 .width(3)
10479 .step(step)
10480 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10481 }
10482 }
10483 }
10484
10485 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, multipixel_with_output_stride) {
10486 TEST_REQUIRES_PSIMD;
10487 for (size_t channels = 1; channels <= 40; channels += 7) {
10488 DWConvMicrokernelTester()
10489 .cr(8)
10490 .kr(9)
10491 .channels(8)
10492 .width(5)
10493 .output_stride(43)
10494 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10495 }
10496 }
10497
10498 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, multipixel_with_qmin) {
10499 TEST_REQUIRES_PSIMD;
10500 for (size_t channels = 1; channels <= 40; channels += 7) {
10501 DWConvMicrokernelTester()
10502 .cr(8)
10503 .kr(9)
10504 .channels(channels)
10505 .width(3)
10506 .qmin(128)
10507 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10508 }
10509 }
10510
10511 TEST(F32_DWCONV_UP8X9__PSIMD_ACC2, multipixel_with_qmax) {
10512 TEST_REQUIRES_PSIMD;
10513 for (size_t channels = 1; channels <= 40; channels += 7) {
10514 DWConvMicrokernelTester()
10515 .cr(8)
10516 .kr(9)
10517 .channels(channels)
10518 .width(3)
10519 .qmax(128)
10520 .Test(xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10521 }
10522 }
10523#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10524
10525
10526#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10527 TEST(F32_DWCONV_UP4X4__PSIMD, c_eq_4) {
10528 TEST_REQUIRES_PSIMD;
10529 DWConvMicrokernelTester()
10530 .cr(4)
10531 .kr(4)
10532 .channels(4)
10533 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10534 }
10535
10536 TEST(F32_DWCONV_UP4X4__PSIMD, c_div_4) {
10537 TEST_REQUIRES_PSIMD;
10538 for (uint32_t channels = 8; channels < 64; channels += 12) {
10539 DWConvMicrokernelTester()
10540 .cr(4)
10541 .kr(4)
10542 .channels(channels)
10543 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10544 }
10545 }
10546
10547 TEST(F32_DWCONV_UP4X4__PSIMD, c_div_4_with_qmin) {
10548 TEST_REQUIRES_PSIMD;
10549 for (uint32_t channels = 8; channels < 64; channels += 12) {
10550 DWConvMicrokernelTester()
10551 .cr(4)
10552 .kr(4)
10553 .channels(channels)
10554 .qmin(128)
10555 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10556 }
10557 }
10558
10559 TEST(F32_DWCONV_UP4X4__PSIMD, c_div_4_with_qmax) {
10560 TEST_REQUIRES_PSIMD;
10561 for (uint32_t channels = 8; channels < 64; channels += 12) {
10562 DWConvMicrokernelTester()
10563 .cr(4)
10564 .kr(4)
10565 .channels(channels)
10566 .qmax(128)
10567 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10568 }
10569 }
10570
10571 TEST(F32_DWCONV_UP4X4__PSIMD, c_lt_4) {
10572 TEST_REQUIRES_PSIMD;
10573 for (uint32_t channels = 1; channels < 4; channels++) {
10574 DWConvMicrokernelTester()
10575 .cr(4)
10576 .kr(4)
10577 .channels(channels)
10578 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10579 }
10580 }
10581
10582 TEST(F32_DWCONV_UP4X4__PSIMD, c_gt_4) {
10583 TEST_REQUIRES_PSIMD;
10584 for (uint32_t channels = 5; channels < 8; channels++) {
10585 DWConvMicrokernelTester()
10586 .cr(4)
10587 .kr(4)
10588 .channels(channels)
10589 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10590 }
10591 }
10592
10593 TEST(F32_DWCONV_UP4X4__PSIMD, c_gt_4_with_qmin) {
10594 TEST_REQUIRES_PSIMD;
10595 for (uint32_t channels = 5; channels < 8; channels++) {
10596 DWConvMicrokernelTester()
10597 .cr(4)
10598 .kr(4)
10599 .channels(channels)
10600 .qmin(128)
10601 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10602 }
10603 }
10604
10605 TEST(F32_DWCONV_UP4X4__PSIMD, c_gt_4_with_qmax) {
10606 TEST_REQUIRES_PSIMD;
10607 for (uint32_t channels = 5; channels < 8; channels++) {
10608 DWConvMicrokernelTester()
10609 .cr(4)
10610 .kr(4)
10611 .channels(channels)
10612 .qmax(128)
10613 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10614 }
10615 }
10616
10617 TEST(F32_DWCONV_UP4X4__PSIMD, multipixel) {
10618 TEST_REQUIRES_PSIMD;
10619 for (size_t channels = 1; channels <= 20; channels += 3) {
10620 DWConvMicrokernelTester()
10621 .cr(4)
10622 .kr(4)
10623 .channels(channels)
10624 .width(3)
10625 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10626 }
10627 }
10628
10629 TEST(F32_DWCONV_UP4X4__PSIMD, multipixel_with_step) {
10630 TEST_REQUIRES_PSIMD;
10631 for (size_t channels = 1; channels <= 20; channels += 3) {
10632 for (size_t step = 2; step <= 4; step++) {
10633 DWConvMicrokernelTester()
10634 .cr(4)
10635 .kr(4)
10636 .channels(channels)
10637 .width(3)
10638 .step(step)
10639 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10640 }
10641 }
10642 }
10643
10644 TEST(F32_DWCONV_UP4X4__PSIMD, multipixel_with_output_stride) {
10645 TEST_REQUIRES_PSIMD;
10646 for (size_t channels = 1; channels <= 20; channels += 3) {
10647 DWConvMicrokernelTester()
10648 .cr(4)
10649 .kr(4)
10650 .channels(4)
10651 .width(5)
10652 .output_stride(23)
10653 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10654 }
10655 }
10656
10657 TEST(F32_DWCONV_UP4X4__PSIMD, multipixel_with_qmin) {
10658 TEST_REQUIRES_PSIMD;
10659 for (size_t channels = 1; channels <= 20; channels += 3) {
10660 DWConvMicrokernelTester()
10661 .cr(4)
10662 .kr(4)
10663 .channels(channels)
10664 .width(3)
10665 .qmin(128)
10666 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10667 }
10668 }
10669
10670 TEST(F32_DWCONV_UP4X4__PSIMD, multipixel_with_qmax) {
10671 TEST_REQUIRES_PSIMD;
10672 for (size_t channels = 1; channels <= 20; channels += 3) {
10673 DWConvMicrokernelTester()
10674 .cr(4)
10675 .kr(4)
10676 .channels(channels)
10677 .width(3)
10678 .qmax(128)
10679 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10680 }
10681 }
10682#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10683
10684
10685#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10686 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_eq_4) {
10687 TEST_REQUIRES_PSIMD;
10688 DWConvMicrokernelTester()
10689 .cr(4)
10690 .kr(4)
10691 .channels(4)
10692 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10693 }
10694
10695 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_div_4) {
10696 TEST_REQUIRES_PSIMD;
10697 for (uint32_t channels = 8; channels < 64; channels += 12) {
10698 DWConvMicrokernelTester()
10699 .cr(4)
10700 .kr(4)
10701 .channels(channels)
10702 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10703 }
10704 }
10705
10706 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_div_4_with_qmin) {
10707 TEST_REQUIRES_PSIMD;
10708 for (uint32_t channels = 8; channels < 64; channels += 12) {
10709 DWConvMicrokernelTester()
10710 .cr(4)
10711 .kr(4)
10712 .channels(channels)
10713 .qmin(128)
10714 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10715 }
10716 }
10717
10718 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_div_4_with_qmax) {
10719 TEST_REQUIRES_PSIMD;
10720 for (uint32_t channels = 8; channels < 64; channels += 12) {
10721 DWConvMicrokernelTester()
10722 .cr(4)
10723 .kr(4)
10724 .channels(channels)
10725 .qmax(128)
10726 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10727 }
10728 }
10729
10730 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_lt_4) {
10731 TEST_REQUIRES_PSIMD;
10732 for (uint32_t channels = 1; channels < 4; channels++) {
10733 DWConvMicrokernelTester()
10734 .cr(4)
10735 .kr(4)
10736 .channels(channels)
10737 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10738 }
10739 }
10740
10741 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_gt_4) {
10742 TEST_REQUIRES_PSIMD;
10743 for (uint32_t channels = 5; channels < 8; channels++) {
10744 DWConvMicrokernelTester()
10745 .cr(4)
10746 .kr(4)
10747 .channels(channels)
10748 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10749 }
10750 }
10751
10752 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_gt_4_with_qmin) {
10753 TEST_REQUIRES_PSIMD;
10754 for (uint32_t channels = 5; channels < 8; channels++) {
10755 DWConvMicrokernelTester()
10756 .cr(4)
10757 .kr(4)
10758 .channels(channels)
10759 .qmin(128)
10760 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10761 }
10762 }
10763
10764 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, c_gt_4_with_qmax) {
10765 TEST_REQUIRES_PSIMD;
10766 for (uint32_t channels = 5; channels < 8; channels++) {
10767 DWConvMicrokernelTester()
10768 .cr(4)
10769 .kr(4)
10770 .channels(channels)
10771 .qmax(128)
10772 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10773 }
10774 }
10775
10776 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, multipixel) {
10777 TEST_REQUIRES_PSIMD;
10778 for (size_t channels = 1; channels <= 20; channels += 3) {
10779 DWConvMicrokernelTester()
10780 .cr(4)
10781 .kr(4)
10782 .channels(channels)
10783 .width(3)
10784 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10785 }
10786 }
10787
10788 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, multipixel_with_step) {
10789 TEST_REQUIRES_PSIMD;
10790 for (size_t channels = 1; channels <= 20; channels += 3) {
10791 for (size_t step = 2; step <= 4; step++) {
10792 DWConvMicrokernelTester()
10793 .cr(4)
10794 .kr(4)
10795 .channels(channels)
10796 .width(3)
10797 .step(step)
10798 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10799 }
10800 }
10801 }
10802
10803 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, multipixel_with_output_stride) {
10804 TEST_REQUIRES_PSIMD;
10805 for (size_t channels = 1; channels <= 20; channels += 3) {
10806 DWConvMicrokernelTester()
10807 .cr(4)
10808 .kr(4)
10809 .channels(4)
10810 .width(5)
10811 .output_stride(23)
10812 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10813 }
10814 }
10815
10816 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, multipixel_with_qmin) {
10817 TEST_REQUIRES_PSIMD;
10818 for (size_t channels = 1; channels <= 20; channels += 3) {
10819 DWConvMicrokernelTester()
10820 .cr(4)
10821 .kr(4)
10822 .channels(channels)
10823 .width(3)
10824 .qmin(128)
10825 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10826 }
10827 }
10828
10829 TEST(F32_DWCONV_UP4X4__PSIMD_ACC2, multipixel_with_qmax) {
10830 TEST_REQUIRES_PSIMD;
10831 for (size_t channels = 1; channels <= 20; channels += 3) {
10832 DWConvMicrokernelTester()
10833 .cr(4)
10834 .kr(4)
10835 .channels(channels)
10836 .width(3)
10837 .qmax(128)
10838 .Test(xnn_f32_dwconv_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
10839 }
10840 }
10841#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10842
10843
10844#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
10845 TEST(F32_DWCONV_UP8X4__PSIMD, c_eq_8) {
10846 TEST_REQUIRES_PSIMD;
10847 DWConvMicrokernelTester()
10848 .cr(8)
10849 .kr(4)
10850 .channels(8)
10851 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10852 }
10853
10854 TEST(F32_DWCONV_UP8X4__PSIMD, c_div_8) {
10855 TEST_REQUIRES_PSIMD;
10856 for (uint32_t channels = 16; channels < 128; channels += 24) {
10857 DWConvMicrokernelTester()
10858 .cr(8)
10859 .kr(4)
10860 .channels(channels)
10861 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10862 }
10863 }
10864
10865 TEST(F32_DWCONV_UP8X4__PSIMD, c_div_8_with_qmin) {
10866 TEST_REQUIRES_PSIMD;
10867 for (uint32_t channels = 16; channels < 128; channels += 24) {
10868 DWConvMicrokernelTester()
10869 .cr(8)
10870 .kr(4)
10871 .channels(channels)
10872 .qmin(128)
10873 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10874 }
10875 }
10876
10877 TEST(F32_DWCONV_UP8X4__PSIMD, c_div_8_with_qmax) {
10878 TEST_REQUIRES_PSIMD;
10879 for (uint32_t channels = 16; channels < 128; channels += 24) {
10880 DWConvMicrokernelTester()
10881 .cr(8)
10882 .kr(4)
10883 .channels(channels)
10884 .qmax(128)
10885 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10886 }
10887 }
10888
10889 TEST(F32_DWCONV_UP8X4__PSIMD, c_lt_8) {
10890 TEST_REQUIRES_PSIMD;
10891 for (uint32_t channels = 1; channels < 8; channels++) {
10892 DWConvMicrokernelTester()
10893 .cr(8)
10894 .kr(4)
10895 .channels(channels)
10896 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10897 }
10898 }
10899
10900 TEST(F32_DWCONV_UP8X4__PSIMD, c_gt_8) {
10901 TEST_REQUIRES_PSIMD;
10902 for (uint32_t channels = 9; channels < 16; channels++) {
10903 DWConvMicrokernelTester()
10904 .cr(8)
10905 .kr(4)
10906 .channels(channels)
10907 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10908 }
10909 }
10910
10911 TEST(F32_DWCONV_UP8X4__PSIMD, c_gt_8_with_qmin) {
10912 TEST_REQUIRES_PSIMD;
10913 for (uint32_t channels = 9; channels < 16; channels++) {
10914 DWConvMicrokernelTester()
10915 .cr(8)
10916 .kr(4)
10917 .channels(channels)
10918 .qmin(128)
10919 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10920 }
10921 }
10922
10923 TEST(F32_DWCONV_UP8X4__PSIMD, c_gt_8_with_qmax) {
10924 TEST_REQUIRES_PSIMD;
10925 for (uint32_t channels = 9; channels < 16; channels++) {
10926 DWConvMicrokernelTester()
10927 .cr(8)
10928 .kr(4)
10929 .channels(channels)
10930 .qmax(128)
10931 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10932 }
10933 }
10934
10935 TEST(F32_DWCONV_UP8X4__PSIMD, multipixel) {
10936 TEST_REQUIRES_PSIMD;
10937 for (size_t channels = 1; channels <= 40; channels += 7) {
10938 DWConvMicrokernelTester()
10939 .cr(8)
10940 .kr(4)
10941 .channels(channels)
10942 .width(3)
10943 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10944 }
10945 }
10946
10947 TEST(F32_DWCONV_UP8X4__PSIMD, multipixel_with_step) {
10948 TEST_REQUIRES_PSIMD;
10949 for (size_t channels = 1; channels <= 40; channels += 7) {
10950 for (size_t step = 2; step <= 4; step++) {
10951 DWConvMicrokernelTester()
10952 .cr(8)
10953 .kr(4)
10954 .channels(channels)
10955 .width(3)
10956 .step(step)
10957 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10958 }
10959 }
10960 }
10961
10962 TEST(F32_DWCONV_UP8X4__PSIMD, multipixel_with_output_stride) {
10963 TEST_REQUIRES_PSIMD;
10964 for (size_t channels = 1; channels <= 40; channels += 7) {
10965 DWConvMicrokernelTester()
10966 .cr(8)
10967 .kr(4)
10968 .channels(8)
10969 .width(5)
10970 .output_stride(43)
10971 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10972 }
10973 }
10974
10975 TEST(F32_DWCONV_UP8X4__PSIMD, multipixel_with_qmin) {
10976 TEST_REQUIRES_PSIMD;
10977 for (size_t channels = 1; channels <= 40; channels += 7) {
10978 DWConvMicrokernelTester()
10979 .cr(8)
10980 .kr(4)
10981 .channels(channels)
10982 .width(3)
10983 .qmin(128)
10984 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10985 }
10986 }
10987
10988 TEST(F32_DWCONV_UP8X4__PSIMD, multipixel_with_qmax) {
10989 TEST_REQUIRES_PSIMD;
10990 for (size_t channels = 1; channels <= 40; channels += 7) {
10991 DWConvMicrokernelTester()
10992 .cr(8)
10993 .kr(4)
10994 .channels(channels)
10995 .width(3)
10996 .qmax(128)
10997 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
10998 }
10999 }
11000#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
11001
11002
11003#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
11004 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_eq_8) {
11005 TEST_REQUIRES_PSIMD;
11006 DWConvMicrokernelTester()
11007 .cr(8)
11008 .kr(4)
11009 .channels(8)
11010 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11011 }
11012
11013 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_div_8) {
11014 TEST_REQUIRES_PSIMD;
11015 for (uint32_t channels = 16; channels < 128; channels += 24) {
11016 DWConvMicrokernelTester()
11017 .cr(8)
11018 .kr(4)
11019 .channels(channels)
11020 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11021 }
11022 }
11023
11024 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_div_8_with_qmin) {
11025 TEST_REQUIRES_PSIMD;
11026 for (uint32_t channels = 16; channels < 128; channels += 24) {
11027 DWConvMicrokernelTester()
11028 .cr(8)
11029 .kr(4)
11030 .channels(channels)
11031 .qmin(128)
11032 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11033 }
11034 }
11035
11036 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_div_8_with_qmax) {
11037 TEST_REQUIRES_PSIMD;
11038 for (uint32_t channels = 16; channels < 128; channels += 24) {
11039 DWConvMicrokernelTester()
11040 .cr(8)
11041 .kr(4)
11042 .channels(channels)
11043 .qmax(128)
11044 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11045 }
11046 }
11047
11048 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_lt_8) {
11049 TEST_REQUIRES_PSIMD;
11050 for (uint32_t channels = 1; channels < 8; channels++) {
11051 DWConvMicrokernelTester()
11052 .cr(8)
11053 .kr(4)
11054 .channels(channels)
11055 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11056 }
11057 }
11058
11059 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_gt_8) {
11060 TEST_REQUIRES_PSIMD;
11061 for (uint32_t channels = 9; channels < 16; channels++) {
11062 DWConvMicrokernelTester()
11063 .cr(8)
11064 .kr(4)
11065 .channels(channels)
11066 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11067 }
11068 }
11069
11070 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_gt_8_with_qmin) {
11071 TEST_REQUIRES_PSIMD;
11072 for (uint32_t channels = 9; channels < 16; channels++) {
11073 DWConvMicrokernelTester()
11074 .cr(8)
11075 .kr(4)
11076 .channels(channels)
11077 .qmin(128)
11078 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11079 }
11080 }
11081
11082 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, c_gt_8_with_qmax) {
11083 TEST_REQUIRES_PSIMD;
11084 for (uint32_t channels = 9; channels < 16; channels++) {
11085 DWConvMicrokernelTester()
11086 .cr(8)
11087 .kr(4)
11088 .channels(channels)
11089 .qmax(128)
11090 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11091 }
11092 }
11093
11094 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, multipixel) {
11095 TEST_REQUIRES_PSIMD;
11096 for (size_t channels = 1; channels <= 40; channels += 7) {
11097 DWConvMicrokernelTester()
11098 .cr(8)
11099 .kr(4)
11100 .channels(channels)
11101 .width(3)
11102 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11103 }
11104 }
11105
11106 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, multipixel_with_step) {
11107 TEST_REQUIRES_PSIMD;
11108 for (size_t channels = 1; channels <= 40; channels += 7) {
11109 for (size_t step = 2; step <= 4; step++) {
11110 DWConvMicrokernelTester()
11111 .cr(8)
11112 .kr(4)
11113 .channels(channels)
11114 .width(3)
11115 .step(step)
11116 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11117 }
11118 }
11119 }
11120
11121 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, multipixel_with_output_stride) {
11122 TEST_REQUIRES_PSIMD;
11123 for (size_t channels = 1; channels <= 40; channels += 7) {
11124 DWConvMicrokernelTester()
11125 .cr(8)
11126 .kr(4)
11127 .channels(8)
11128 .width(5)
11129 .output_stride(43)
11130 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11131 }
11132 }
11133
11134 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, multipixel_with_qmin) {
11135 TEST_REQUIRES_PSIMD;
11136 for (size_t channels = 1; channels <= 40; channels += 7) {
11137 DWConvMicrokernelTester()
11138 .cr(8)
11139 .kr(4)
11140 .channels(channels)
11141 .width(3)
11142 .qmin(128)
11143 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11144 }
11145 }
11146
11147 TEST(F32_DWCONV_UP8X4__PSIMD_ACC2, multipixel_with_qmax) {
11148 TEST_REQUIRES_PSIMD;
11149 for (size_t channels = 1; channels <= 40; channels += 7) {
11150 DWConvMicrokernelTester()
11151 .cr(8)
11152 .kr(4)
11153 .channels(channels)
11154 .width(3)
11155 .qmax(128)
11156 .Test(xnn_f32_dwconv_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
11157 }
11158 }
11159#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
11160
11161
11162#if XNN_ARCH_WASM
11163 TEST(F32_DWCONV_UP1X4__WASM, c_eq_1) {
11164 DWConvMicrokernelTester()
11165 .cr(1)
11166 .kr(4)
11167 .channels(1)
11168 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11169 }
11170
11171 TEST(F32_DWCONV_UP1X4__WASM, c_gt_1) {
11172 for (uint32_t channels = 2; channels < 10; channels++) {
11173 DWConvMicrokernelTester()
11174 .cr(1)
11175 .kr(4)
11176 .channels(channels)
11177 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11178 }
11179 }
11180
11181 TEST(F32_DWCONV_UP1X4__WASM, c_gt_1_with_qmin) {
11182 for (uint32_t channels = 2; channels < 10; channels++) {
11183 DWConvMicrokernelTester()
11184 .cr(1)
11185 .kr(4)
11186 .channels(channels)
11187 .qmin(128)
11188 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11189 }
11190 }
11191
11192 TEST(F32_DWCONV_UP1X4__WASM, c_gt_1_with_qmax) {
11193 for (uint32_t channels = 2; channels < 10; channels++) {
11194 DWConvMicrokernelTester()
11195 .cr(1)
11196 .kr(4)
11197 .channels(channels)
11198 .qmax(128)
11199 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11200 }
11201 }
11202
11203 TEST(F32_DWCONV_UP1X4__WASM, multipixel) {
11204 for (size_t channels = 1; channels <= 5; channels += 1) {
11205 DWConvMicrokernelTester()
11206 .cr(1)
11207 .kr(4)
11208 .channels(channels)
11209 .width(3)
11210 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11211 }
11212 }
11213
11214 TEST(F32_DWCONV_UP1X4__WASM, multipixel_with_step) {
11215 for (size_t channels = 1; channels <= 5; channels += 1) {
11216 for (size_t step = 2; step <= 4; step++) {
11217 DWConvMicrokernelTester()
11218 .cr(1)
11219 .kr(4)
11220 .channels(channels)
11221 .width(3)
11222 .step(step)
11223 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11224 }
11225 }
11226 }
11227
11228 TEST(F32_DWCONV_UP1X4__WASM, multipixel_with_output_stride) {
11229 for (size_t channels = 1; channels <= 5; channels += 1) {
11230 DWConvMicrokernelTester()
11231 .cr(1)
11232 .kr(4)
11233 .channels(1)
11234 .width(5)
11235 .output_stride(7)
11236 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11237 }
11238 }
11239
11240 TEST(F32_DWCONV_UP1X4__WASM, multipixel_with_qmin) {
11241 for (size_t channels = 1; channels <= 5; channels += 1) {
11242 DWConvMicrokernelTester()
11243 .cr(1)
11244 .kr(4)
11245 .channels(channels)
11246 .width(3)
11247 .qmin(128)
11248 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11249 }
11250 }
11251
11252 TEST(F32_DWCONV_UP1X4__WASM, multipixel_with_qmax) {
11253 for (size_t channels = 1; channels <= 5; channels += 1) {
11254 DWConvMicrokernelTester()
11255 .cr(1)
11256 .kr(4)
11257 .channels(channels)
11258 .width(3)
11259 .qmax(128)
11260 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11261 }
11262 }
11263#endif // XNN_ARCH_WASM
11264
11265
11266#if XNN_ARCH_WASM
11267 TEST(F32_DWCONV_UP1X4__WASM_ACC2, c_eq_1) {
11268 DWConvMicrokernelTester()
11269 .cr(1)
11270 .kr(4)
11271 .channels(1)
11272 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11273 }
11274
11275 TEST(F32_DWCONV_UP1X4__WASM_ACC2, c_gt_1) {
11276 for (uint32_t channels = 2; channels < 10; channels++) {
11277 DWConvMicrokernelTester()
11278 .cr(1)
11279 .kr(4)
11280 .channels(channels)
11281 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11282 }
11283 }
11284
11285 TEST(F32_DWCONV_UP1X4__WASM_ACC2, c_gt_1_with_qmin) {
11286 for (uint32_t channels = 2; channels < 10; channels++) {
11287 DWConvMicrokernelTester()
11288 .cr(1)
11289 .kr(4)
11290 .channels(channels)
11291 .qmin(128)
11292 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11293 }
11294 }
11295
11296 TEST(F32_DWCONV_UP1X4__WASM_ACC2, c_gt_1_with_qmax) {
11297 for (uint32_t channels = 2; channels < 10; channels++) {
11298 DWConvMicrokernelTester()
11299 .cr(1)
11300 .kr(4)
11301 .channels(channels)
11302 .qmax(128)
11303 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11304 }
11305 }
11306
11307 TEST(F32_DWCONV_UP1X4__WASM_ACC2, multipixel) {
11308 for (size_t channels = 1; channels <= 5; channels += 1) {
11309 DWConvMicrokernelTester()
11310 .cr(1)
11311 .kr(4)
11312 .channels(channels)
11313 .width(3)
11314 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11315 }
11316 }
11317
11318 TEST(F32_DWCONV_UP1X4__WASM_ACC2, multipixel_with_step) {
11319 for (size_t channels = 1; channels <= 5; channels += 1) {
11320 for (size_t step = 2; step <= 4; step++) {
11321 DWConvMicrokernelTester()
11322 .cr(1)
11323 .kr(4)
11324 .channels(channels)
11325 .width(3)
11326 .step(step)
11327 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11328 }
11329 }
11330 }
11331
11332 TEST(F32_DWCONV_UP1X4__WASM_ACC2, multipixel_with_output_stride) {
11333 for (size_t channels = 1; channels <= 5; channels += 1) {
11334 DWConvMicrokernelTester()
11335 .cr(1)
11336 .kr(4)
11337 .channels(1)
11338 .width(5)
11339 .output_stride(7)
11340 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11341 }
11342 }
11343
11344 TEST(F32_DWCONV_UP1X4__WASM_ACC2, multipixel_with_qmin) {
11345 for (size_t channels = 1; channels <= 5; channels += 1) {
11346 DWConvMicrokernelTester()
11347 .cr(1)
11348 .kr(4)
11349 .channels(channels)
11350 .width(3)
11351 .qmin(128)
11352 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11353 }
11354 }
11355
11356 TEST(F32_DWCONV_UP1X4__WASM_ACC2, multipixel_with_qmax) {
11357 for (size_t channels = 1; channels <= 5; channels += 1) {
11358 DWConvMicrokernelTester()
11359 .cr(1)
11360 .kr(4)
11361 .channels(channels)
11362 .width(3)
11363 .qmax(128)
11364 .Test(xnn_f32_dwconv_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11365 }
11366 }
11367#endif // XNN_ARCH_WASM
11368
11369
11370#if XNN_ARCH_WASM
11371 TEST(F32_DWCONV_UP2X4__WASM, c_eq_2) {
11372 DWConvMicrokernelTester()
11373 .cr(2)
11374 .kr(4)
11375 .channels(2)
11376 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11377 }
11378
11379 TEST(F32_DWCONV_UP2X4__WASM, c_div_2) {
11380 for (uint32_t channels = 4; channels < 32; channels += 6) {
11381 DWConvMicrokernelTester()
11382 .cr(2)
11383 .kr(4)
11384 .channels(channels)
11385 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11386 }
11387 }
11388
11389 TEST(F32_DWCONV_UP2X4__WASM, c_div_2_with_qmin) {
11390 for (uint32_t channels = 4; channels < 32; channels += 6) {
11391 DWConvMicrokernelTester()
11392 .cr(2)
11393 .kr(4)
11394 .channels(channels)
11395 .qmin(128)
11396 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11397 }
11398 }
11399
11400 TEST(F32_DWCONV_UP2X4__WASM, c_div_2_with_qmax) {
11401 for (uint32_t channels = 4; channels < 32; channels += 6) {
11402 DWConvMicrokernelTester()
11403 .cr(2)
11404 .kr(4)
11405 .channels(channels)
11406 .qmax(128)
11407 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11408 }
11409 }
11410
11411 TEST(F32_DWCONV_UP2X4__WASM, c_lt_2) {
11412 for (uint32_t channels = 1; channels < 2; channels++) {
11413 DWConvMicrokernelTester()
11414 .cr(2)
11415 .kr(4)
11416 .channels(channels)
11417 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11418 }
11419 }
11420
11421 TEST(F32_DWCONV_UP2X4__WASM, c_gt_2) {
11422 for (uint32_t channels = 3; channels < 4; channels++) {
11423 DWConvMicrokernelTester()
11424 .cr(2)
11425 .kr(4)
11426 .channels(channels)
11427 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11428 }
11429 }
11430
11431 TEST(F32_DWCONV_UP2X4__WASM, c_gt_2_with_qmin) {
11432 for (uint32_t channels = 3; channels < 4; channels++) {
11433 DWConvMicrokernelTester()
11434 .cr(2)
11435 .kr(4)
11436 .channels(channels)
11437 .qmin(128)
11438 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11439 }
11440 }
11441
11442 TEST(F32_DWCONV_UP2X4__WASM, c_gt_2_with_qmax) {
11443 for (uint32_t channels = 3; channels < 4; channels++) {
11444 DWConvMicrokernelTester()
11445 .cr(2)
11446 .kr(4)
11447 .channels(channels)
11448 .qmax(128)
11449 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11450 }
11451 }
11452
11453 TEST(F32_DWCONV_UP2X4__WASM, multipixel) {
11454 for (size_t channels = 1; channels <= 10; channels += 1) {
11455 DWConvMicrokernelTester()
11456 .cr(2)
11457 .kr(4)
11458 .channels(channels)
11459 .width(3)
11460 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11461 }
11462 }
11463
11464 TEST(F32_DWCONV_UP2X4__WASM, multipixel_with_step) {
11465 for (size_t channels = 1; channels <= 10; channels += 1) {
11466 for (size_t step = 2; step <= 4; step++) {
11467 DWConvMicrokernelTester()
11468 .cr(2)
11469 .kr(4)
11470 .channels(channels)
11471 .width(3)
11472 .step(step)
11473 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11474 }
11475 }
11476 }
11477
11478 TEST(F32_DWCONV_UP2X4__WASM, multipixel_with_output_stride) {
11479 for (size_t channels = 1; channels <= 10; channels += 1) {
11480 DWConvMicrokernelTester()
11481 .cr(2)
11482 .kr(4)
11483 .channels(2)
11484 .width(5)
11485 .output_stride(13)
11486 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11487 }
11488 }
11489
11490 TEST(F32_DWCONV_UP2X4__WASM, multipixel_with_qmin) {
11491 for (size_t channels = 1; channels <= 10; channels += 1) {
11492 DWConvMicrokernelTester()
11493 .cr(2)
11494 .kr(4)
11495 .channels(channels)
11496 .width(3)
11497 .qmin(128)
11498 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11499 }
11500 }
11501
11502 TEST(F32_DWCONV_UP2X4__WASM, multipixel_with_qmax) {
11503 for (size_t channels = 1; channels <= 10; channels += 1) {
11504 DWConvMicrokernelTester()
11505 .cr(2)
11506 .kr(4)
11507 .channels(channels)
11508 .width(3)
11509 .qmax(128)
11510 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
11511 }
11512 }
11513#endif // XNN_ARCH_WASM
11514
11515
11516#if XNN_ARCH_WASM
11517 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_eq_2) {
11518 DWConvMicrokernelTester()
11519 .cr(2)
11520 .kr(4)
11521 .channels(2)
11522 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11523 }
11524
11525 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_div_2) {
11526 for (uint32_t channels = 4; channels < 32; channels += 6) {
11527 DWConvMicrokernelTester()
11528 .cr(2)
11529 .kr(4)
11530 .channels(channels)
11531 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11532 }
11533 }
11534
11535 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_div_2_with_qmin) {
11536 for (uint32_t channels = 4; channels < 32; channels += 6) {
11537 DWConvMicrokernelTester()
11538 .cr(2)
11539 .kr(4)
11540 .channels(channels)
11541 .qmin(128)
11542 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11543 }
11544 }
11545
11546 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_div_2_with_qmax) {
11547 for (uint32_t channels = 4; channels < 32; channels += 6) {
11548 DWConvMicrokernelTester()
11549 .cr(2)
11550 .kr(4)
11551 .channels(channels)
11552 .qmax(128)
11553 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11554 }
11555 }
11556
11557 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_lt_2) {
11558 for (uint32_t channels = 1; channels < 2; channels++) {
11559 DWConvMicrokernelTester()
11560 .cr(2)
11561 .kr(4)
11562 .channels(channels)
11563 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11564 }
11565 }
11566
11567 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_gt_2) {
11568 for (uint32_t channels = 3; channels < 4; channels++) {
11569 DWConvMicrokernelTester()
11570 .cr(2)
11571 .kr(4)
11572 .channels(channels)
11573 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11574 }
11575 }
11576
11577 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_gt_2_with_qmin) {
11578 for (uint32_t channels = 3; channels < 4; channels++) {
11579 DWConvMicrokernelTester()
11580 .cr(2)
11581 .kr(4)
11582 .channels(channels)
11583 .qmin(128)
11584 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11585 }
11586 }
11587
11588 TEST(F32_DWCONV_UP2X4__WASM_ACC2, c_gt_2_with_qmax) {
11589 for (uint32_t channels = 3; channels < 4; channels++) {
11590 DWConvMicrokernelTester()
11591 .cr(2)
11592 .kr(4)
11593 .channels(channels)
11594 .qmax(128)
11595 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11596 }
11597 }
11598
11599 TEST(F32_DWCONV_UP2X4__WASM_ACC2, multipixel) {
11600 for (size_t channels = 1; channels <= 10; channels += 1) {
11601 DWConvMicrokernelTester()
11602 .cr(2)
11603 .kr(4)
11604 .channels(channels)
11605 .width(3)
11606 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11607 }
11608 }
11609
11610 TEST(F32_DWCONV_UP2X4__WASM_ACC2, multipixel_with_step) {
11611 for (size_t channels = 1; channels <= 10; channels += 1) {
11612 for (size_t step = 2; step <= 4; step++) {
11613 DWConvMicrokernelTester()
11614 .cr(2)
11615 .kr(4)
11616 .channels(channels)
11617 .width(3)
11618 .step(step)
11619 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11620 }
11621 }
11622 }
11623
11624 TEST(F32_DWCONV_UP2X4__WASM_ACC2, multipixel_with_output_stride) {
11625 for (size_t channels = 1; channels <= 10; channels += 1) {
11626 DWConvMicrokernelTester()
11627 .cr(2)
11628 .kr(4)
11629 .channels(2)
11630 .width(5)
11631 .output_stride(13)
11632 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11633 }
11634 }
11635
11636 TEST(F32_DWCONV_UP2X4__WASM_ACC2, multipixel_with_qmin) {
11637 for (size_t channels = 1; channels <= 10; channels += 1) {
11638 DWConvMicrokernelTester()
11639 .cr(2)
11640 .kr(4)
11641 .channels(channels)
11642 .width(3)
11643 .qmin(128)
11644 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11645 }
11646 }
11647
11648 TEST(F32_DWCONV_UP2X4__WASM_ACC2, multipixel_with_qmax) {
11649 for (size_t channels = 1; channels <= 10; channels += 1) {
11650 DWConvMicrokernelTester()
11651 .cr(2)
11652 .kr(4)
11653 .channels(channels)
11654 .width(3)
11655 .qmax(128)
11656 .Test(xnn_f32_dwconv_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11657 }
11658 }
11659#endif // XNN_ARCH_WASM
11660
11661
11662#if XNN_ARCH_WASM
11663 TEST(F32_DWCONV_UP1X9__WASM, c_eq_1) {
11664 DWConvMicrokernelTester()
11665 .cr(1)
11666 .kr(9)
11667 .channels(1)
11668 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11669 }
11670
11671 TEST(F32_DWCONV_UP1X9__WASM, c_gt_1) {
11672 for (uint32_t channels = 2; channels < 10; channels++) {
11673 DWConvMicrokernelTester()
11674 .cr(1)
11675 .kr(9)
11676 .channels(channels)
11677 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11678 }
11679 }
11680
11681 TEST(F32_DWCONV_UP1X9__WASM, c_gt_1_with_qmin) {
11682 for (uint32_t channels = 2; channels < 10; channels++) {
11683 DWConvMicrokernelTester()
11684 .cr(1)
11685 .kr(9)
11686 .channels(channels)
11687 .qmin(128)
11688 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11689 }
11690 }
11691
11692 TEST(F32_DWCONV_UP1X9__WASM, c_gt_1_with_qmax) {
11693 for (uint32_t channels = 2; channels < 10; channels++) {
11694 DWConvMicrokernelTester()
11695 .cr(1)
11696 .kr(9)
11697 .channels(channels)
11698 .qmax(128)
11699 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11700 }
11701 }
11702
11703 TEST(F32_DWCONV_UP1X9__WASM, multipixel) {
11704 for (size_t channels = 1; channels <= 5; channels += 1) {
11705 DWConvMicrokernelTester()
11706 .cr(1)
11707 .kr(9)
11708 .channels(channels)
11709 .width(3)
11710 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11711 }
11712 }
11713
11714 TEST(F32_DWCONV_UP1X9__WASM, multipixel_with_step) {
11715 for (size_t channels = 1; channels <= 5; channels += 1) {
11716 for (size_t step = 2; step <= 9; step++) {
11717 DWConvMicrokernelTester()
11718 .cr(1)
11719 .kr(9)
11720 .channels(channels)
11721 .width(3)
11722 .step(step)
11723 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11724 }
11725 }
11726 }
11727
11728 TEST(F32_DWCONV_UP1X9__WASM, multipixel_with_output_stride) {
11729 for (size_t channels = 1; channels <= 5; channels += 1) {
11730 DWConvMicrokernelTester()
11731 .cr(1)
11732 .kr(9)
11733 .channels(1)
11734 .width(5)
11735 .output_stride(7)
11736 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11737 }
11738 }
11739
11740 TEST(F32_DWCONV_UP1X9__WASM, multipixel_with_qmin) {
11741 for (size_t channels = 1; channels <= 5; channels += 1) {
11742 DWConvMicrokernelTester()
11743 .cr(1)
11744 .kr(9)
11745 .channels(channels)
11746 .width(3)
11747 .qmin(128)
11748 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11749 }
11750 }
11751
11752 TEST(F32_DWCONV_UP1X9__WASM, multipixel_with_qmax) {
11753 for (size_t channels = 1; channels <= 5; channels += 1) {
11754 DWConvMicrokernelTester()
11755 .cr(1)
11756 .kr(9)
11757 .channels(channels)
11758 .width(3)
11759 .qmax(128)
11760 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11761 }
11762 }
11763#endif // XNN_ARCH_WASM
11764
11765
11766#if XNN_ARCH_WASM
11767 TEST(F32_DWCONV_UP1X9__WASM_ACC2, c_eq_1) {
11768 DWConvMicrokernelTester()
11769 .cr(1)
11770 .kr(9)
11771 .channels(1)
11772 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11773 }
11774
11775 TEST(F32_DWCONV_UP1X9__WASM_ACC2, c_gt_1) {
11776 for (uint32_t channels = 2; channels < 10; channels++) {
11777 DWConvMicrokernelTester()
11778 .cr(1)
11779 .kr(9)
11780 .channels(channels)
11781 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11782 }
11783 }
11784
11785 TEST(F32_DWCONV_UP1X9__WASM_ACC2, c_gt_1_with_qmin) {
11786 for (uint32_t channels = 2; channels < 10; channels++) {
11787 DWConvMicrokernelTester()
11788 .cr(1)
11789 .kr(9)
11790 .channels(channels)
11791 .qmin(128)
11792 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11793 }
11794 }
11795
11796 TEST(F32_DWCONV_UP1X9__WASM_ACC2, c_gt_1_with_qmax) {
11797 for (uint32_t channels = 2; channels < 10; channels++) {
11798 DWConvMicrokernelTester()
11799 .cr(1)
11800 .kr(9)
11801 .channels(channels)
11802 .qmax(128)
11803 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11804 }
11805 }
11806
11807 TEST(F32_DWCONV_UP1X9__WASM_ACC2, multipixel) {
11808 for (size_t channels = 1; channels <= 5; channels += 1) {
11809 DWConvMicrokernelTester()
11810 .cr(1)
11811 .kr(9)
11812 .channels(channels)
11813 .width(3)
11814 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11815 }
11816 }
11817
11818 TEST(F32_DWCONV_UP1X9__WASM_ACC2, multipixel_with_step) {
11819 for (size_t channels = 1; channels <= 5; channels += 1) {
11820 for (size_t step = 2; step <= 9; step++) {
11821 DWConvMicrokernelTester()
11822 .cr(1)
11823 .kr(9)
11824 .channels(channels)
11825 .width(3)
11826 .step(step)
11827 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11828 }
11829 }
11830 }
11831
11832 TEST(F32_DWCONV_UP1X9__WASM_ACC2, multipixel_with_output_stride) {
11833 for (size_t channels = 1; channels <= 5; channels += 1) {
11834 DWConvMicrokernelTester()
11835 .cr(1)
11836 .kr(9)
11837 .channels(1)
11838 .width(5)
11839 .output_stride(7)
11840 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11841 }
11842 }
11843
11844 TEST(F32_DWCONV_UP1X9__WASM_ACC2, multipixel_with_qmin) {
11845 for (size_t channels = 1; channels <= 5; channels += 1) {
11846 DWConvMicrokernelTester()
11847 .cr(1)
11848 .kr(9)
11849 .channels(channels)
11850 .width(3)
11851 .qmin(128)
11852 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11853 }
11854 }
11855
11856 TEST(F32_DWCONV_UP1X9__WASM_ACC2, multipixel_with_qmax) {
11857 for (size_t channels = 1; channels <= 5; channels += 1) {
11858 DWConvMicrokernelTester()
11859 .cr(1)
11860 .kr(9)
11861 .channels(channels)
11862 .width(3)
11863 .qmax(128)
11864 .Test(xnn_f32_dwconv_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
11865 }
11866 }
11867#endif // XNN_ARCH_WASM
11868
11869
11870#if XNN_ARCH_WASM
11871 TEST(F32_DWCONV_UP2X9__WASM, c_eq_2) {
11872 DWConvMicrokernelTester()
11873 .cr(2)
11874 .kr(9)
11875 .channels(2)
11876 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11877 }
11878
11879 TEST(F32_DWCONV_UP2X9__WASM, c_div_2) {
11880 for (uint32_t channels = 4; channels < 32; channels += 6) {
11881 DWConvMicrokernelTester()
11882 .cr(2)
11883 .kr(9)
11884 .channels(channels)
11885 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11886 }
11887 }
11888
11889 TEST(F32_DWCONV_UP2X9__WASM, c_div_2_with_qmin) {
11890 for (uint32_t channels = 4; channels < 32; channels += 6) {
11891 DWConvMicrokernelTester()
11892 .cr(2)
11893 .kr(9)
11894 .channels(channels)
11895 .qmin(128)
11896 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11897 }
11898 }
11899
11900 TEST(F32_DWCONV_UP2X9__WASM, c_div_2_with_qmax) {
11901 for (uint32_t channels = 4; channels < 32; channels += 6) {
11902 DWConvMicrokernelTester()
11903 .cr(2)
11904 .kr(9)
11905 .channels(channels)
11906 .qmax(128)
11907 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11908 }
11909 }
11910
11911 TEST(F32_DWCONV_UP2X9__WASM, c_lt_2) {
11912 for (uint32_t channels = 1; channels < 2; channels++) {
11913 DWConvMicrokernelTester()
11914 .cr(2)
11915 .kr(9)
11916 .channels(channels)
11917 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11918 }
11919 }
11920
11921 TEST(F32_DWCONV_UP2X9__WASM, c_gt_2) {
11922 for (uint32_t channels = 3; channels < 4; channels++) {
11923 DWConvMicrokernelTester()
11924 .cr(2)
11925 .kr(9)
11926 .channels(channels)
11927 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11928 }
11929 }
11930
11931 TEST(F32_DWCONV_UP2X9__WASM, c_gt_2_with_qmin) {
11932 for (uint32_t channels = 3; channels < 4; channels++) {
11933 DWConvMicrokernelTester()
11934 .cr(2)
11935 .kr(9)
11936 .channels(channels)
11937 .qmin(128)
11938 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11939 }
11940 }
11941
11942 TEST(F32_DWCONV_UP2X9__WASM, c_gt_2_with_qmax) {
11943 for (uint32_t channels = 3; channels < 4; channels++) {
11944 DWConvMicrokernelTester()
11945 .cr(2)
11946 .kr(9)
11947 .channels(channels)
11948 .qmax(128)
11949 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11950 }
11951 }
11952
11953 TEST(F32_DWCONV_UP2X9__WASM, multipixel) {
11954 for (size_t channels = 1; channels <= 10; channels += 1) {
11955 DWConvMicrokernelTester()
11956 .cr(2)
11957 .kr(9)
11958 .channels(channels)
11959 .width(3)
11960 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11961 }
11962 }
11963
11964 TEST(F32_DWCONV_UP2X9__WASM, multipixel_with_step) {
11965 for (size_t channels = 1; channels <= 10; channels += 1) {
11966 for (size_t step = 2; step <= 9; step++) {
11967 DWConvMicrokernelTester()
11968 .cr(2)
11969 .kr(9)
11970 .channels(channels)
11971 .width(3)
11972 .step(step)
11973 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11974 }
11975 }
11976 }
11977
11978 TEST(F32_DWCONV_UP2X9__WASM, multipixel_with_output_stride) {
11979 for (size_t channels = 1; channels <= 10; channels += 1) {
11980 DWConvMicrokernelTester()
11981 .cr(2)
11982 .kr(9)
11983 .channels(2)
11984 .width(5)
11985 .output_stride(13)
11986 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11987 }
11988 }
11989
11990 TEST(F32_DWCONV_UP2X9__WASM, multipixel_with_qmin) {
11991 for (size_t channels = 1; channels <= 10; channels += 1) {
11992 DWConvMicrokernelTester()
11993 .cr(2)
11994 .kr(9)
11995 .channels(channels)
11996 .width(3)
11997 .qmin(128)
11998 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
11999 }
12000 }
12001
12002 TEST(F32_DWCONV_UP2X9__WASM, multipixel_with_qmax) {
12003 for (size_t channels = 1; channels <= 10; channels += 1) {
12004 DWConvMicrokernelTester()
12005 .cr(2)
12006 .kr(9)
12007 .channels(channels)
12008 .width(3)
12009 .qmax(128)
12010 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
12011 }
12012 }
12013#endif // XNN_ARCH_WASM
12014
12015
12016#if XNN_ARCH_WASM
12017 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_eq_2) {
12018 DWConvMicrokernelTester()
12019 .cr(2)
12020 .kr(9)
12021 .channels(2)
12022 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12023 }
12024
12025 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_div_2) {
12026 for (uint32_t channels = 4; channels < 32; channels += 6) {
12027 DWConvMicrokernelTester()
12028 .cr(2)
12029 .kr(9)
12030 .channels(channels)
12031 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12032 }
12033 }
12034
12035 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_div_2_with_qmin) {
12036 for (uint32_t channels = 4; channels < 32; channels += 6) {
12037 DWConvMicrokernelTester()
12038 .cr(2)
12039 .kr(9)
12040 .channels(channels)
12041 .qmin(128)
12042 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12043 }
12044 }
12045
12046 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_div_2_with_qmax) {
12047 for (uint32_t channels = 4; channels < 32; channels += 6) {
12048 DWConvMicrokernelTester()
12049 .cr(2)
12050 .kr(9)
12051 .channels(channels)
12052 .qmax(128)
12053 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12054 }
12055 }
12056
12057 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_lt_2) {
12058 for (uint32_t channels = 1; channels < 2; channels++) {
12059 DWConvMicrokernelTester()
12060 .cr(2)
12061 .kr(9)
12062 .channels(channels)
12063 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12064 }
12065 }
12066
12067 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_gt_2) {
12068 for (uint32_t channels = 3; channels < 4; channels++) {
12069 DWConvMicrokernelTester()
12070 .cr(2)
12071 .kr(9)
12072 .channels(channels)
12073 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12074 }
12075 }
12076
12077 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_gt_2_with_qmin) {
12078 for (uint32_t channels = 3; channels < 4; channels++) {
12079 DWConvMicrokernelTester()
12080 .cr(2)
12081 .kr(9)
12082 .channels(channels)
12083 .qmin(128)
12084 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12085 }
12086 }
12087
12088 TEST(F32_DWCONV_UP2X9__WASM_ACC2, c_gt_2_with_qmax) {
12089 for (uint32_t channels = 3; channels < 4; channels++) {
12090 DWConvMicrokernelTester()
12091 .cr(2)
12092 .kr(9)
12093 .channels(channels)
12094 .qmax(128)
12095 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12096 }
12097 }
12098
12099 TEST(F32_DWCONV_UP2X9__WASM_ACC2, multipixel) {
12100 for (size_t channels = 1; channels <= 10; channels += 1) {
12101 DWConvMicrokernelTester()
12102 .cr(2)
12103 .kr(9)
12104 .channels(channels)
12105 .width(3)
12106 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12107 }
12108 }
12109
12110 TEST(F32_DWCONV_UP2X9__WASM_ACC2, multipixel_with_step) {
12111 for (size_t channels = 1; channels <= 10; channels += 1) {
12112 for (size_t step = 2; step <= 9; step++) {
12113 DWConvMicrokernelTester()
12114 .cr(2)
12115 .kr(9)
12116 .channels(channels)
12117 .width(3)
12118 .step(step)
12119 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12120 }
12121 }
12122 }
12123
12124 TEST(F32_DWCONV_UP2X9__WASM_ACC2, multipixel_with_output_stride) {
12125 for (size_t channels = 1; channels <= 10; channels += 1) {
12126 DWConvMicrokernelTester()
12127 .cr(2)
12128 .kr(9)
12129 .channels(2)
12130 .width(5)
12131 .output_stride(13)
12132 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12133 }
12134 }
12135
12136 TEST(F32_DWCONV_UP2X9__WASM_ACC2, multipixel_with_qmin) {
12137 for (size_t channels = 1; channels <= 10; channels += 1) {
12138 DWConvMicrokernelTester()
12139 .cr(2)
12140 .kr(9)
12141 .channels(channels)
12142 .width(3)
12143 .qmin(128)
12144 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12145 }
12146 }
12147
12148 TEST(F32_DWCONV_UP2X9__WASM_ACC2, multipixel_with_qmax) {
12149 for (size_t channels = 1; channels <= 10; channels += 1) {
12150 DWConvMicrokernelTester()
12151 .cr(2)
12152 .kr(9)
12153 .channels(channels)
12154 .width(3)
12155 .qmax(128)
12156 .Test(xnn_f32_dwconv_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12157 }
12158 }
12159#endif // XNN_ARCH_WASM
12160
12161
12162#if XNN_ARCH_WASM
12163 TEST(F32_DWCONV_UP1X25__WASM, c_eq_1) {
12164 DWConvMicrokernelTester()
12165 .cr(1)
12166 .kr(25)
12167 .channels(1)
12168 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12169 }
12170
12171 TEST(F32_DWCONV_UP1X25__WASM, c_gt_1) {
12172 for (uint32_t channels = 2; channels < 10; channels++) {
12173 DWConvMicrokernelTester()
12174 .cr(1)
12175 .kr(25)
12176 .channels(channels)
12177 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12178 }
12179 }
12180
12181 TEST(F32_DWCONV_UP1X25__WASM, c_gt_1_with_qmin) {
12182 for (uint32_t channels = 2; channels < 10; channels++) {
12183 DWConvMicrokernelTester()
12184 .cr(1)
12185 .kr(25)
12186 .channels(channels)
12187 .qmin(128)
12188 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12189 }
12190 }
12191
12192 TEST(F32_DWCONV_UP1X25__WASM, c_gt_1_with_qmax) {
12193 for (uint32_t channels = 2; channels < 10; channels++) {
12194 DWConvMicrokernelTester()
12195 .cr(1)
12196 .kr(25)
12197 .channels(channels)
12198 .qmax(128)
12199 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12200 }
12201 }
12202
12203 TEST(F32_DWCONV_UP1X25__WASM, multipixel) {
12204 for (size_t channels = 1; channels <= 5; channels += 1) {
12205 DWConvMicrokernelTester()
12206 .cr(1)
12207 .kr(25)
12208 .channels(channels)
12209 .width(3)
12210 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12211 }
12212 }
12213
12214 TEST(F32_DWCONV_UP1X25__WASM, multipixel_with_step) {
12215 for (size_t channels = 1; channels <= 5; channels += 1) {
12216 for (size_t step = 2; step <= 25; step++) {
12217 DWConvMicrokernelTester()
12218 .cr(1)
12219 .kr(25)
12220 .channels(channels)
12221 .width(3)
12222 .step(step)
12223 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12224 }
12225 }
12226 }
12227
12228 TEST(F32_DWCONV_UP1X25__WASM, multipixel_with_output_stride) {
12229 for (size_t channels = 1; channels <= 5; channels += 1) {
12230 DWConvMicrokernelTester()
12231 .cr(1)
12232 .kr(25)
12233 .channels(1)
12234 .width(5)
12235 .output_stride(7)
12236 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12237 }
12238 }
12239
12240 TEST(F32_DWCONV_UP1X25__WASM, multipixel_with_qmin) {
12241 for (size_t channels = 1; channels <= 5; channels += 1) {
12242 DWConvMicrokernelTester()
12243 .cr(1)
12244 .kr(25)
12245 .channels(channels)
12246 .width(3)
12247 .qmin(128)
12248 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12249 }
12250 }
12251
12252 TEST(F32_DWCONV_UP1X25__WASM, multipixel_with_qmax) {
12253 for (size_t channels = 1; channels <= 5; channels += 1) {
12254 DWConvMicrokernelTester()
12255 .cr(1)
12256 .kr(25)
12257 .channels(channels)
12258 .width(3)
12259 .qmax(128)
12260 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12261 }
12262 }
12263#endif // XNN_ARCH_WASM
12264
12265
12266#if XNN_ARCH_WASM
12267 TEST(F32_DWCONV_UP1X25__WASM_ACC2, c_eq_1) {
12268 DWConvMicrokernelTester()
12269 .cr(1)
12270 .kr(25)
12271 .channels(1)
12272 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12273 }
12274
12275 TEST(F32_DWCONV_UP1X25__WASM_ACC2, c_gt_1) {
12276 for (uint32_t channels = 2; channels < 10; channels++) {
12277 DWConvMicrokernelTester()
12278 .cr(1)
12279 .kr(25)
12280 .channels(channels)
12281 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12282 }
12283 }
12284
12285 TEST(F32_DWCONV_UP1X25__WASM_ACC2, c_gt_1_with_qmin) {
12286 for (uint32_t channels = 2; channels < 10; channels++) {
12287 DWConvMicrokernelTester()
12288 .cr(1)
12289 .kr(25)
12290 .channels(channels)
12291 .qmin(128)
12292 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12293 }
12294 }
12295
12296 TEST(F32_DWCONV_UP1X25__WASM_ACC2, c_gt_1_with_qmax) {
12297 for (uint32_t channels = 2; channels < 10; channels++) {
12298 DWConvMicrokernelTester()
12299 .cr(1)
12300 .kr(25)
12301 .channels(channels)
12302 .qmax(128)
12303 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12304 }
12305 }
12306
12307 TEST(F32_DWCONV_UP1X25__WASM_ACC2, multipixel) {
12308 for (size_t channels = 1; channels <= 5; channels += 1) {
12309 DWConvMicrokernelTester()
12310 .cr(1)
12311 .kr(25)
12312 .channels(channels)
12313 .width(3)
12314 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12315 }
12316 }
12317
12318 TEST(F32_DWCONV_UP1X25__WASM_ACC2, multipixel_with_step) {
12319 for (size_t channels = 1; channels <= 5; channels += 1) {
12320 for (size_t step = 2; step <= 25; step++) {
12321 DWConvMicrokernelTester()
12322 .cr(1)
12323 .kr(25)
12324 .channels(channels)
12325 .width(3)
12326 .step(step)
12327 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12328 }
12329 }
12330 }
12331
12332 TEST(F32_DWCONV_UP1X25__WASM_ACC2, multipixel_with_output_stride) {
12333 for (size_t channels = 1; channels <= 5; channels += 1) {
12334 DWConvMicrokernelTester()
12335 .cr(1)
12336 .kr(25)
12337 .channels(1)
12338 .width(5)
12339 .output_stride(7)
12340 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12341 }
12342 }
12343
12344 TEST(F32_DWCONV_UP1X25__WASM_ACC2, multipixel_with_qmin) {
12345 for (size_t channels = 1; channels <= 5; channels += 1) {
12346 DWConvMicrokernelTester()
12347 .cr(1)
12348 .kr(25)
12349 .channels(channels)
12350 .width(3)
12351 .qmin(128)
12352 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12353 }
12354 }
12355
12356 TEST(F32_DWCONV_UP1X25__WASM_ACC2, multipixel_with_qmax) {
12357 for (size_t channels = 1; channels <= 5; channels += 1) {
12358 DWConvMicrokernelTester()
12359 .cr(1)
12360 .kr(25)
12361 .channels(channels)
12362 .width(3)
12363 .qmax(128)
12364 .Test(xnn_f32_dwconv_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12365 }
12366 }
12367#endif // XNN_ARCH_WASM
12368
12369
12370#if XNN_ARCH_WASM
12371 TEST(F32_DWCONV_UP2X25__WASM, c_eq_2) {
12372 DWConvMicrokernelTester()
12373 .cr(2)
12374 .kr(25)
12375 .channels(2)
12376 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12377 }
12378
12379 TEST(F32_DWCONV_UP2X25__WASM, c_div_2) {
12380 for (uint32_t channels = 4; channels < 32; channels += 6) {
12381 DWConvMicrokernelTester()
12382 .cr(2)
12383 .kr(25)
12384 .channels(channels)
12385 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12386 }
12387 }
12388
12389 TEST(F32_DWCONV_UP2X25__WASM, c_div_2_with_qmin) {
12390 for (uint32_t channels = 4; channels < 32; channels += 6) {
12391 DWConvMicrokernelTester()
12392 .cr(2)
12393 .kr(25)
12394 .channels(channels)
12395 .qmin(128)
12396 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12397 }
12398 }
12399
12400 TEST(F32_DWCONV_UP2X25__WASM, c_div_2_with_qmax) {
12401 for (uint32_t channels = 4; channels < 32; channels += 6) {
12402 DWConvMicrokernelTester()
12403 .cr(2)
12404 .kr(25)
12405 .channels(channels)
12406 .qmax(128)
12407 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12408 }
12409 }
12410
12411 TEST(F32_DWCONV_UP2X25__WASM, c_lt_2) {
12412 for (uint32_t channels = 1; channels < 2; channels++) {
12413 DWConvMicrokernelTester()
12414 .cr(2)
12415 .kr(25)
12416 .channels(channels)
12417 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12418 }
12419 }
12420
12421 TEST(F32_DWCONV_UP2X25__WASM, c_gt_2) {
12422 for (uint32_t channels = 3; channels < 4; channels++) {
12423 DWConvMicrokernelTester()
12424 .cr(2)
12425 .kr(25)
12426 .channels(channels)
12427 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12428 }
12429 }
12430
12431 TEST(F32_DWCONV_UP2X25__WASM, c_gt_2_with_qmin) {
12432 for (uint32_t channels = 3; channels < 4; channels++) {
12433 DWConvMicrokernelTester()
12434 .cr(2)
12435 .kr(25)
12436 .channels(channels)
12437 .qmin(128)
12438 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12439 }
12440 }
12441
12442 TEST(F32_DWCONV_UP2X25__WASM, c_gt_2_with_qmax) {
12443 for (uint32_t channels = 3; channels < 4; channels++) {
12444 DWConvMicrokernelTester()
12445 .cr(2)
12446 .kr(25)
12447 .channels(channels)
12448 .qmax(128)
12449 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12450 }
12451 }
12452
12453 TEST(F32_DWCONV_UP2X25__WASM, multipixel) {
12454 for (size_t channels = 1; channels <= 10; channels += 1) {
12455 DWConvMicrokernelTester()
12456 .cr(2)
12457 .kr(25)
12458 .channels(channels)
12459 .width(3)
12460 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12461 }
12462 }
12463
12464 TEST(F32_DWCONV_UP2X25__WASM, multipixel_with_step) {
12465 for (size_t channels = 1; channels <= 10; channels += 1) {
12466 for (size_t step = 2; step <= 25; step++) {
12467 DWConvMicrokernelTester()
12468 .cr(2)
12469 .kr(25)
12470 .channels(channels)
12471 .width(3)
12472 .step(step)
12473 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12474 }
12475 }
12476 }
12477
12478 TEST(F32_DWCONV_UP2X25__WASM, multipixel_with_output_stride) {
12479 for (size_t channels = 1; channels <= 10; channels += 1) {
12480 DWConvMicrokernelTester()
12481 .cr(2)
12482 .kr(25)
12483 .channels(2)
12484 .width(5)
12485 .output_stride(13)
12486 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12487 }
12488 }
12489
12490 TEST(F32_DWCONV_UP2X25__WASM, multipixel_with_qmin) {
12491 for (size_t channels = 1; channels <= 10; channels += 1) {
12492 DWConvMicrokernelTester()
12493 .cr(2)
12494 .kr(25)
12495 .channels(channels)
12496 .width(3)
12497 .qmin(128)
12498 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12499 }
12500 }
12501
12502 TEST(F32_DWCONV_UP2X25__WASM, multipixel_with_qmax) {
12503 for (size_t channels = 1; channels <= 10; channels += 1) {
12504 DWConvMicrokernelTester()
12505 .cr(2)
12506 .kr(25)
12507 .channels(channels)
12508 .width(3)
12509 .qmax(128)
12510 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
12511 }
12512 }
12513#endif // XNN_ARCH_WASM
12514
12515
12516#if XNN_ARCH_WASM
12517 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_eq_2) {
12518 DWConvMicrokernelTester()
12519 .cr(2)
12520 .kr(25)
12521 .channels(2)
12522 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12523 }
12524
12525 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_div_2) {
12526 for (uint32_t channels = 4; channels < 32; channels += 6) {
12527 DWConvMicrokernelTester()
12528 .cr(2)
12529 .kr(25)
12530 .channels(channels)
12531 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12532 }
12533 }
12534
12535 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_div_2_with_qmin) {
12536 for (uint32_t channels = 4; channels < 32; channels += 6) {
12537 DWConvMicrokernelTester()
12538 .cr(2)
12539 .kr(25)
12540 .channels(channels)
12541 .qmin(128)
12542 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12543 }
12544 }
12545
12546 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_div_2_with_qmax) {
12547 for (uint32_t channels = 4; channels < 32; channels += 6) {
12548 DWConvMicrokernelTester()
12549 .cr(2)
12550 .kr(25)
12551 .channels(channels)
12552 .qmax(128)
12553 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12554 }
12555 }
12556
12557 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_lt_2) {
12558 for (uint32_t channels = 1; channels < 2; channels++) {
12559 DWConvMicrokernelTester()
12560 .cr(2)
12561 .kr(25)
12562 .channels(channels)
12563 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12564 }
12565 }
12566
12567 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_gt_2) {
12568 for (uint32_t channels = 3; channels < 4; channels++) {
12569 DWConvMicrokernelTester()
12570 .cr(2)
12571 .kr(25)
12572 .channels(channels)
12573 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12574 }
12575 }
12576
12577 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_gt_2_with_qmin) {
12578 for (uint32_t channels = 3; channels < 4; channels++) {
12579 DWConvMicrokernelTester()
12580 .cr(2)
12581 .kr(25)
12582 .channels(channels)
12583 .qmin(128)
12584 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12585 }
12586 }
12587
12588 TEST(F32_DWCONV_UP2X25__WASM_ACC2, c_gt_2_with_qmax) {
12589 for (uint32_t channels = 3; channels < 4; channels++) {
12590 DWConvMicrokernelTester()
12591 .cr(2)
12592 .kr(25)
12593 .channels(channels)
12594 .qmax(128)
12595 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12596 }
12597 }
12598
12599 TEST(F32_DWCONV_UP2X25__WASM_ACC2, multipixel) {
12600 for (size_t channels = 1; channels <= 10; channels += 1) {
12601 DWConvMicrokernelTester()
12602 .cr(2)
12603 .kr(25)
12604 .channels(channels)
12605 .width(3)
12606 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12607 }
12608 }
12609
12610 TEST(F32_DWCONV_UP2X25__WASM_ACC2, multipixel_with_step) {
12611 for (size_t channels = 1; channels <= 10; channels += 1) {
12612 for (size_t step = 2; step <= 25; step++) {
12613 DWConvMicrokernelTester()
12614 .cr(2)
12615 .kr(25)
12616 .channels(channels)
12617 .width(3)
12618 .step(step)
12619 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12620 }
12621 }
12622 }
12623
12624 TEST(F32_DWCONV_UP2X25__WASM_ACC2, multipixel_with_output_stride) {
12625 for (size_t channels = 1; channels <= 10; channels += 1) {
12626 DWConvMicrokernelTester()
12627 .cr(2)
12628 .kr(25)
12629 .channels(2)
12630 .width(5)
12631 .output_stride(13)
12632 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12633 }
12634 }
12635
12636 TEST(F32_DWCONV_UP2X25__WASM_ACC2, multipixel_with_qmin) {
12637 for (size_t channels = 1; channels <= 10; channels += 1) {
12638 DWConvMicrokernelTester()
12639 .cr(2)
12640 .kr(25)
12641 .channels(channels)
12642 .width(3)
12643 .qmin(128)
12644 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12645 }
12646 }
12647
12648 TEST(F32_DWCONV_UP2X25__WASM_ACC2, multipixel_with_qmax) {
12649 for (size_t channels = 1; channels <= 10; channels += 1) {
12650 DWConvMicrokernelTester()
12651 .cr(2)
12652 .kr(25)
12653 .channels(channels)
12654 .width(3)
12655 .qmax(128)
12656 .Test(xnn_f32_dwconv_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
12657 }
12658 }
12659#endif // XNN_ARCH_WASM
12660
12661
12662TEST(F32_DWCONV_UP1X4__SCALAR, c_eq_1) {
12663 DWConvMicrokernelTester()
12664 .cr(1)
12665 .kr(4)
12666 .channels(1)
12667 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12668}
12669
12670TEST(F32_DWCONV_UP1X4__SCALAR, c_gt_1) {
12671 for (uint32_t channels = 2; channels < 10; channels++) {
12672 DWConvMicrokernelTester()
12673 .cr(1)
12674 .kr(4)
12675 .channels(channels)
12676 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12677 }
12678}
12679
12680TEST(F32_DWCONV_UP1X4__SCALAR, c_gt_1_with_qmin) {
12681 for (uint32_t channels = 2; channels < 10; channels++) {
12682 DWConvMicrokernelTester()
12683 .cr(1)
12684 .kr(4)
12685 .channels(channels)
12686 .qmin(128)
12687 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12688 }
12689}
12690
12691TEST(F32_DWCONV_UP1X4__SCALAR, c_gt_1_with_qmax) {
12692 for (uint32_t channels = 2; channels < 10; channels++) {
12693 DWConvMicrokernelTester()
12694 .cr(1)
12695 .kr(4)
12696 .channels(channels)
12697 .qmax(128)
12698 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12699 }
12700}
12701
12702TEST(F32_DWCONV_UP1X4__SCALAR, multipixel) {
12703 for (size_t channels = 1; channels <= 5; channels += 1) {
12704 DWConvMicrokernelTester()
12705 .cr(1)
12706 .kr(4)
12707 .channels(channels)
12708 .width(3)
12709 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12710 }
12711}
12712
12713TEST(F32_DWCONV_UP1X4__SCALAR, multipixel_with_step) {
12714 for (size_t channels = 1; channels <= 5; channels += 1) {
12715 for (size_t step = 2; step <= 4; step++) {
12716 DWConvMicrokernelTester()
12717 .cr(1)
12718 .kr(4)
12719 .channels(channels)
12720 .width(3)
12721 .step(step)
12722 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12723 }
12724 }
12725}
12726
12727TEST(F32_DWCONV_UP1X4__SCALAR, multipixel_with_output_stride) {
12728 for (size_t channels = 1; channels <= 5; channels += 1) {
12729 DWConvMicrokernelTester()
12730 .cr(1)
12731 .kr(4)
12732 .channels(1)
12733 .width(5)
12734 .output_stride(7)
12735 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12736 }
12737}
12738
12739TEST(F32_DWCONV_UP1X4__SCALAR, multipixel_with_qmin) {
12740 for (size_t channels = 1; channels <= 5; channels += 1) {
12741 DWConvMicrokernelTester()
12742 .cr(1)
12743 .kr(4)
12744 .channels(channels)
12745 .width(3)
12746 .qmin(128)
12747 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12748 }
12749}
12750
12751TEST(F32_DWCONV_UP1X4__SCALAR, multipixel_with_qmax) {
12752 for (size_t channels = 1; channels <= 5; channels += 1) {
12753 DWConvMicrokernelTester()
12754 .cr(1)
12755 .kr(4)
12756 .channels(channels)
12757 .width(3)
12758 .qmax(128)
12759 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12760 }
12761}
12762
12763
12764TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, c_eq_1) {
12765 DWConvMicrokernelTester()
12766 .cr(1)
12767 .kr(4)
12768 .channels(1)
12769 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12770}
12771
12772TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, c_gt_1) {
12773 for (uint32_t channels = 2; channels < 10; channels++) {
12774 DWConvMicrokernelTester()
12775 .cr(1)
12776 .kr(4)
12777 .channels(channels)
12778 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12779 }
12780}
12781
12782TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, c_gt_1_with_qmin) {
12783 for (uint32_t channels = 2; channels < 10; channels++) {
12784 DWConvMicrokernelTester()
12785 .cr(1)
12786 .kr(4)
12787 .channels(channels)
12788 .qmin(128)
12789 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12790 }
12791}
12792
12793TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, c_gt_1_with_qmax) {
12794 for (uint32_t channels = 2; channels < 10; channels++) {
12795 DWConvMicrokernelTester()
12796 .cr(1)
12797 .kr(4)
12798 .channels(channels)
12799 .qmax(128)
12800 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12801 }
12802}
12803
12804TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, multipixel) {
12805 for (size_t channels = 1; channels <= 5; channels += 1) {
12806 DWConvMicrokernelTester()
12807 .cr(1)
12808 .kr(4)
12809 .channels(channels)
12810 .width(3)
12811 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12812 }
12813}
12814
12815TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, multipixel_with_step) {
12816 for (size_t channels = 1; channels <= 5; channels += 1) {
12817 for (size_t step = 2; step <= 4; step++) {
12818 DWConvMicrokernelTester()
12819 .cr(1)
12820 .kr(4)
12821 .channels(channels)
12822 .width(3)
12823 .step(step)
12824 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12825 }
12826 }
12827}
12828
12829TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, multipixel_with_output_stride) {
12830 for (size_t channels = 1; channels <= 5; channels += 1) {
12831 DWConvMicrokernelTester()
12832 .cr(1)
12833 .kr(4)
12834 .channels(1)
12835 .width(5)
12836 .output_stride(7)
12837 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12838 }
12839}
12840
12841TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, multipixel_with_qmin) {
12842 for (size_t channels = 1; channels <= 5; channels += 1) {
12843 DWConvMicrokernelTester()
12844 .cr(1)
12845 .kr(4)
12846 .channels(channels)
12847 .width(3)
12848 .qmin(128)
12849 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12850 }
12851}
12852
12853TEST(F32_DWCONV_UP1X4__SCALAR_ACC2, multipixel_with_qmax) {
12854 for (size_t channels = 1; channels <= 5; channels += 1) {
12855 DWConvMicrokernelTester()
12856 .cr(1)
12857 .kr(4)
12858 .channels(channels)
12859 .width(3)
12860 .qmax(128)
12861 .Test(xnn_f32_dwconv_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
12862 }
12863}
12864
12865
12866TEST(F32_DWCONV_UP2X4__SCALAR, c_eq_2) {
12867 DWConvMicrokernelTester()
12868 .cr(2)
12869 .kr(4)
12870 .channels(2)
12871 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12872}
12873
12874TEST(F32_DWCONV_UP2X4__SCALAR, c_div_2) {
12875 for (uint32_t channels = 4; channels < 32; channels += 6) {
12876 DWConvMicrokernelTester()
12877 .cr(2)
12878 .kr(4)
12879 .channels(channels)
12880 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12881 }
12882}
12883
12884TEST(F32_DWCONV_UP2X4__SCALAR, c_div_2_with_qmin) {
12885 for (uint32_t channels = 4; channels < 32; channels += 6) {
12886 DWConvMicrokernelTester()
12887 .cr(2)
12888 .kr(4)
12889 .channels(channels)
12890 .qmin(128)
12891 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12892 }
12893}
12894
12895TEST(F32_DWCONV_UP2X4__SCALAR, c_div_2_with_qmax) {
12896 for (uint32_t channels = 4; channels < 32; channels += 6) {
12897 DWConvMicrokernelTester()
12898 .cr(2)
12899 .kr(4)
12900 .channels(channels)
12901 .qmax(128)
12902 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12903 }
12904}
12905
12906TEST(F32_DWCONV_UP2X4__SCALAR, c_lt_2) {
12907 for (uint32_t channels = 1; channels < 2; channels++) {
12908 DWConvMicrokernelTester()
12909 .cr(2)
12910 .kr(4)
12911 .channels(channels)
12912 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12913 }
12914}
12915
12916TEST(F32_DWCONV_UP2X4__SCALAR, c_gt_2) {
12917 for (uint32_t channels = 3; channels < 4; channels++) {
12918 DWConvMicrokernelTester()
12919 .cr(2)
12920 .kr(4)
12921 .channels(channels)
12922 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12923 }
12924}
12925
12926TEST(F32_DWCONV_UP2X4__SCALAR, c_gt_2_with_qmin) {
12927 for (uint32_t channels = 3; channels < 4; channels++) {
12928 DWConvMicrokernelTester()
12929 .cr(2)
12930 .kr(4)
12931 .channels(channels)
12932 .qmin(128)
12933 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12934 }
12935}
12936
12937TEST(F32_DWCONV_UP2X4__SCALAR, c_gt_2_with_qmax) {
12938 for (uint32_t channels = 3; channels < 4; channels++) {
12939 DWConvMicrokernelTester()
12940 .cr(2)
12941 .kr(4)
12942 .channels(channels)
12943 .qmax(128)
12944 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12945 }
12946}
12947
12948TEST(F32_DWCONV_UP2X4__SCALAR, multipixel) {
12949 for (size_t channels = 1; channels <= 10; channels += 1) {
12950 DWConvMicrokernelTester()
12951 .cr(2)
12952 .kr(4)
12953 .channels(channels)
12954 .width(3)
12955 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12956 }
12957}
12958
12959TEST(F32_DWCONV_UP2X4__SCALAR, multipixel_with_step) {
12960 for (size_t channels = 1; channels <= 10; channels += 1) {
12961 for (size_t step = 2; step <= 4; step++) {
12962 DWConvMicrokernelTester()
12963 .cr(2)
12964 .kr(4)
12965 .channels(channels)
12966 .width(3)
12967 .step(step)
12968 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12969 }
12970 }
12971}
12972
12973TEST(F32_DWCONV_UP2X4__SCALAR, multipixel_with_output_stride) {
12974 for (size_t channels = 1; channels <= 10; channels += 1) {
12975 DWConvMicrokernelTester()
12976 .cr(2)
12977 .kr(4)
12978 .channels(2)
12979 .width(5)
12980 .output_stride(13)
12981 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12982 }
12983}
12984
12985TEST(F32_DWCONV_UP2X4__SCALAR, multipixel_with_qmin) {
12986 for (size_t channels = 1; channels <= 10; channels += 1) {
12987 DWConvMicrokernelTester()
12988 .cr(2)
12989 .kr(4)
12990 .channels(channels)
12991 .width(3)
12992 .qmin(128)
12993 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
12994 }
12995}
12996
12997TEST(F32_DWCONV_UP2X4__SCALAR, multipixel_with_qmax) {
12998 for (size_t channels = 1; channels <= 10; channels += 1) {
12999 DWConvMicrokernelTester()
13000 .cr(2)
13001 .kr(4)
13002 .channels(channels)
13003 .width(3)
13004 .qmax(128)
13005 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
13006 }
13007}
13008
13009
13010TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_eq_2) {
13011 DWConvMicrokernelTester()
13012 .cr(2)
13013 .kr(4)
13014 .channels(2)
13015 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13016}
13017
13018TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_div_2) {
13019 for (uint32_t channels = 4; channels < 32; channels += 6) {
13020 DWConvMicrokernelTester()
13021 .cr(2)
13022 .kr(4)
13023 .channels(channels)
13024 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13025 }
13026}
13027
13028TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_div_2_with_qmin) {
13029 for (uint32_t channels = 4; channels < 32; channels += 6) {
13030 DWConvMicrokernelTester()
13031 .cr(2)
13032 .kr(4)
13033 .channels(channels)
13034 .qmin(128)
13035 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13036 }
13037}
13038
13039TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_div_2_with_qmax) {
13040 for (uint32_t channels = 4; channels < 32; channels += 6) {
13041 DWConvMicrokernelTester()
13042 .cr(2)
13043 .kr(4)
13044 .channels(channels)
13045 .qmax(128)
13046 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13047 }
13048}
13049
13050TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_lt_2) {
13051 for (uint32_t channels = 1; channels < 2; channels++) {
13052 DWConvMicrokernelTester()
13053 .cr(2)
13054 .kr(4)
13055 .channels(channels)
13056 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13057 }
13058}
13059
13060TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_gt_2) {
13061 for (uint32_t channels = 3; channels < 4; channels++) {
13062 DWConvMicrokernelTester()
13063 .cr(2)
13064 .kr(4)
13065 .channels(channels)
13066 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13067 }
13068}
13069
13070TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_gt_2_with_qmin) {
13071 for (uint32_t channels = 3; channels < 4; channels++) {
13072 DWConvMicrokernelTester()
13073 .cr(2)
13074 .kr(4)
13075 .channels(channels)
13076 .qmin(128)
13077 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13078 }
13079}
13080
13081TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, c_gt_2_with_qmax) {
13082 for (uint32_t channels = 3; channels < 4; channels++) {
13083 DWConvMicrokernelTester()
13084 .cr(2)
13085 .kr(4)
13086 .channels(channels)
13087 .qmax(128)
13088 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13089 }
13090}
13091
13092TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, multipixel) {
13093 for (size_t channels = 1; channels <= 10; channels += 1) {
13094 DWConvMicrokernelTester()
13095 .cr(2)
13096 .kr(4)
13097 .channels(channels)
13098 .width(3)
13099 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13100 }
13101}
13102
13103TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, multipixel_with_step) {
13104 for (size_t channels = 1; channels <= 10; channels += 1) {
13105 for (size_t step = 2; step <= 4; step++) {
13106 DWConvMicrokernelTester()
13107 .cr(2)
13108 .kr(4)
13109 .channels(channels)
13110 .width(3)
13111 .step(step)
13112 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13113 }
13114 }
13115}
13116
13117TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, multipixel_with_output_stride) {
13118 for (size_t channels = 1; channels <= 10; channels += 1) {
13119 DWConvMicrokernelTester()
13120 .cr(2)
13121 .kr(4)
13122 .channels(2)
13123 .width(5)
13124 .output_stride(13)
13125 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13126 }
13127}
13128
13129TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, multipixel_with_qmin) {
13130 for (size_t channels = 1; channels <= 10; channels += 1) {
13131 DWConvMicrokernelTester()
13132 .cr(2)
13133 .kr(4)
13134 .channels(channels)
13135 .width(3)
13136 .qmin(128)
13137 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13138 }
13139}
13140
13141TEST(F32_DWCONV_UP2X4__SCALAR_ACC2, multipixel_with_qmax) {
13142 for (size_t channels = 1; channels <= 10; channels += 1) {
13143 DWConvMicrokernelTester()
13144 .cr(2)
13145 .kr(4)
13146 .channels(channels)
13147 .width(3)
13148 .qmax(128)
13149 .Test(xnn_f32_dwconv_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13150 }
13151}
13152
13153
13154TEST(F32_DWCONV_UP1X9__SCALAR, c_eq_1) {
13155 DWConvMicrokernelTester()
13156 .cr(1)
13157 .kr(9)
13158 .channels(1)
13159 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13160}
13161
13162TEST(F32_DWCONV_UP1X9__SCALAR, c_gt_1) {
13163 for (uint32_t channels = 2; channels < 10; channels++) {
13164 DWConvMicrokernelTester()
13165 .cr(1)
13166 .kr(9)
13167 .channels(channels)
13168 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13169 }
13170}
13171
13172TEST(F32_DWCONV_UP1X9__SCALAR, c_gt_1_with_qmin) {
13173 for (uint32_t channels = 2; channels < 10; channels++) {
13174 DWConvMicrokernelTester()
13175 .cr(1)
13176 .kr(9)
13177 .channels(channels)
13178 .qmin(128)
13179 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13180 }
13181}
13182
13183TEST(F32_DWCONV_UP1X9__SCALAR, c_gt_1_with_qmax) {
13184 for (uint32_t channels = 2; channels < 10; channels++) {
13185 DWConvMicrokernelTester()
13186 .cr(1)
13187 .kr(9)
13188 .channels(channels)
13189 .qmax(128)
13190 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13191 }
13192}
13193
13194TEST(F32_DWCONV_UP1X9__SCALAR, multipixel) {
13195 for (size_t channels = 1; channels <= 5; channels += 1) {
13196 DWConvMicrokernelTester()
13197 .cr(1)
13198 .kr(9)
13199 .channels(channels)
13200 .width(3)
13201 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13202 }
13203}
13204
13205TEST(F32_DWCONV_UP1X9__SCALAR, multipixel_with_step) {
13206 for (size_t channels = 1; channels <= 5; channels += 1) {
13207 for (size_t step = 2; step <= 9; step++) {
13208 DWConvMicrokernelTester()
13209 .cr(1)
13210 .kr(9)
13211 .channels(channels)
13212 .width(3)
13213 .step(step)
13214 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13215 }
13216 }
13217}
13218
13219TEST(F32_DWCONV_UP1X9__SCALAR, multipixel_with_output_stride) {
13220 for (size_t channels = 1; channels <= 5; channels += 1) {
13221 DWConvMicrokernelTester()
13222 .cr(1)
13223 .kr(9)
13224 .channels(1)
13225 .width(5)
13226 .output_stride(7)
13227 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13228 }
13229}
13230
13231TEST(F32_DWCONV_UP1X9__SCALAR, multipixel_with_qmin) {
13232 for (size_t channels = 1; channels <= 5; channels += 1) {
13233 DWConvMicrokernelTester()
13234 .cr(1)
13235 .kr(9)
13236 .channels(channels)
13237 .width(3)
13238 .qmin(128)
13239 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13240 }
13241}
13242
13243TEST(F32_DWCONV_UP1X9__SCALAR, multipixel_with_qmax) {
13244 for (size_t channels = 1; channels <= 5; channels += 1) {
13245 DWConvMicrokernelTester()
13246 .cr(1)
13247 .kr(9)
13248 .channels(channels)
13249 .width(3)
13250 .qmax(128)
13251 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13252 }
13253}
13254
13255
13256TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, c_eq_1) {
13257 DWConvMicrokernelTester()
13258 .cr(1)
13259 .kr(9)
13260 .channels(1)
13261 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13262}
13263
13264TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, c_gt_1) {
13265 for (uint32_t channels = 2; channels < 10; channels++) {
13266 DWConvMicrokernelTester()
13267 .cr(1)
13268 .kr(9)
13269 .channels(channels)
13270 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13271 }
13272}
13273
13274TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, c_gt_1_with_qmin) {
13275 for (uint32_t channels = 2; channels < 10; channels++) {
13276 DWConvMicrokernelTester()
13277 .cr(1)
13278 .kr(9)
13279 .channels(channels)
13280 .qmin(128)
13281 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13282 }
13283}
13284
13285TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, c_gt_1_with_qmax) {
13286 for (uint32_t channels = 2; channels < 10; channels++) {
13287 DWConvMicrokernelTester()
13288 .cr(1)
13289 .kr(9)
13290 .channels(channels)
13291 .qmax(128)
13292 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13293 }
13294}
13295
13296TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, multipixel) {
13297 for (size_t channels = 1; channels <= 5; channels += 1) {
13298 DWConvMicrokernelTester()
13299 .cr(1)
13300 .kr(9)
13301 .channels(channels)
13302 .width(3)
13303 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13304 }
13305}
13306
13307TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, multipixel_with_step) {
13308 for (size_t channels = 1; channels <= 5; channels += 1) {
13309 for (size_t step = 2; step <= 9; step++) {
13310 DWConvMicrokernelTester()
13311 .cr(1)
13312 .kr(9)
13313 .channels(channels)
13314 .width(3)
13315 .step(step)
13316 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13317 }
13318 }
13319}
13320
13321TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, multipixel_with_output_stride) {
13322 for (size_t channels = 1; channels <= 5; channels += 1) {
13323 DWConvMicrokernelTester()
13324 .cr(1)
13325 .kr(9)
13326 .channels(1)
13327 .width(5)
13328 .output_stride(7)
13329 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13330 }
13331}
13332
13333TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, multipixel_with_qmin) {
13334 for (size_t channels = 1; channels <= 5; channels += 1) {
13335 DWConvMicrokernelTester()
13336 .cr(1)
13337 .kr(9)
13338 .channels(channels)
13339 .width(3)
13340 .qmin(128)
13341 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13342 }
13343}
13344
13345TEST(F32_DWCONV_UP1X9__SCALAR_ACC2, multipixel_with_qmax) {
13346 for (size_t channels = 1; channels <= 5; channels += 1) {
13347 DWConvMicrokernelTester()
13348 .cr(1)
13349 .kr(9)
13350 .channels(channels)
13351 .width(3)
13352 .qmax(128)
13353 .Test(xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13354 }
13355}
13356
13357
13358TEST(F32_DWCONV_UP2X9__SCALAR, c_eq_2) {
13359 DWConvMicrokernelTester()
13360 .cr(2)
13361 .kr(9)
13362 .channels(2)
13363 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13364}
13365
13366TEST(F32_DWCONV_UP2X9__SCALAR, c_div_2) {
13367 for (uint32_t channels = 4; channels < 32; channels += 6) {
13368 DWConvMicrokernelTester()
13369 .cr(2)
13370 .kr(9)
13371 .channels(channels)
13372 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13373 }
13374}
13375
13376TEST(F32_DWCONV_UP2X9__SCALAR, c_div_2_with_qmin) {
13377 for (uint32_t channels = 4; channels < 32; channels += 6) {
13378 DWConvMicrokernelTester()
13379 .cr(2)
13380 .kr(9)
13381 .channels(channels)
13382 .qmin(128)
13383 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13384 }
13385}
13386
13387TEST(F32_DWCONV_UP2X9__SCALAR, c_div_2_with_qmax) {
13388 for (uint32_t channels = 4; channels < 32; channels += 6) {
13389 DWConvMicrokernelTester()
13390 .cr(2)
13391 .kr(9)
13392 .channels(channels)
13393 .qmax(128)
13394 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13395 }
13396}
13397
13398TEST(F32_DWCONV_UP2X9__SCALAR, c_lt_2) {
13399 for (uint32_t channels = 1; channels < 2; channels++) {
13400 DWConvMicrokernelTester()
13401 .cr(2)
13402 .kr(9)
13403 .channels(channels)
13404 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13405 }
13406}
13407
13408TEST(F32_DWCONV_UP2X9__SCALAR, c_gt_2) {
13409 for (uint32_t channels = 3; channels < 4; channels++) {
13410 DWConvMicrokernelTester()
13411 .cr(2)
13412 .kr(9)
13413 .channels(channels)
13414 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13415 }
13416}
13417
13418TEST(F32_DWCONV_UP2X9__SCALAR, c_gt_2_with_qmin) {
13419 for (uint32_t channels = 3; channels < 4; channels++) {
13420 DWConvMicrokernelTester()
13421 .cr(2)
13422 .kr(9)
13423 .channels(channels)
13424 .qmin(128)
13425 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13426 }
13427}
13428
13429TEST(F32_DWCONV_UP2X9__SCALAR, c_gt_2_with_qmax) {
13430 for (uint32_t channels = 3; channels < 4; channels++) {
13431 DWConvMicrokernelTester()
13432 .cr(2)
13433 .kr(9)
13434 .channels(channels)
13435 .qmax(128)
13436 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13437 }
13438}
13439
13440TEST(F32_DWCONV_UP2X9__SCALAR, multipixel) {
13441 for (size_t channels = 1; channels <= 10; channels += 1) {
13442 DWConvMicrokernelTester()
13443 .cr(2)
13444 .kr(9)
13445 .channels(channels)
13446 .width(3)
13447 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13448 }
13449}
13450
13451TEST(F32_DWCONV_UP2X9__SCALAR, multipixel_with_step) {
13452 for (size_t channels = 1; channels <= 10; channels += 1) {
13453 for (size_t step = 2; step <= 9; step++) {
13454 DWConvMicrokernelTester()
13455 .cr(2)
13456 .kr(9)
13457 .channels(channels)
13458 .width(3)
13459 .step(step)
13460 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13461 }
13462 }
13463}
13464
13465TEST(F32_DWCONV_UP2X9__SCALAR, multipixel_with_output_stride) {
13466 for (size_t channels = 1; channels <= 10; channels += 1) {
13467 DWConvMicrokernelTester()
13468 .cr(2)
13469 .kr(9)
13470 .channels(2)
13471 .width(5)
13472 .output_stride(13)
13473 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13474 }
13475}
13476
13477TEST(F32_DWCONV_UP2X9__SCALAR, multipixel_with_qmin) {
13478 for (size_t channels = 1; channels <= 10; channels += 1) {
13479 DWConvMicrokernelTester()
13480 .cr(2)
13481 .kr(9)
13482 .channels(channels)
13483 .width(3)
13484 .qmin(128)
13485 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13486 }
13487}
13488
13489TEST(F32_DWCONV_UP2X9__SCALAR, multipixel_with_qmax) {
13490 for (size_t channels = 1; channels <= 10; channels += 1) {
13491 DWConvMicrokernelTester()
13492 .cr(2)
13493 .kr(9)
13494 .channels(channels)
13495 .width(3)
13496 .qmax(128)
13497 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
13498 }
13499}
13500
13501
13502TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_eq_2) {
13503 DWConvMicrokernelTester()
13504 .cr(2)
13505 .kr(9)
13506 .channels(2)
13507 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13508}
13509
13510TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_div_2) {
13511 for (uint32_t channels = 4; channels < 32; channels += 6) {
13512 DWConvMicrokernelTester()
13513 .cr(2)
13514 .kr(9)
13515 .channels(channels)
13516 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13517 }
13518}
13519
13520TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_div_2_with_qmin) {
13521 for (uint32_t channels = 4; channels < 32; channels += 6) {
13522 DWConvMicrokernelTester()
13523 .cr(2)
13524 .kr(9)
13525 .channels(channels)
13526 .qmin(128)
13527 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13528 }
13529}
13530
13531TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_div_2_with_qmax) {
13532 for (uint32_t channels = 4; channels < 32; channels += 6) {
13533 DWConvMicrokernelTester()
13534 .cr(2)
13535 .kr(9)
13536 .channels(channels)
13537 .qmax(128)
13538 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13539 }
13540}
13541
13542TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_lt_2) {
13543 for (uint32_t channels = 1; channels < 2; channels++) {
13544 DWConvMicrokernelTester()
13545 .cr(2)
13546 .kr(9)
13547 .channels(channels)
13548 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13549 }
13550}
13551
13552TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_gt_2) {
13553 for (uint32_t channels = 3; channels < 4; channels++) {
13554 DWConvMicrokernelTester()
13555 .cr(2)
13556 .kr(9)
13557 .channels(channels)
13558 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13559 }
13560}
13561
13562TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_gt_2_with_qmin) {
13563 for (uint32_t channels = 3; channels < 4; channels++) {
13564 DWConvMicrokernelTester()
13565 .cr(2)
13566 .kr(9)
13567 .channels(channels)
13568 .qmin(128)
13569 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13570 }
13571}
13572
13573TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, c_gt_2_with_qmax) {
13574 for (uint32_t channels = 3; channels < 4; channels++) {
13575 DWConvMicrokernelTester()
13576 .cr(2)
13577 .kr(9)
13578 .channels(channels)
13579 .qmax(128)
13580 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13581 }
13582}
13583
13584TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, multipixel) {
13585 for (size_t channels = 1; channels <= 10; channels += 1) {
13586 DWConvMicrokernelTester()
13587 .cr(2)
13588 .kr(9)
13589 .channels(channels)
13590 .width(3)
13591 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13592 }
13593}
13594
13595TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, multipixel_with_step) {
13596 for (size_t channels = 1; channels <= 10; channels += 1) {
13597 for (size_t step = 2; step <= 9; step++) {
13598 DWConvMicrokernelTester()
13599 .cr(2)
13600 .kr(9)
13601 .channels(channels)
13602 .width(3)
13603 .step(step)
13604 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13605 }
13606 }
13607}
13608
13609TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, multipixel_with_output_stride) {
13610 for (size_t channels = 1; channels <= 10; channels += 1) {
13611 DWConvMicrokernelTester()
13612 .cr(2)
13613 .kr(9)
13614 .channels(2)
13615 .width(5)
13616 .output_stride(13)
13617 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13618 }
13619}
13620
13621TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, multipixel_with_qmin) {
13622 for (size_t channels = 1; channels <= 10; channels += 1) {
13623 DWConvMicrokernelTester()
13624 .cr(2)
13625 .kr(9)
13626 .channels(channels)
13627 .width(3)
13628 .qmin(128)
13629 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13630 }
13631}
13632
13633TEST(F32_DWCONV_UP2X9__SCALAR_ACC2, multipixel_with_qmax) {
13634 for (size_t channels = 1; channels <= 10; channels += 1) {
13635 DWConvMicrokernelTester()
13636 .cr(2)
13637 .kr(9)
13638 .channels(channels)
13639 .width(3)
13640 .qmax(128)
13641 .Test(xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13642 }
13643}
13644
13645
13646TEST(F32_DWCONV_UP1X25__SCALAR, c_eq_1) {
13647 DWConvMicrokernelTester()
13648 .cr(1)
13649 .kr(25)
13650 .channels(1)
13651 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13652}
13653
13654TEST(F32_DWCONV_UP1X25__SCALAR, c_gt_1) {
13655 for (uint32_t channels = 2; channels < 10; channels++) {
13656 DWConvMicrokernelTester()
13657 .cr(1)
13658 .kr(25)
13659 .channels(channels)
13660 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13661 }
13662}
13663
13664TEST(F32_DWCONV_UP1X25__SCALAR, c_gt_1_with_qmin) {
13665 for (uint32_t channels = 2; channels < 10; channels++) {
13666 DWConvMicrokernelTester()
13667 .cr(1)
13668 .kr(25)
13669 .channels(channels)
13670 .qmin(128)
13671 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13672 }
13673}
13674
13675TEST(F32_DWCONV_UP1X25__SCALAR, c_gt_1_with_qmax) {
13676 for (uint32_t channels = 2; channels < 10; channels++) {
13677 DWConvMicrokernelTester()
13678 .cr(1)
13679 .kr(25)
13680 .channels(channels)
13681 .qmax(128)
13682 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13683 }
13684}
13685
13686TEST(F32_DWCONV_UP1X25__SCALAR, multipixel) {
13687 for (size_t channels = 1; channels <= 5; channels += 1) {
13688 DWConvMicrokernelTester()
13689 .cr(1)
13690 .kr(25)
13691 .channels(channels)
13692 .width(3)
13693 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13694 }
13695}
13696
13697TEST(F32_DWCONV_UP1X25__SCALAR, multipixel_with_step) {
13698 for (size_t channels = 1; channels <= 5; channels += 1) {
13699 for (size_t step = 2; step <= 25; step++) {
13700 DWConvMicrokernelTester()
13701 .cr(1)
13702 .kr(25)
13703 .channels(channels)
13704 .width(3)
13705 .step(step)
13706 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13707 }
13708 }
13709}
13710
13711TEST(F32_DWCONV_UP1X25__SCALAR, multipixel_with_output_stride) {
13712 for (size_t channels = 1; channels <= 5; channels += 1) {
13713 DWConvMicrokernelTester()
13714 .cr(1)
13715 .kr(25)
13716 .channels(1)
13717 .width(5)
13718 .output_stride(7)
13719 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13720 }
13721}
13722
13723TEST(F32_DWCONV_UP1X25__SCALAR, multipixel_with_qmin) {
13724 for (size_t channels = 1; channels <= 5; channels += 1) {
13725 DWConvMicrokernelTester()
13726 .cr(1)
13727 .kr(25)
13728 .channels(channels)
13729 .width(3)
13730 .qmin(128)
13731 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13732 }
13733}
13734
13735TEST(F32_DWCONV_UP1X25__SCALAR, multipixel_with_qmax) {
13736 for (size_t channels = 1; channels <= 5; channels += 1) {
13737 DWConvMicrokernelTester()
13738 .cr(1)
13739 .kr(25)
13740 .channels(channels)
13741 .width(3)
13742 .qmax(128)
13743 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13744 }
13745}
13746
13747
13748TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, c_eq_1) {
13749 DWConvMicrokernelTester()
13750 .cr(1)
13751 .kr(25)
13752 .channels(1)
13753 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13754}
13755
13756TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, c_gt_1) {
13757 for (uint32_t channels = 2; channels < 10; channels++) {
13758 DWConvMicrokernelTester()
13759 .cr(1)
13760 .kr(25)
13761 .channels(channels)
13762 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13763 }
13764}
13765
13766TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, c_gt_1_with_qmin) {
13767 for (uint32_t channels = 2; channels < 10; channels++) {
13768 DWConvMicrokernelTester()
13769 .cr(1)
13770 .kr(25)
13771 .channels(channels)
13772 .qmin(128)
13773 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13774 }
13775}
13776
13777TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, c_gt_1_with_qmax) {
13778 for (uint32_t channels = 2; channels < 10; channels++) {
13779 DWConvMicrokernelTester()
13780 .cr(1)
13781 .kr(25)
13782 .channels(channels)
13783 .qmax(128)
13784 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13785 }
13786}
13787
13788TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, multipixel) {
13789 for (size_t channels = 1; channels <= 5; channels += 1) {
13790 DWConvMicrokernelTester()
13791 .cr(1)
13792 .kr(25)
13793 .channels(channels)
13794 .width(3)
13795 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13796 }
13797}
13798
13799TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, multipixel_with_step) {
13800 for (size_t channels = 1; channels <= 5; channels += 1) {
13801 for (size_t step = 2; step <= 25; step++) {
13802 DWConvMicrokernelTester()
13803 .cr(1)
13804 .kr(25)
13805 .channels(channels)
13806 .width(3)
13807 .step(step)
13808 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13809 }
13810 }
13811}
13812
13813TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, multipixel_with_output_stride) {
13814 for (size_t channels = 1; channels <= 5; channels += 1) {
13815 DWConvMicrokernelTester()
13816 .cr(1)
13817 .kr(25)
13818 .channels(1)
13819 .width(5)
13820 .output_stride(7)
13821 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13822 }
13823}
13824
13825TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, multipixel_with_qmin) {
13826 for (size_t channels = 1; channels <= 5; channels += 1) {
13827 DWConvMicrokernelTester()
13828 .cr(1)
13829 .kr(25)
13830 .channels(channels)
13831 .width(3)
13832 .qmin(128)
13833 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13834 }
13835}
13836
13837TEST(F32_DWCONV_UP1X25__SCALAR_ACC2, multipixel_with_qmax) {
13838 for (size_t channels = 1; channels <= 5; channels += 1) {
13839 DWConvMicrokernelTester()
13840 .cr(1)
13841 .kr(25)
13842 .channels(channels)
13843 .width(3)
13844 .qmax(128)
13845 .Test(xnn_f32_dwconv_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
13846 }
13847}
13848
13849
13850TEST(F32_DWCONV_UP2X25__SCALAR, c_eq_2) {
13851 DWConvMicrokernelTester()
13852 .cr(2)
13853 .kr(25)
13854 .channels(2)
13855 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13856}
13857
13858TEST(F32_DWCONV_UP2X25__SCALAR, c_div_2) {
13859 for (uint32_t channels = 4; channels < 32; channels += 6) {
13860 DWConvMicrokernelTester()
13861 .cr(2)
13862 .kr(25)
13863 .channels(channels)
13864 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13865 }
13866}
13867
13868TEST(F32_DWCONV_UP2X25__SCALAR, c_div_2_with_qmin) {
13869 for (uint32_t channels = 4; channels < 32; channels += 6) {
13870 DWConvMicrokernelTester()
13871 .cr(2)
13872 .kr(25)
13873 .channels(channels)
13874 .qmin(128)
13875 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13876 }
13877}
13878
13879TEST(F32_DWCONV_UP2X25__SCALAR, c_div_2_with_qmax) {
13880 for (uint32_t channels = 4; channels < 32; channels += 6) {
13881 DWConvMicrokernelTester()
13882 .cr(2)
13883 .kr(25)
13884 .channels(channels)
13885 .qmax(128)
13886 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13887 }
13888}
13889
13890TEST(F32_DWCONV_UP2X25__SCALAR, c_lt_2) {
13891 for (uint32_t channels = 1; channels < 2; channels++) {
13892 DWConvMicrokernelTester()
13893 .cr(2)
13894 .kr(25)
13895 .channels(channels)
13896 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13897 }
13898}
13899
13900TEST(F32_DWCONV_UP2X25__SCALAR, c_gt_2) {
13901 for (uint32_t channels = 3; channels < 4; channels++) {
13902 DWConvMicrokernelTester()
13903 .cr(2)
13904 .kr(25)
13905 .channels(channels)
13906 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13907 }
13908}
13909
13910TEST(F32_DWCONV_UP2X25__SCALAR, c_gt_2_with_qmin) {
13911 for (uint32_t channels = 3; channels < 4; channels++) {
13912 DWConvMicrokernelTester()
13913 .cr(2)
13914 .kr(25)
13915 .channels(channels)
13916 .qmin(128)
13917 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13918 }
13919}
13920
13921TEST(F32_DWCONV_UP2X25__SCALAR, c_gt_2_with_qmax) {
13922 for (uint32_t channels = 3; channels < 4; channels++) {
13923 DWConvMicrokernelTester()
13924 .cr(2)
13925 .kr(25)
13926 .channels(channels)
13927 .qmax(128)
13928 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13929 }
13930}
13931
13932TEST(F32_DWCONV_UP2X25__SCALAR, multipixel) {
13933 for (size_t channels = 1; channels <= 10; channels += 1) {
13934 DWConvMicrokernelTester()
13935 .cr(2)
13936 .kr(25)
13937 .channels(channels)
13938 .width(3)
13939 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13940 }
13941}
13942
13943TEST(F32_DWCONV_UP2X25__SCALAR, multipixel_with_step) {
13944 for (size_t channels = 1; channels <= 10; channels += 1) {
13945 for (size_t step = 2; step <= 25; step++) {
13946 DWConvMicrokernelTester()
13947 .cr(2)
13948 .kr(25)
13949 .channels(channels)
13950 .width(3)
13951 .step(step)
13952 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13953 }
13954 }
13955}
13956
13957TEST(F32_DWCONV_UP2X25__SCALAR, multipixel_with_output_stride) {
13958 for (size_t channels = 1; channels <= 10; channels += 1) {
13959 DWConvMicrokernelTester()
13960 .cr(2)
13961 .kr(25)
13962 .channels(2)
13963 .width(5)
13964 .output_stride(13)
13965 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13966 }
13967}
13968
13969TEST(F32_DWCONV_UP2X25__SCALAR, multipixel_with_qmin) {
13970 for (size_t channels = 1; channels <= 10; channels += 1) {
13971 DWConvMicrokernelTester()
13972 .cr(2)
13973 .kr(25)
13974 .channels(channels)
13975 .width(3)
13976 .qmin(128)
13977 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13978 }
13979}
13980
13981TEST(F32_DWCONV_UP2X25__SCALAR, multipixel_with_qmax) {
13982 for (size_t channels = 1; channels <= 10; channels += 1) {
13983 DWConvMicrokernelTester()
13984 .cr(2)
13985 .kr(25)
13986 .channels(channels)
13987 .width(3)
13988 .qmax(128)
13989 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
13990 }
13991}
13992
13993
13994TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_eq_2) {
13995 DWConvMicrokernelTester()
13996 .cr(2)
13997 .kr(25)
13998 .channels(2)
13999 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14000}
14001
14002TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_div_2) {
14003 for (uint32_t channels = 4; channels < 32; channels += 6) {
14004 DWConvMicrokernelTester()
14005 .cr(2)
14006 .kr(25)
14007 .channels(channels)
14008 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14009 }
14010}
14011
14012TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_div_2_with_qmin) {
14013 for (uint32_t channels = 4; channels < 32; channels += 6) {
14014 DWConvMicrokernelTester()
14015 .cr(2)
14016 .kr(25)
14017 .channels(channels)
14018 .qmin(128)
14019 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14020 }
14021}
14022
14023TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_div_2_with_qmax) {
14024 for (uint32_t channels = 4; channels < 32; channels += 6) {
14025 DWConvMicrokernelTester()
14026 .cr(2)
14027 .kr(25)
14028 .channels(channels)
14029 .qmax(128)
14030 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14031 }
14032}
14033
14034TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_lt_2) {
14035 for (uint32_t channels = 1; channels < 2; channels++) {
14036 DWConvMicrokernelTester()
14037 .cr(2)
14038 .kr(25)
14039 .channels(channels)
14040 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14041 }
14042}
14043
14044TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_gt_2) {
14045 for (uint32_t channels = 3; channels < 4; channels++) {
14046 DWConvMicrokernelTester()
14047 .cr(2)
14048 .kr(25)
14049 .channels(channels)
14050 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14051 }
14052}
14053
14054TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_gt_2_with_qmin) {
14055 for (uint32_t channels = 3; channels < 4; channels++) {
14056 DWConvMicrokernelTester()
14057 .cr(2)
14058 .kr(25)
14059 .channels(channels)
14060 .qmin(128)
14061 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14062 }
14063}
14064
14065TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, c_gt_2_with_qmax) {
14066 for (uint32_t channels = 3; channels < 4; channels++) {
14067 DWConvMicrokernelTester()
14068 .cr(2)
14069 .kr(25)
14070 .channels(channels)
14071 .qmax(128)
14072 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14073 }
14074}
14075
14076TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, multipixel) {
14077 for (size_t channels = 1; channels <= 10; channels += 1) {
14078 DWConvMicrokernelTester()
14079 .cr(2)
14080 .kr(25)
14081 .channels(channels)
14082 .width(3)
14083 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14084 }
14085}
14086
14087TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, multipixel_with_step) {
14088 for (size_t channels = 1; channels <= 10; channels += 1) {
14089 for (size_t step = 2; step <= 25; step++) {
14090 DWConvMicrokernelTester()
14091 .cr(2)
14092 .kr(25)
14093 .channels(channels)
14094 .width(3)
14095 .step(step)
14096 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14097 }
14098 }
14099}
14100
14101TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, multipixel_with_output_stride) {
14102 for (size_t channels = 1; channels <= 10; channels += 1) {
14103 DWConvMicrokernelTester()
14104 .cr(2)
14105 .kr(25)
14106 .channels(2)
14107 .width(5)
14108 .output_stride(13)
14109 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14110 }
14111}
14112
14113TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, multipixel_with_qmin) {
14114 for (size_t channels = 1; channels <= 10; channels += 1) {
14115 DWConvMicrokernelTester()
14116 .cr(2)
14117 .kr(25)
14118 .channels(channels)
14119 .width(3)
14120 .qmin(128)
14121 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14122 }
14123}
14124
14125TEST(F32_DWCONV_UP2X25__SCALAR_ACC2, multipixel_with_qmax) {
14126 for (size_t channels = 1; channels <= 10; channels += 1) {
14127 DWConvMicrokernelTester()
14128 .cr(2)
14129 .kr(25)
14130 .channels(channels)
14131 .width(3)
14132 .qmax(128)
14133 .Test(xnn_f32_dwconv_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
14134 }
14135}