blob: 37a68ce5bfd001490a09798b2e2653851c5d1440 [file] [log] [blame]
Marat Dukhanf62bbdc2020-08-04 13:59:04 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8//
9// Auto-generated file. Do not edit!
10// Specification: test/qs8-dwconv-minmax.yaml
11// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
Marat Dukhan023bcf92020-08-10 12:40:50 -070023#if XNN_ARCH_ARM || XNN_ARCH_ARM64
24 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_eq_8) {
25 TEST_REQUIRES_ARM_NEON;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(9)
29 .channels(8)
30 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
31 }
32
33 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8) {
34 TEST_REQUIRES_ARM_NEON;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(9)
39 .channels(channels)
40 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
41 }
42 }
43
44 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
45 TEST_REQUIRES_ARM_NEON;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
52 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
53 }
54 }
55
56 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
57 TEST_REQUIRES_ARM_NEON;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
64 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
65 }
66 }
67
68 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_lt_8) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(9)
74 .channels(channels)
75 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
76 }
77 }
78
79 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8) {
80 TEST_REQUIRES_ARM_NEON;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(9)
85 .channels(channels)
86 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
87 }
88 }
89
90 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
91 TEST_REQUIRES_ARM_NEON;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
98 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
99 }
100 }
101
102 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
110 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
111 }
112 }
113
114 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(9)
120 .channels(channels)
121 .width(3)
122 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
123 }
124 }
125
126 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
136 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
137 }
138 }
139 }
140
141 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(9)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
150 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
151 }
152 }
153
154 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
163 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
164 }
165 }
166
167 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
176 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
177 }
178 }
179
180 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, input_offset) {
181 TEST_REQUIRES_ARM_NEON;
182 for (uint32_t channels = 16; channels < 128; channels += 24) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(9)
186 .channels(channels)
187 .input_offset(176)
188 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
189 }
190 }
191
192 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, zero) {
193 TEST_REQUIRES_ARM_NEON;
194 for (uint32_t mz = 0; mz < 9; mz++) {
195 for (uint32_t channels = 16; channels < 128; channels += 24) {
196 DWConvMicrokernelTester()
197 .cr(8)
198 .kr(9)
199 .channels(channels)
200 .input_offset(176)
201 .zero_index(mz)
202 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
203 }
204 }
205 }
206#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
207
208
209#if XNN_ARCH_ARM || XNN_ARCH_ARM64
210 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_eq_16) {
211 TEST_REQUIRES_ARM_NEON;
212 DWConvMicrokernelTester()
213 .cr(16)
214 .kr(9)
215 .channels(16)
216 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
217 }
218
219 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16) {
220 TEST_REQUIRES_ARM_NEON;
221 for (uint32_t channels = 32; channels < 256; channels += 48) {
222 DWConvMicrokernelTester()
223 .cr(16)
224 .kr(9)
225 .channels(channels)
226 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
227 }
228 }
229
230 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
231 TEST_REQUIRES_ARM_NEON;
232 for (uint32_t channels = 32; channels < 256; channels += 48) {
233 DWConvMicrokernelTester()
234 .cr(16)
235 .kr(9)
236 .channels(channels)
237 .qmin(128)
238 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
239 }
240 }
241
242 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t channels = 32; channels < 256; channels += 48) {
245 DWConvMicrokernelTester()
246 .cr(16)
247 .kr(9)
248 .channels(channels)
249 .qmax(128)
250 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
251 }
252 }
253
254 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_lt_16) {
255 TEST_REQUIRES_ARM_NEON;
256 for (uint32_t channels = 1; channels < 16; channels++) {
257 DWConvMicrokernelTester()
258 .cr(16)
259 .kr(9)
260 .channels(channels)
261 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
262 }
263 }
264
265 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16) {
266 TEST_REQUIRES_ARM_NEON;
267 for (uint32_t channels = 17; channels < 32; channels++) {
268 DWConvMicrokernelTester()
269 .cr(16)
270 .kr(9)
271 .channels(channels)
272 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
273 }
274 }
275
276 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
277 TEST_REQUIRES_ARM_NEON;
278 for (uint32_t channels = 17; channels < 32; channels++) {
279 DWConvMicrokernelTester()
280 .cr(16)
281 .kr(9)
282 .channels(channels)
283 .qmin(128)
284 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
285 }
286 }
287
288 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t channels = 17; channels < 32; channels++) {
291 DWConvMicrokernelTester()
292 .cr(16)
293 .kr(9)
294 .channels(channels)
295 .qmax(128)
296 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
297 }
298 }
299
300 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel) {
301 TEST_REQUIRES_ARM_NEON;
302 for (size_t channels = 1; channels <= 80; channels += 15) {
303 DWConvMicrokernelTester()
304 .cr(16)
305 .kr(9)
306 .channels(channels)
307 .width(3)
308 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
309 }
310 }
311
312 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_step) {
313 TEST_REQUIRES_ARM_NEON;
314 for (size_t channels = 1; channels <= 80; channels += 15) {
315 for (size_t step = 2; step <= 9; step++) {
316 DWConvMicrokernelTester()
317 .cr(16)
318 .kr(9)
319 .channels(channels)
320 .width(3)
321 .step(step)
322 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
323 }
324 }
325 }
326
327 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
328 TEST_REQUIRES_ARM_NEON;
329 for (size_t channels = 1; channels <= 80; channels += 15) {
330 DWConvMicrokernelTester()
331 .cr(16)
332 .kr(9)
333 .channels(16)
334 .width(5)
335 .output_stride(83)
336 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
337 }
338 }
339
340 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmin) {
341 TEST_REQUIRES_ARM_NEON;
342 for (size_t channels = 1; channels <= 80; channels += 15) {
343 DWConvMicrokernelTester()
344 .cr(16)
345 .kr(9)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
349 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
350 }
351 }
352
353 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmax) {
354 TEST_REQUIRES_ARM_NEON;
355 for (size_t channels = 1; channels <= 80; channels += 15) {
356 DWConvMicrokernelTester()
357 .cr(16)
358 .kr(9)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
362 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
363 }
364 }
365
366 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, input_offset) {
367 TEST_REQUIRES_ARM_NEON;
368 for (uint32_t channels = 32; channels < 256; channels += 48) {
369 DWConvMicrokernelTester()
370 .cr(16)
371 .kr(9)
372 .channels(channels)
373 .input_offset(304)
374 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
375 }
376 }
377
378 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, zero) {
379 TEST_REQUIRES_ARM_NEON;
380 for (uint32_t mz = 0; mz < 9; mz++) {
381 for (uint32_t channels = 32; channels < 256; channels += 48) {
382 DWConvMicrokernelTester()
383 .cr(16)
384 .kr(9)
385 .channels(channels)
386 .input_offset(304)
387 .zero_index(mz)
388 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
389 }
390 }
391 }
392#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
393
394
395#if XNN_ARCH_ARM || XNN_ARCH_ARM64
396 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_eq_24) {
397 TEST_REQUIRES_ARM_NEON;
398 DWConvMicrokernelTester()
399 .cr(24)
400 .kr(9)
401 .channels(24)
402 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
403 }
404
405 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24) {
406 TEST_REQUIRES_ARM_NEON;
407 for (uint32_t channels = 48; channels < 384; channels += 72) {
408 DWConvMicrokernelTester()
409 .cr(24)
410 .kr(9)
411 .channels(channels)
412 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
413 }
414 }
415
416 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
417 TEST_REQUIRES_ARM_NEON;
418 for (uint32_t channels = 48; channels < 384; channels += 72) {
419 DWConvMicrokernelTester()
420 .cr(24)
421 .kr(9)
422 .channels(channels)
423 .qmin(128)
424 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
425 }
426 }
427
428 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
429 TEST_REQUIRES_ARM_NEON;
430 for (uint32_t channels = 48; channels < 384; channels += 72) {
431 DWConvMicrokernelTester()
432 .cr(24)
433 .kr(9)
434 .channels(channels)
435 .qmax(128)
436 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
437 }
438 }
439
440 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_lt_24) {
441 TEST_REQUIRES_ARM_NEON;
442 for (uint32_t channels = 1; channels < 24; channels++) {
443 DWConvMicrokernelTester()
444 .cr(24)
445 .kr(9)
446 .channels(channels)
447 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
448 }
449 }
450
451 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24) {
452 TEST_REQUIRES_ARM_NEON;
453 for (uint32_t channels = 25; channels < 48; channels++) {
454 DWConvMicrokernelTester()
455 .cr(24)
456 .kr(9)
457 .channels(channels)
458 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
459 }
460 }
461
462 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
463 TEST_REQUIRES_ARM_NEON;
464 for (uint32_t channels = 25; channels < 48; channels++) {
465 DWConvMicrokernelTester()
466 .cr(24)
467 .kr(9)
468 .channels(channels)
469 .qmin(128)
470 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
471 }
472 }
473
474 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
475 TEST_REQUIRES_ARM_NEON;
476 for (uint32_t channels = 25; channels < 48; channels++) {
477 DWConvMicrokernelTester()
478 .cr(24)
479 .kr(9)
480 .channels(channels)
481 .qmax(128)
482 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
483 }
484 }
485
486 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel) {
487 TEST_REQUIRES_ARM_NEON;
488 for (size_t channels = 1; channels <= 120; channels += 23) {
489 DWConvMicrokernelTester()
490 .cr(24)
491 .kr(9)
492 .channels(channels)
493 .width(3)
494 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
495 }
496 }
497
498 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_step) {
499 TEST_REQUIRES_ARM_NEON;
500 for (size_t channels = 1; channels <= 120; channels += 23) {
501 for (size_t step = 2; step <= 9; step++) {
502 DWConvMicrokernelTester()
503 .cr(24)
504 .kr(9)
505 .channels(channels)
506 .width(3)
507 .step(step)
508 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
509 }
510 }
511 }
512
513 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
514 TEST_REQUIRES_ARM_NEON;
515 for (size_t channels = 1; channels <= 120; channels += 23) {
516 DWConvMicrokernelTester()
517 .cr(24)
518 .kr(9)
519 .channels(24)
520 .width(5)
521 .output_stride(127)
522 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
523 }
524 }
525
526 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmin) {
527 TEST_REQUIRES_ARM_NEON;
528 for (size_t channels = 1; channels <= 120; channels += 23) {
529 DWConvMicrokernelTester()
530 .cr(24)
531 .kr(9)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
535 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
536 }
537 }
538
539 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmax) {
540 TEST_REQUIRES_ARM_NEON;
541 for (size_t channels = 1; channels <= 120; channels += 23) {
542 DWConvMicrokernelTester()
543 .cr(24)
544 .kr(9)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
548 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
549 }
550 }
551
552 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, input_offset) {
553 TEST_REQUIRES_ARM_NEON;
554 for (uint32_t channels = 48; channels < 384; channels += 72) {
555 DWConvMicrokernelTester()
556 .cr(24)
557 .kr(9)
558 .channels(channels)
559 .input_offset(464)
560 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
561 }
562 }
563
564 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, zero) {
565 TEST_REQUIRES_ARM_NEON;
566 for (uint32_t mz = 0; mz < 9; mz++) {
567 for (uint32_t channels = 48; channels < 384; channels += 72) {
568 DWConvMicrokernelTester()
569 .cr(24)
570 .kr(9)
571 .channels(channels)
572 .input_offset(464)
573 .zero_index(mz)
574 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
575 }
576 }
577 }
578#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
579
580
581#if XNN_ARCH_ARM || XNN_ARCH_ARM64
582 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_eq_32) {
583 TEST_REQUIRES_ARM_NEON;
584 DWConvMicrokernelTester()
585 .cr(32)
586 .kr(9)
587 .channels(32)
588 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
589 }
590
591 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32) {
592 TEST_REQUIRES_ARM_NEON;
593 for (uint32_t channels = 64; channels < 512; channels += 96) {
594 DWConvMicrokernelTester()
595 .cr(32)
596 .kr(9)
597 .channels(channels)
598 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
599 }
600 }
601
602 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
603 TEST_REQUIRES_ARM_NEON;
604 for (uint32_t channels = 64; channels < 512; channels += 96) {
605 DWConvMicrokernelTester()
606 .cr(32)
607 .kr(9)
608 .channels(channels)
609 .qmin(128)
610 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
611 }
612 }
613
614 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
615 TEST_REQUIRES_ARM_NEON;
616 for (uint32_t channels = 64; channels < 512; channels += 96) {
617 DWConvMicrokernelTester()
618 .cr(32)
619 .kr(9)
620 .channels(channels)
621 .qmax(128)
622 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
623 }
624 }
625
626 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_lt_32) {
627 TEST_REQUIRES_ARM_NEON;
628 for (uint32_t channels = 1; channels < 32; channels++) {
629 DWConvMicrokernelTester()
630 .cr(32)
631 .kr(9)
632 .channels(channels)
633 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
634 }
635 }
636
637 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32) {
638 TEST_REQUIRES_ARM_NEON;
639 for (uint32_t channels = 33; channels < 64; channels++) {
640 DWConvMicrokernelTester()
641 .cr(32)
642 .kr(9)
643 .channels(channels)
644 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
645 }
646 }
647
648 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
649 TEST_REQUIRES_ARM_NEON;
650 for (uint32_t channels = 33; channels < 64; channels++) {
651 DWConvMicrokernelTester()
652 .cr(32)
653 .kr(9)
654 .channels(channels)
655 .qmin(128)
656 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
657 }
658 }
659
660 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
661 TEST_REQUIRES_ARM_NEON;
662 for (uint32_t channels = 33; channels < 64; channels++) {
663 DWConvMicrokernelTester()
664 .cr(32)
665 .kr(9)
666 .channels(channels)
667 .qmax(128)
668 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
669 }
670 }
671
672 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel) {
673 TEST_REQUIRES_ARM_NEON;
674 for (size_t channels = 1; channels <= 160; channels += 31) {
675 DWConvMicrokernelTester()
676 .cr(32)
677 .kr(9)
678 .channels(channels)
679 .width(3)
680 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
681 }
682 }
683
684 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_step) {
685 TEST_REQUIRES_ARM_NEON;
686 for (size_t channels = 1; channels <= 160; channels += 31) {
687 for (size_t step = 2; step <= 9; step++) {
688 DWConvMicrokernelTester()
689 .cr(32)
690 .kr(9)
691 .channels(channels)
692 .width(3)
693 .step(step)
694 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
695 }
696 }
697 }
698
699 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
700 TEST_REQUIRES_ARM_NEON;
701 for (size_t channels = 1; channels <= 160; channels += 31) {
702 DWConvMicrokernelTester()
703 .cr(32)
704 .kr(9)
705 .channels(32)
706 .width(5)
707 .output_stride(163)
708 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
709 }
710 }
711
712 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmin) {
713 TEST_REQUIRES_ARM_NEON;
714 for (size_t channels = 1; channels <= 160; channels += 31) {
715 DWConvMicrokernelTester()
716 .cr(32)
717 .kr(9)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
721 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
722 }
723 }
724
725 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmax) {
726 TEST_REQUIRES_ARM_NEON;
727 for (size_t channels = 1; channels <= 160; channels += 31) {
728 DWConvMicrokernelTester()
729 .cr(32)
730 .kr(9)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
734 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
735 }
736 }
737
738 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, input_offset) {
739 TEST_REQUIRES_ARM_NEON;
740 for (uint32_t channels = 64; channels < 512; channels += 96) {
741 DWConvMicrokernelTester()
742 .cr(32)
743 .kr(9)
744 .channels(channels)
745 .input_offset(592)
746 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
747 }
748 }
749
750 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, zero) {
751 TEST_REQUIRES_ARM_NEON;
752 for (uint32_t mz = 0; mz < 9; mz++) {
753 for (uint32_t channels = 64; channels < 512; channels += 96) {
754 DWConvMicrokernelTester()
755 .cr(32)
756 .kr(9)
757 .channels(channels)
758 .input_offset(592)
759 .zero_index(mz)
760 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
761 }
762 }
763 }
764#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
Marat Dukhanf62bbdc2020-08-04 13:59:04 -0700767#if XNN_ARCH_X86 || XNN_ARCH_X86_64
768 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_eq_8) {
769 TEST_REQUIRES_X86_SSE2;
770 DWConvMicrokernelTester()
771 .cr(8)
772 .kr(9)
773 .channels(8)
774 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
775 }
776
777 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8) {
778 TEST_REQUIRES_X86_SSE2;
779 for (uint32_t channels = 16; channels < 128; channels += 24) {
780 DWConvMicrokernelTester()
781 .cr(8)
782 .kr(9)
783 .channels(channels)
784 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
785 }
786 }
787
788 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
789 TEST_REQUIRES_X86_SSE2;
790 for (uint32_t channels = 16; channels < 128; channels += 24) {
791 DWConvMicrokernelTester()
792 .cr(8)
793 .kr(9)
794 .channels(channels)
795 .qmin(128)
796 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
797 }
798 }
799
800 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
801 TEST_REQUIRES_X86_SSE2;
802 for (uint32_t channels = 16; channels < 128; channels += 24) {
803 DWConvMicrokernelTester()
804 .cr(8)
805 .kr(9)
806 .channels(channels)
807 .qmax(128)
808 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
809 }
810 }
811
812 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_lt_8) {
813 TEST_REQUIRES_X86_SSE2;
814 for (uint32_t channels = 1; channels < 8; channels++) {
815 DWConvMicrokernelTester()
816 .cr(8)
817 .kr(9)
818 .channels(channels)
819 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
820 }
821 }
822
823 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8) {
824 TEST_REQUIRES_X86_SSE2;
825 for (uint32_t channels = 9; channels < 16; channels++) {
826 DWConvMicrokernelTester()
827 .cr(8)
828 .kr(9)
829 .channels(channels)
830 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
831 }
832 }
833
834 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
835 TEST_REQUIRES_X86_SSE2;
836 for (uint32_t channels = 9; channels < 16; channels++) {
837 DWConvMicrokernelTester()
838 .cr(8)
839 .kr(9)
840 .channels(channels)
841 .qmin(128)
842 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
843 }
844 }
845
846 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
847 TEST_REQUIRES_X86_SSE2;
848 for (uint32_t channels = 9; channels < 16; channels++) {
849 DWConvMicrokernelTester()
850 .cr(8)
851 .kr(9)
852 .channels(channels)
853 .qmax(128)
854 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
855 }
856 }
857
858 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel) {
859 TEST_REQUIRES_X86_SSE2;
860 for (size_t channels = 1; channels <= 40; channels += 7) {
861 DWConvMicrokernelTester()
862 .cr(8)
863 .kr(9)
864 .channels(channels)
865 .width(3)
866 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
867 }
868 }
869
870 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_step) {
871 TEST_REQUIRES_X86_SSE2;
872 for (size_t channels = 1; channels <= 40; channels += 7) {
873 for (size_t step = 2; step <= 9; step++) {
874 DWConvMicrokernelTester()
875 .cr(8)
876 .kr(9)
877 .channels(channels)
878 .width(3)
879 .step(step)
880 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
881 }
882 }
883 }
884
885 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
886 TEST_REQUIRES_X86_SSE2;
887 for (size_t channels = 1; channels <= 40; channels += 7) {
888 DWConvMicrokernelTester()
889 .cr(8)
890 .kr(9)
891 .channels(8)
892 .width(5)
893 .output_stride(43)
894 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
895 }
896 }
897
898 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
899 TEST_REQUIRES_X86_SSE2;
900 for (size_t channels = 1; channels <= 40; channels += 7) {
901 DWConvMicrokernelTester()
902 .cr(8)
903 .kr(9)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
907 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
908 }
909 }
910
911 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
912 TEST_REQUIRES_X86_SSE2;
913 for (size_t channels = 1; channels <= 40; channels += 7) {
914 DWConvMicrokernelTester()
915 .cr(8)
916 .kr(9)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
920 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
921 }
922 }
923
924 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, input_offset) {
925 TEST_REQUIRES_X86_SSE2;
926 for (uint32_t channels = 16; channels < 128; channels += 24) {
927 DWConvMicrokernelTester()
928 .cr(8)
929 .kr(9)
930 .channels(channels)
931 .input_offset(176)
932 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
933 }
934 }
935
936 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, zero) {
937 TEST_REQUIRES_X86_SSE2;
938 for (uint32_t mz = 0; mz < 9; mz++) {
939 for (uint32_t channels = 16; channels < 128; channels += 24) {
940 DWConvMicrokernelTester()
941 .cr(8)
942 .kr(9)
943 .channels(channels)
944 .input_offset(176)
945 .zero_index(mz)
946 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
947 }
948 }
949 }
950#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
951
952
953#if XNN_ARCH_X86 || XNN_ARCH_X86_64
954 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_eq_16) {
955 TEST_REQUIRES_X86_SSE2;
956 DWConvMicrokernelTester()
957 .cr(16)
958 .kr(9)
959 .channels(16)
960 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
961 }
962
963 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16) {
964 TEST_REQUIRES_X86_SSE2;
965 for (uint32_t channels = 32; channels < 256; channels += 48) {
966 DWConvMicrokernelTester()
967 .cr(16)
968 .kr(9)
969 .channels(channels)
970 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
971 }
972 }
973
974 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
975 TEST_REQUIRES_X86_SSE2;
976 for (uint32_t channels = 32; channels < 256; channels += 48) {
977 DWConvMicrokernelTester()
978 .cr(16)
979 .kr(9)
980 .channels(channels)
981 .qmin(128)
982 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
983 }
984 }
985
986 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
987 TEST_REQUIRES_X86_SSE2;
988 for (uint32_t channels = 32; channels < 256; channels += 48) {
989 DWConvMicrokernelTester()
990 .cr(16)
991 .kr(9)
992 .channels(channels)
993 .qmax(128)
994 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
995 }
996 }
997
998 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_lt_16) {
999 TEST_REQUIRES_X86_SSE2;
1000 for (uint32_t channels = 1; channels < 16; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(16)
1003 .kr(9)
1004 .channels(channels)
1005 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1006 }
1007 }
1008
1009 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16) {
1010 TEST_REQUIRES_X86_SSE2;
1011 for (uint32_t channels = 17; channels < 32; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(16)
1014 .kr(9)
1015 .channels(channels)
1016 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1017 }
1018 }
1019
1020 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
1021 TEST_REQUIRES_X86_SSE2;
1022 for (uint32_t channels = 17; channels < 32; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(16)
1025 .kr(9)
1026 .channels(channels)
1027 .qmin(128)
1028 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1029 }
1030 }
1031
1032 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
1033 TEST_REQUIRES_X86_SSE2;
1034 for (uint32_t channels = 17; channels < 32; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(16)
1037 .kr(9)
1038 .channels(channels)
1039 .qmax(128)
1040 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1041 }
1042 }
1043
1044 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel) {
1045 TEST_REQUIRES_X86_SSE2;
1046 for (size_t channels = 1; channels <= 80; channels += 15) {
1047 DWConvMicrokernelTester()
1048 .cr(16)
1049 .kr(9)
1050 .channels(channels)
1051 .width(3)
1052 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1053 }
1054 }
1055
1056 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_step) {
1057 TEST_REQUIRES_X86_SSE2;
1058 for (size_t channels = 1; channels <= 80; channels += 15) {
1059 for (size_t step = 2; step <= 9; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(16)
1062 .kr(9)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
1066 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1067 }
1068 }
1069 }
1070
1071 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
1072 TEST_REQUIRES_X86_SSE2;
1073 for (size_t channels = 1; channels <= 80; channels += 15) {
1074 DWConvMicrokernelTester()
1075 .cr(16)
1076 .kr(9)
1077 .channels(16)
1078 .width(5)
1079 .output_stride(83)
1080 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1081 }
1082 }
1083
1084 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
1085 TEST_REQUIRES_X86_SSE2;
1086 for (size_t channels = 1; channels <= 80; channels += 15) {
1087 DWConvMicrokernelTester()
1088 .cr(16)
1089 .kr(9)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
1093 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1094 }
1095 }
1096
1097 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
1098 TEST_REQUIRES_X86_SSE2;
1099 for (size_t channels = 1; channels <= 80; channels += 15) {
1100 DWConvMicrokernelTester()
1101 .cr(16)
1102 .kr(9)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
1106 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1107 }
1108 }
1109
1110 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, input_offset) {
1111 TEST_REQUIRES_X86_SSE2;
1112 for (uint32_t channels = 32; channels < 256; channels += 48) {
1113 DWConvMicrokernelTester()
1114 .cr(16)
1115 .kr(9)
1116 .channels(channels)
1117 .input_offset(304)
1118 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1119 }
1120 }
1121
1122 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, zero) {
1123 TEST_REQUIRES_X86_SSE2;
1124 for (uint32_t mz = 0; mz < 9; mz++) {
1125 for (uint32_t channels = 32; channels < 256; channels += 48) {
1126 DWConvMicrokernelTester()
1127 .cr(16)
1128 .kr(9)
1129 .channels(channels)
1130 .input_offset(304)
1131 .zero_index(mz)
1132 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1133 }
1134 }
1135 }
1136#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1137
1138
1139#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1140 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_eq_24) {
1141 TEST_REQUIRES_X86_SSE2;
1142 DWConvMicrokernelTester()
1143 .cr(24)
1144 .kr(9)
1145 .channels(24)
1146 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1147 }
1148
1149 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24) {
1150 TEST_REQUIRES_X86_SSE2;
1151 for (uint32_t channels = 48; channels < 384; channels += 72) {
1152 DWConvMicrokernelTester()
1153 .cr(24)
1154 .kr(9)
1155 .channels(channels)
1156 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1157 }
1158 }
1159
1160 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
1161 TEST_REQUIRES_X86_SSE2;
1162 for (uint32_t channels = 48; channels < 384; channels += 72) {
1163 DWConvMicrokernelTester()
1164 .cr(24)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
1168 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1169 }
1170 }
1171
1172 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
1173 TEST_REQUIRES_X86_SSE2;
1174 for (uint32_t channels = 48; channels < 384; channels += 72) {
1175 DWConvMicrokernelTester()
1176 .cr(24)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
1180 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1181 }
1182 }
1183
1184 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_lt_24) {
1185 TEST_REQUIRES_X86_SSE2;
1186 for (uint32_t channels = 1; channels < 24; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(24)
1189 .kr(9)
1190 .channels(channels)
1191 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1192 }
1193 }
1194
1195 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24) {
1196 TEST_REQUIRES_X86_SSE2;
1197 for (uint32_t channels = 25; channels < 48; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(24)
1200 .kr(9)
1201 .channels(channels)
1202 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1203 }
1204 }
1205
1206 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
1207 TEST_REQUIRES_X86_SSE2;
1208 for (uint32_t channels = 25; channels < 48; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(24)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
1214 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1215 }
1216 }
1217
1218 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
1219 TEST_REQUIRES_X86_SSE2;
1220 for (uint32_t channels = 25; channels < 48; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(24)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
1226 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1227 }
1228 }
1229
1230 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel) {
1231 TEST_REQUIRES_X86_SSE2;
1232 for (size_t channels = 1; channels <= 120; channels += 23) {
1233 DWConvMicrokernelTester()
1234 .cr(24)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
1238 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1239 }
1240 }
1241
1242 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_step) {
1243 TEST_REQUIRES_X86_SSE2;
1244 for (size_t channels = 1; channels <= 120; channels += 23) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(24)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
1252 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1253 }
1254 }
1255 }
1256
1257 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
1258 TEST_REQUIRES_X86_SSE2;
1259 for (size_t channels = 1; channels <= 120; channels += 23) {
1260 DWConvMicrokernelTester()
1261 .cr(24)
1262 .kr(9)
1263 .channels(24)
1264 .width(5)
1265 .output_stride(127)
1266 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1267 }
1268 }
1269
1270 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
1271 TEST_REQUIRES_X86_SSE2;
1272 for (size_t channels = 1; channels <= 120; channels += 23) {
1273 DWConvMicrokernelTester()
1274 .cr(24)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
1279 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1280 }
1281 }
1282
1283 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
1284 TEST_REQUIRES_X86_SSE2;
1285 for (size_t channels = 1; channels <= 120; channels += 23) {
1286 DWConvMicrokernelTester()
1287 .cr(24)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
1292 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1293 }
1294 }
1295
1296 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, input_offset) {
1297 TEST_REQUIRES_X86_SSE2;
1298 for (uint32_t channels = 48; channels < 384; channels += 72) {
1299 DWConvMicrokernelTester()
1300 .cr(24)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(464)
1304 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1305 }
1306 }
1307
1308 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, zero) {
1309 TEST_REQUIRES_X86_SSE2;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 48; channels < 384; channels += 72) {
1312 DWConvMicrokernelTester()
1313 .cr(24)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(464)
1317 .zero_index(mz)
1318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1319 }
1320 }
1321 }
1322#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1323
1324
1325#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1326 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_eq_8) {
1327 TEST_REQUIRES_X86_SSSE3;
1328 DWConvMicrokernelTester()
1329 .cr(8)
1330 .kr(9)
1331 .channels(8)
1332 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1333 }
1334
1335 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8) {
1336 TEST_REQUIRES_X86_SSSE3;
1337 for (uint32_t channels = 16; channels < 128; channels += 24) {
1338 DWConvMicrokernelTester()
1339 .cr(8)
1340 .kr(9)
1341 .channels(channels)
1342 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1343 }
1344 }
1345
1346 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmin) {
1347 TEST_REQUIRES_X86_SSSE3;
1348 for (uint32_t channels = 16; channels < 128; channels += 24) {
1349 DWConvMicrokernelTester()
1350 .cr(8)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
1354 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1355 }
1356 }
1357
1358 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmax) {
1359 TEST_REQUIRES_X86_SSSE3;
1360 for (uint32_t channels = 16; channels < 128; channels += 24) {
1361 DWConvMicrokernelTester()
1362 .cr(8)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
1366 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1367 }
1368 }
1369
1370 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_lt_8) {
1371 TEST_REQUIRES_X86_SSSE3;
1372 for (uint32_t channels = 1; channels < 8; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(8)
1375 .kr(9)
1376 .channels(channels)
1377 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1378 }
1379 }
1380
1381 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8) {
1382 TEST_REQUIRES_X86_SSSE3;
1383 for (uint32_t channels = 9; channels < 16; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(8)
1386 .kr(9)
1387 .channels(channels)
1388 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1389 }
1390 }
1391
1392 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmin) {
1393 TEST_REQUIRES_X86_SSSE3;
1394 for (uint32_t channels = 9; channels < 16; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(8)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
1400 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1401 }
1402 }
1403
1404 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmax) {
1405 TEST_REQUIRES_X86_SSSE3;
1406 for (uint32_t channels = 9; channels < 16; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(8)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
1412 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1413 }
1414 }
1415
1416 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel) {
1417 TEST_REQUIRES_X86_SSSE3;
1418 for (size_t channels = 1; channels <= 40; channels += 7) {
1419 DWConvMicrokernelTester()
1420 .cr(8)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
1424 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1425 }
1426 }
1427
1428 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_step) {
1429 TEST_REQUIRES_X86_SSSE3;
1430 for (size_t channels = 1; channels <= 40; channels += 7) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(8)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
1438 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1439 }
1440 }
1441 }
1442
1443 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_output_stride) {
1444 TEST_REQUIRES_X86_SSSE3;
1445 for (size_t channels = 1; channels <= 40; channels += 7) {
1446 DWConvMicrokernelTester()
1447 .cr(8)
1448 .kr(9)
1449 .channels(8)
1450 .width(5)
1451 .output_stride(43)
1452 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1453 }
1454 }
1455
1456 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmin) {
1457 TEST_REQUIRES_X86_SSSE3;
1458 for (size_t channels = 1; channels <= 40; channels += 7) {
1459 DWConvMicrokernelTester()
1460 .cr(8)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
1465 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1466 }
1467 }
1468
1469 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmax) {
1470 TEST_REQUIRES_X86_SSSE3;
1471 for (size_t channels = 1; channels <= 40; channels += 7) {
1472 DWConvMicrokernelTester()
1473 .cr(8)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
1478 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1479 }
1480 }
1481
1482 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, input_offset) {
1483 TEST_REQUIRES_X86_SSSE3;
1484 for (uint32_t channels = 16; channels < 128; channels += 24) {
1485 DWConvMicrokernelTester()
1486 .cr(8)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(176)
1490 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1491 }
1492 }
1493
1494 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, zero) {
1495 TEST_REQUIRES_X86_SSSE3;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 16; channels < 128; channels += 24) {
1498 DWConvMicrokernelTester()
1499 .cr(8)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(176)
1503 .zero_index(mz)
1504 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1505 }
1506 }
1507 }
1508#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1509
1510
1511#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1512 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_eq_16) {
1513 TEST_REQUIRES_X86_SSSE3;
1514 DWConvMicrokernelTester()
1515 .cr(16)
1516 .kr(9)
1517 .channels(16)
1518 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1519 }
1520
1521 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16) {
1522 TEST_REQUIRES_X86_SSSE3;
1523 for (uint32_t channels = 32; channels < 256; channels += 48) {
1524 DWConvMicrokernelTester()
1525 .cr(16)
1526 .kr(9)
1527 .channels(channels)
1528 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1529 }
1530 }
1531
1532 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmin) {
1533 TEST_REQUIRES_X86_SSSE3;
1534 for (uint32_t channels = 32; channels < 256; channels += 48) {
1535 DWConvMicrokernelTester()
1536 .cr(16)
1537 .kr(9)
1538 .channels(channels)
1539 .qmin(128)
1540 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1541 }
1542 }
1543
1544 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmax) {
1545 TEST_REQUIRES_X86_SSSE3;
1546 for (uint32_t channels = 32; channels < 256; channels += 48) {
1547 DWConvMicrokernelTester()
1548 .cr(16)
1549 .kr(9)
1550 .channels(channels)
1551 .qmax(128)
1552 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1553 }
1554 }
1555
1556 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_lt_16) {
1557 TEST_REQUIRES_X86_SSSE3;
1558 for (uint32_t channels = 1; channels < 16; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(16)
1561 .kr(9)
1562 .channels(channels)
1563 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1564 }
1565 }
1566
1567 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16) {
1568 TEST_REQUIRES_X86_SSSE3;
1569 for (uint32_t channels = 17; channels < 32; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(16)
1572 .kr(9)
1573 .channels(channels)
1574 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1575 }
1576 }
1577
1578 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmin) {
1579 TEST_REQUIRES_X86_SSSE3;
1580 for (uint32_t channels = 17; channels < 32; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(16)
1583 .kr(9)
1584 .channels(channels)
1585 .qmin(128)
1586 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1587 }
1588 }
1589
1590 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmax) {
1591 TEST_REQUIRES_X86_SSSE3;
1592 for (uint32_t channels = 17; channels < 32; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(16)
1595 .kr(9)
1596 .channels(channels)
1597 .qmax(128)
1598 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1599 }
1600 }
1601
1602 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel) {
1603 TEST_REQUIRES_X86_SSSE3;
1604 for (size_t channels = 1; channels <= 80; channels += 15) {
1605 DWConvMicrokernelTester()
1606 .cr(16)
1607 .kr(9)
1608 .channels(channels)
1609 .width(3)
1610 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1611 }
1612 }
1613
1614 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_step) {
1615 TEST_REQUIRES_X86_SSSE3;
1616 for (size_t channels = 1; channels <= 80; channels += 15) {
1617 for (size_t step = 2; step <= 9; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(16)
1620 .kr(9)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
1624 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1625 }
1626 }
1627 }
1628
1629 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_output_stride) {
1630 TEST_REQUIRES_X86_SSSE3;
1631 for (size_t channels = 1; channels <= 80; channels += 15) {
1632 DWConvMicrokernelTester()
1633 .cr(16)
1634 .kr(9)
1635 .channels(16)
1636 .width(5)
1637 .output_stride(83)
1638 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1639 }
1640 }
1641
1642 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmin) {
1643 TEST_REQUIRES_X86_SSSE3;
1644 for (size_t channels = 1; channels <= 80; channels += 15) {
1645 DWConvMicrokernelTester()
1646 .cr(16)
1647 .kr(9)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
1651 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1652 }
1653 }
1654
1655 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmax) {
1656 TEST_REQUIRES_X86_SSSE3;
1657 for (size_t channels = 1; channels <= 80; channels += 15) {
1658 DWConvMicrokernelTester()
1659 .cr(16)
1660 .kr(9)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
1664 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1665 }
1666 }
1667
1668 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, input_offset) {
1669 TEST_REQUIRES_X86_SSSE3;
1670 for (uint32_t channels = 32; channels < 256; channels += 48) {
1671 DWConvMicrokernelTester()
1672 .cr(16)
1673 .kr(9)
1674 .channels(channels)
1675 .input_offset(304)
1676 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1677 }
1678 }
1679
1680 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, zero) {
1681 TEST_REQUIRES_X86_SSSE3;
1682 for (uint32_t mz = 0; mz < 9; mz++) {
1683 for (uint32_t channels = 32; channels < 256; channels += 48) {
1684 DWConvMicrokernelTester()
1685 .cr(16)
1686 .kr(9)
1687 .channels(channels)
1688 .input_offset(304)
1689 .zero_index(mz)
1690 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1691 }
1692 }
1693 }
1694#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1695
1696
1697#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1698 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_eq_24) {
1699 TEST_REQUIRES_X86_SSSE3;
1700 DWConvMicrokernelTester()
1701 .cr(24)
1702 .kr(9)
1703 .channels(24)
1704 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1705 }
1706
1707 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24) {
1708 TEST_REQUIRES_X86_SSSE3;
1709 for (uint32_t channels = 48; channels < 384; channels += 72) {
1710 DWConvMicrokernelTester()
1711 .cr(24)
1712 .kr(9)
1713 .channels(channels)
1714 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1715 }
1716 }
1717
1718 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmin) {
1719 TEST_REQUIRES_X86_SSSE3;
1720 for (uint32_t channels = 48; channels < 384; channels += 72) {
1721 DWConvMicrokernelTester()
1722 .cr(24)
1723 .kr(9)
1724 .channels(channels)
1725 .qmin(128)
1726 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1727 }
1728 }
1729
1730 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmax) {
1731 TEST_REQUIRES_X86_SSSE3;
1732 for (uint32_t channels = 48; channels < 384; channels += 72) {
1733 DWConvMicrokernelTester()
1734 .cr(24)
1735 .kr(9)
1736 .channels(channels)
1737 .qmax(128)
1738 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1739 }
1740 }
1741
1742 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_lt_24) {
1743 TEST_REQUIRES_X86_SSSE3;
1744 for (uint32_t channels = 1; channels < 24; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(24)
1747 .kr(9)
1748 .channels(channels)
1749 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1750 }
1751 }
1752
1753 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24) {
1754 TEST_REQUIRES_X86_SSSE3;
1755 for (uint32_t channels = 25; channels < 48; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(24)
1758 .kr(9)
1759 .channels(channels)
1760 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1761 }
1762 }
1763
1764 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmin) {
1765 TEST_REQUIRES_X86_SSSE3;
1766 for (uint32_t channels = 25; channels < 48; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(24)
1769 .kr(9)
1770 .channels(channels)
1771 .qmin(128)
1772 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1773 }
1774 }
1775
1776 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmax) {
1777 TEST_REQUIRES_X86_SSSE3;
1778 for (uint32_t channels = 25; channels < 48; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(24)
1781 .kr(9)
1782 .channels(channels)
1783 .qmax(128)
1784 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1785 }
1786 }
1787
1788 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel) {
1789 TEST_REQUIRES_X86_SSSE3;
1790 for (size_t channels = 1; channels <= 120; channels += 23) {
1791 DWConvMicrokernelTester()
1792 .cr(24)
1793 .kr(9)
1794 .channels(channels)
1795 .width(3)
1796 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1797 }
1798 }
1799
1800 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_step) {
1801 TEST_REQUIRES_X86_SSSE3;
1802 for (size_t channels = 1; channels <= 120; channels += 23) {
1803 for (size_t step = 2; step <= 9; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(24)
1806 .kr(9)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
1810 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1811 }
1812 }
1813 }
1814
1815 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_output_stride) {
1816 TEST_REQUIRES_X86_SSSE3;
1817 for (size_t channels = 1; channels <= 120; channels += 23) {
1818 DWConvMicrokernelTester()
1819 .cr(24)
1820 .kr(9)
1821 .channels(24)
1822 .width(5)
1823 .output_stride(127)
1824 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1825 }
1826 }
1827
1828 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmin) {
1829 TEST_REQUIRES_X86_SSSE3;
1830 for (size_t channels = 1; channels <= 120; channels += 23) {
1831 DWConvMicrokernelTester()
1832 .cr(24)
1833 .kr(9)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
1837 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1838 }
1839 }
1840
1841 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmax) {
1842 TEST_REQUIRES_X86_SSSE3;
1843 for (size_t channels = 1; channels <= 120; channels += 23) {
1844 DWConvMicrokernelTester()
1845 .cr(24)
1846 .kr(9)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
1850 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1851 }
1852 }
1853
1854 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, input_offset) {
1855 TEST_REQUIRES_X86_SSSE3;
1856 for (uint32_t channels = 48; channels < 384; channels += 72) {
1857 DWConvMicrokernelTester()
1858 .cr(24)
1859 .kr(9)
1860 .channels(channels)
1861 .input_offset(464)
1862 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1863 }
1864 }
1865
1866 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, zero) {
1867 TEST_REQUIRES_X86_SSSE3;
1868 for (uint32_t mz = 0; mz < 9; mz++) {
1869 for (uint32_t channels = 48; channels < 384; channels += 72) {
1870 DWConvMicrokernelTester()
1871 .cr(24)
1872 .kr(9)
1873 .channels(channels)
1874 .input_offset(464)
1875 .zero_index(mz)
1876 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1877 }
1878 }
1879 }
1880#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1881
1882
1883#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1884 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_eq_8) {
1885 TEST_REQUIRES_X86_SSE41;
1886 DWConvMicrokernelTester()
1887 .cr(8)
1888 .kr(9)
1889 .channels(8)
1890 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1891 }
1892
1893 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8) {
1894 TEST_REQUIRES_X86_SSE41;
1895 for (uint32_t channels = 16; channels < 128; channels += 24) {
1896 DWConvMicrokernelTester()
1897 .cr(8)
1898 .kr(9)
1899 .channels(channels)
1900 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1901 }
1902 }
1903
1904 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
1905 TEST_REQUIRES_X86_SSE41;
1906 for (uint32_t channels = 16; channels < 128; channels += 24) {
1907 DWConvMicrokernelTester()
1908 .cr(8)
1909 .kr(9)
1910 .channels(channels)
1911 .qmin(128)
1912 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1913 }
1914 }
1915
1916 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
1917 TEST_REQUIRES_X86_SSE41;
1918 for (uint32_t channels = 16; channels < 128; channels += 24) {
1919 DWConvMicrokernelTester()
1920 .cr(8)
1921 .kr(9)
1922 .channels(channels)
1923 .qmax(128)
1924 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1925 }
1926 }
1927
1928 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_lt_8) {
1929 TEST_REQUIRES_X86_SSE41;
1930 for (uint32_t channels = 1; channels < 8; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(8)
1933 .kr(9)
1934 .channels(channels)
1935 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1936 }
1937 }
1938
1939 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8) {
1940 TEST_REQUIRES_X86_SSE41;
1941 for (uint32_t channels = 9; channels < 16; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(8)
1944 .kr(9)
1945 .channels(channels)
1946 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1947 }
1948 }
1949
1950 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
1951 TEST_REQUIRES_X86_SSE41;
1952 for (uint32_t channels = 9; channels < 16; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(8)
1955 .kr(9)
1956 .channels(channels)
1957 .qmin(128)
1958 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1959 }
1960 }
1961
1962 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
1963 TEST_REQUIRES_X86_SSE41;
1964 for (uint32_t channels = 9; channels < 16; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(8)
1967 .kr(9)
1968 .channels(channels)
1969 .qmax(128)
1970 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1971 }
1972 }
1973
1974 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel) {
1975 TEST_REQUIRES_X86_SSE41;
1976 for (size_t channels = 1; channels <= 40; channels += 7) {
1977 DWConvMicrokernelTester()
1978 .cr(8)
1979 .kr(9)
1980 .channels(channels)
1981 .width(3)
1982 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1983 }
1984 }
1985
1986 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_step) {
1987 TEST_REQUIRES_X86_SSE41;
1988 for (size_t channels = 1; channels <= 40; channels += 7) {
1989 for (size_t step = 2; step <= 9; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(8)
1992 .kr(9)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
1996 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1997 }
1998 }
1999 }
2000
2001 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
2002 TEST_REQUIRES_X86_SSE41;
2003 for (size_t channels = 1; channels <= 40; channels += 7) {
2004 DWConvMicrokernelTester()
2005 .cr(8)
2006 .kr(9)
2007 .channels(8)
2008 .width(5)
2009 .output_stride(43)
2010 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2011 }
2012 }
2013
2014 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
2015 TEST_REQUIRES_X86_SSE41;
2016 for (size_t channels = 1; channels <= 40; channels += 7) {
2017 DWConvMicrokernelTester()
2018 .cr(8)
2019 .kr(9)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
2023 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2024 }
2025 }
2026
2027 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
2028 TEST_REQUIRES_X86_SSE41;
2029 for (size_t channels = 1; channels <= 40; channels += 7) {
2030 DWConvMicrokernelTester()
2031 .cr(8)
2032 .kr(9)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
2036 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2037 }
2038 }
2039
2040 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, input_offset) {
2041 TEST_REQUIRES_X86_SSE41;
2042 for (uint32_t channels = 16; channels < 128; channels += 24) {
2043 DWConvMicrokernelTester()
2044 .cr(8)
2045 .kr(9)
2046 .channels(channels)
2047 .input_offset(176)
2048 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2049 }
2050 }
2051
2052 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, zero) {
2053 TEST_REQUIRES_X86_SSE41;
2054 for (uint32_t mz = 0; mz < 9; mz++) {
2055 for (uint32_t channels = 16; channels < 128; channels += 24) {
2056 DWConvMicrokernelTester()
2057 .cr(8)
2058 .kr(9)
2059 .channels(channels)
2060 .input_offset(176)
2061 .zero_index(mz)
2062 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2063 }
2064 }
2065 }
2066#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2067
2068
2069#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2070 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_eq_16) {
2071 TEST_REQUIRES_X86_SSE41;
2072 DWConvMicrokernelTester()
2073 .cr(16)
2074 .kr(9)
2075 .channels(16)
2076 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2077 }
2078
2079 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16) {
2080 TEST_REQUIRES_X86_SSE41;
2081 for (uint32_t channels = 32; channels < 256; channels += 48) {
2082 DWConvMicrokernelTester()
2083 .cr(16)
2084 .kr(9)
2085 .channels(channels)
2086 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2087 }
2088 }
2089
2090 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
2091 TEST_REQUIRES_X86_SSE41;
2092 for (uint32_t channels = 32; channels < 256; channels += 48) {
2093 DWConvMicrokernelTester()
2094 .cr(16)
2095 .kr(9)
2096 .channels(channels)
2097 .qmin(128)
2098 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2099 }
2100 }
2101
2102 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
2103 TEST_REQUIRES_X86_SSE41;
2104 for (uint32_t channels = 32; channels < 256; channels += 48) {
2105 DWConvMicrokernelTester()
2106 .cr(16)
2107 .kr(9)
2108 .channels(channels)
2109 .qmax(128)
2110 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2111 }
2112 }
2113
2114 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_lt_16) {
2115 TEST_REQUIRES_X86_SSE41;
2116 for (uint32_t channels = 1; channels < 16; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(16)
2119 .kr(9)
2120 .channels(channels)
2121 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2122 }
2123 }
2124
2125 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16) {
2126 TEST_REQUIRES_X86_SSE41;
2127 for (uint32_t channels = 17; channels < 32; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(16)
2130 .kr(9)
2131 .channels(channels)
2132 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2133 }
2134 }
2135
2136 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
2137 TEST_REQUIRES_X86_SSE41;
2138 for (uint32_t channels = 17; channels < 32; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(16)
2141 .kr(9)
2142 .channels(channels)
2143 .qmin(128)
2144 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2145 }
2146 }
2147
2148 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
2149 TEST_REQUIRES_X86_SSE41;
2150 for (uint32_t channels = 17; channels < 32; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(16)
2153 .kr(9)
2154 .channels(channels)
2155 .qmax(128)
2156 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2157 }
2158 }
2159
2160 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel) {
2161 TEST_REQUIRES_X86_SSE41;
2162 for (size_t channels = 1; channels <= 80; channels += 15) {
2163 DWConvMicrokernelTester()
2164 .cr(16)
2165 .kr(9)
2166 .channels(channels)
2167 .width(3)
2168 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2169 }
2170 }
2171
2172 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_step) {
2173 TEST_REQUIRES_X86_SSE41;
2174 for (size_t channels = 1; channels <= 80; channels += 15) {
2175 for (size_t step = 2; step <= 9; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(16)
2178 .kr(9)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
2182 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2183 }
2184 }
2185 }
2186
2187 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
2188 TEST_REQUIRES_X86_SSE41;
2189 for (size_t channels = 1; channels <= 80; channels += 15) {
2190 DWConvMicrokernelTester()
2191 .cr(16)
2192 .kr(9)
2193 .channels(16)
2194 .width(5)
2195 .output_stride(83)
2196 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2197 }
2198 }
2199
2200 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
2201 TEST_REQUIRES_X86_SSE41;
2202 for (size_t channels = 1; channels <= 80; channels += 15) {
2203 DWConvMicrokernelTester()
2204 .cr(16)
2205 .kr(9)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
2209 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2210 }
2211 }
2212
2213 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
2214 TEST_REQUIRES_X86_SSE41;
2215 for (size_t channels = 1; channels <= 80; channels += 15) {
2216 DWConvMicrokernelTester()
2217 .cr(16)
2218 .kr(9)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
2222 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2223 }
2224 }
2225
2226 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, input_offset) {
2227 TEST_REQUIRES_X86_SSE41;
2228 for (uint32_t channels = 32; channels < 256; channels += 48) {
2229 DWConvMicrokernelTester()
2230 .cr(16)
2231 .kr(9)
2232 .channels(channels)
2233 .input_offset(304)
2234 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2235 }
2236 }
2237
2238 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, zero) {
2239 TEST_REQUIRES_X86_SSE41;
2240 for (uint32_t mz = 0; mz < 9; mz++) {
2241 for (uint32_t channels = 32; channels < 256; channels += 48) {
2242 DWConvMicrokernelTester()
2243 .cr(16)
2244 .kr(9)
2245 .channels(channels)
2246 .input_offset(304)
2247 .zero_index(mz)
2248 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2249 }
2250 }
2251 }
2252#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2253
2254
2255#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2256 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_eq_24) {
2257 TEST_REQUIRES_X86_SSE41;
2258 DWConvMicrokernelTester()
2259 .cr(24)
2260 .kr(9)
2261 .channels(24)
2262 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2263 }
2264
2265 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24) {
2266 TEST_REQUIRES_X86_SSE41;
2267 for (uint32_t channels = 48; channels < 384; channels += 72) {
2268 DWConvMicrokernelTester()
2269 .cr(24)
2270 .kr(9)
2271 .channels(channels)
2272 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2273 }
2274 }
2275
2276 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
2277 TEST_REQUIRES_X86_SSE41;
2278 for (uint32_t channels = 48; channels < 384; channels += 72) {
2279 DWConvMicrokernelTester()
2280 .cr(24)
2281 .kr(9)
2282 .channels(channels)
2283 .qmin(128)
2284 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2285 }
2286 }
2287
2288 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
2289 TEST_REQUIRES_X86_SSE41;
2290 for (uint32_t channels = 48; channels < 384; channels += 72) {
2291 DWConvMicrokernelTester()
2292 .cr(24)
2293 .kr(9)
2294 .channels(channels)
2295 .qmax(128)
2296 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2297 }
2298 }
2299
2300 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_lt_24) {
2301 TEST_REQUIRES_X86_SSE41;
2302 for (uint32_t channels = 1; channels < 24; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(24)
2305 .kr(9)
2306 .channels(channels)
2307 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2308 }
2309 }
2310
2311 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24) {
2312 TEST_REQUIRES_X86_SSE41;
2313 for (uint32_t channels = 25; channels < 48; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(24)
2316 .kr(9)
2317 .channels(channels)
2318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2319 }
2320 }
2321
2322 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
2323 TEST_REQUIRES_X86_SSE41;
2324 for (uint32_t channels = 25; channels < 48; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(24)
2327 .kr(9)
2328 .channels(channels)
2329 .qmin(128)
2330 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2331 }
2332 }
2333
2334 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
2335 TEST_REQUIRES_X86_SSE41;
2336 for (uint32_t channels = 25; channels < 48; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(24)
2339 .kr(9)
2340 .channels(channels)
2341 .qmax(128)
2342 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2343 }
2344 }
2345
2346 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel) {
2347 TEST_REQUIRES_X86_SSE41;
2348 for (size_t channels = 1; channels <= 120; channels += 23) {
2349 DWConvMicrokernelTester()
2350 .cr(24)
2351 .kr(9)
2352 .channels(channels)
2353 .width(3)
2354 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2355 }
2356 }
2357
2358 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_step) {
2359 TEST_REQUIRES_X86_SSE41;
2360 for (size_t channels = 1; channels <= 120; channels += 23) {
2361 for (size_t step = 2; step <= 9; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(24)
2364 .kr(9)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
2368 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2369 }
2370 }
2371 }
2372
2373 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
2374 TEST_REQUIRES_X86_SSE41;
2375 for (size_t channels = 1; channels <= 120; channels += 23) {
2376 DWConvMicrokernelTester()
2377 .cr(24)
2378 .kr(9)
2379 .channels(24)
2380 .width(5)
2381 .output_stride(127)
2382 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2383 }
2384 }
2385
2386 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
2387 TEST_REQUIRES_X86_SSE41;
2388 for (size_t channels = 1; channels <= 120; channels += 23) {
2389 DWConvMicrokernelTester()
2390 .cr(24)
2391 .kr(9)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
2395 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2396 }
2397 }
2398
2399 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
2400 TEST_REQUIRES_X86_SSE41;
2401 for (size_t channels = 1; channels <= 120; channels += 23) {
2402 DWConvMicrokernelTester()
2403 .cr(24)
2404 .kr(9)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
2408 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2409 }
2410 }
2411
2412 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, input_offset) {
2413 TEST_REQUIRES_X86_SSE41;
2414 for (uint32_t channels = 48; channels < 384; channels += 72) {
2415 DWConvMicrokernelTester()
2416 .cr(24)
2417 .kr(9)
2418 .channels(channels)
2419 .input_offset(464)
2420 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2421 }
2422 }
2423
2424 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, zero) {
2425 TEST_REQUIRES_X86_SSE41;
2426 for (uint32_t mz = 0; mz < 9; mz++) {
2427 for (uint32_t channels = 48; channels < 384; channels += 72) {
2428 DWConvMicrokernelTester()
2429 .cr(24)
2430 .kr(9)
2431 .channels(channels)
2432 .input_offset(464)
2433 .zero_index(mz)
2434 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2435 }
2436 }
2437 }
2438#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2439
2440
2441#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan40135522020-08-07 01:21:00 -07002442 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_eq_16) {
2443 TEST_REQUIRES_X86_AVX2;
2444 DWConvMicrokernelTester()
2445 .cr(16)
2446 .kr(9)
2447 .channels(16)
2448 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2449 }
2450
2451 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16) {
2452 TEST_REQUIRES_X86_AVX2;
2453 for (uint32_t channels = 32; channels < 256; channels += 48) {
2454 DWConvMicrokernelTester()
2455 .cr(16)
2456 .kr(9)
2457 .channels(channels)
2458 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2459 }
2460 }
2461
2462 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
2463 TEST_REQUIRES_X86_AVX2;
2464 for (uint32_t channels = 32; channels < 256; channels += 48) {
2465 DWConvMicrokernelTester()
2466 .cr(16)
2467 .kr(9)
2468 .channels(channels)
2469 .qmin(128)
2470 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2471 }
2472 }
2473
2474 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
2475 TEST_REQUIRES_X86_AVX2;
2476 for (uint32_t channels = 32; channels < 256; channels += 48) {
2477 DWConvMicrokernelTester()
2478 .cr(16)
2479 .kr(9)
2480 .channels(channels)
2481 .qmax(128)
2482 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2483 }
2484 }
2485
2486 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_lt_16) {
2487 TEST_REQUIRES_X86_AVX2;
2488 for (uint32_t channels = 1; channels < 16; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(16)
2491 .kr(9)
2492 .channels(channels)
2493 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2494 }
2495 }
2496
2497 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16) {
2498 TEST_REQUIRES_X86_AVX2;
2499 for (uint32_t channels = 17; channels < 32; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(16)
2502 .kr(9)
2503 .channels(channels)
2504 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2505 }
2506 }
2507
2508 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
2509 TEST_REQUIRES_X86_AVX2;
2510 for (uint32_t channels = 17; channels < 32; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(16)
2513 .kr(9)
2514 .channels(channels)
2515 .qmin(128)
2516 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2517 }
2518 }
2519
2520 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
2521 TEST_REQUIRES_X86_AVX2;
2522 for (uint32_t channels = 17; channels < 32; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(16)
2525 .kr(9)
2526 .channels(channels)
2527 .qmax(128)
2528 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2529 }
2530 }
2531
2532 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel) {
2533 TEST_REQUIRES_X86_AVX2;
2534 for (size_t channels = 1; channels <= 80; channels += 15) {
2535 DWConvMicrokernelTester()
2536 .cr(16)
2537 .kr(9)
2538 .channels(channels)
2539 .width(3)
2540 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2541 }
2542 }
2543
2544 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_step) {
2545 TEST_REQUIRES_X86_AVX2;
2546 for (size_t channels = 1; channels <= 80; channels += 15) {
2547 for (size_t step = 2; step <= 9; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(16)
2550 .kr(9)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
2554 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2555 }
2556 }
2557 }
2558
2559 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
2560 TEST_REQUIRES_X86_AVX2;
2561 for (size_t channels = 1; channels <= 80; channels += 15) {
2562 DWConvMicrokernelTester()
2563 .cr(16)
2564 .kr(9)
2565 .channels(16)
2566 .width(5)
2567 .output_stride(83)
2568 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2569 }
2570 }
2571
2572 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
2573 TEST_REQUIRES_X86_AVX2;
2574 for (size_t channels = 1; channels <= 80; channels += 15) {
2575 DWConvMicrokernelTester()
2576 .cr(16)
2577 .kr(9)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
2581 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2582 }
2583 }
2584
2585 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
2586 TEST_REQUIRES_X86_AVX2;
2587 for (size_t channels = 1; channels <= 80; channels += 15) {
2588 DWConvMicrokernelTester()
2589 .cr(16)
2590 .kr(9)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
2594 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2595 }
2596 }
2597
2598 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, input_offset) {
2599 TEST_REQUIRES_X86_AVX2;
2600 for (uint32_t channels = 32; channels < 256; channels += 48) {
2601 DWConvMicrokernelTester()
2602 .cr(16)
2603 .kr(9)
2604 .channels(channels)
2605 .input_offset(304)
2606 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2607 }
2608 }
2609
2610 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, zero) {
2611 TEST_REQUIRES_X86_AVX2;
2612 for (uint32_t mz = 0; mz < 9; mz++) {
2613 for (uint32_t channels = 32; channels < 256; channels += 48) {
2614 DWConvMicrokernelTester()
2615 .cr(16)
2616 .kr(9)
2617 .channels(channels)
2618 .input_offset(304)
2619 .zero_index(mz)
2620 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
2621 }
2622 }
2623 }
2624#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2625
2626
2627#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2628 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_eq_32) {
2629 TEST_REQUIRES_X86_AVX2;
2630 DWConvMicrokernelTester()
2631 .cr(32)
2632 .kr(9)
2633 .channels(32)
2634 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2635 }
2636
2637 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32) {
2638 TEST_REQUIRES_X86_AVX2;
2639 for (uint32_t channels = 64; channels < 512; channels += 96) {
2640 DWConvMicrokernelTester()
2641 .cr(32)
2642 .kr(9)
2643 .channels(channels)
2644 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2645 }
2646 }
2647
2648 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
2649 TEST_REQUIRES_X86_AVX2;
2650 for (uint32_t channels = 64; channels < 512; channels += 96) {
2651 DWConvMicrokernelTester()
2652 .cr(32)
2653 .kr(9)
2654 .channels(channels)
2655 .qmin(128)
2656 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2657 }
2658 }
2659
2660 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
2661 TEST_REQUIRES_X86_AVX2;
2662 for (uint32_t channels = 64; channels < 512; channels += 96) {
2663 DWConvMicrokernelTester()
2664 .cr(32)
2665 .kr(9)
2666 .channels(channels)
2667 .qmax(128)
2668 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2669 }
2670 }
2671
2672 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_lt_32) {
2673 TEST_REQUIRES_X86_AVX2;
2674 for (uint32_t channels = 1; channels < 32; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(32)
2677 .kr(9)
2678 .channels(channels)
2679 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2680 }
2681 }
2682
2683 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32) {
2684 TEST_REQUIRES_X86_AVX2;
2685 for (uint32_t channels = 33; channels < 64; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(32)
2688 .kr(9)
2689 .channels(channels)
2690 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2691 }
2692 }
2693
2694 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
2695 TEST_REQUIRES_X86_AVX2;
2696 for (uint32_t channels = 33; channels < 64; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(32)
2699 .kr(9)
2700 .channels(channels)
2701 .qmin(128)
2702 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2703 }
2704 }
2705
2706 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
2707 TEST_REQUIRES_X86_AVX2;
2708 for (uint32_t channels = 33; channels < 64; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(32)
2711 .kr(9)
2712 .channels(channels)
2713 .qmax(128)
2714 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2715 }
2716 }
2717
2718 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel) {
2719 TEST_REQUIRES_X86_AVX2;
2720 for (size_t channels = 1; channels <= 160; channels += 31) {
2721 DWConvMicrokernelTester()
2722 .cr(32)
2723 .kr(9)
2724 .channels(channels)
2725 .width(3)
2726 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2727 }
2728 }
2729
2730 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_step) {
2731 TEST_REQUIRES_X86_AVX2;
2732 for (size_t channels = 1; channels <= 160; channels += 31) {
2733 for (size_t step = 2; step <= 9; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(32)
2736 .kr(9)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
2740 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2741 }
2742 }
2743 }
2744
2745 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
2746 TEST_REQUIRES_X86_AVX2;
2747 for (size_t channels = 1; channels <= 160; channels += 31) {
2748 DWConvMicrokernelTester()
2749 .cr(32)
2750 .kr(9)
2751 .channels(32)
2752 .width(5)
2753 .output_stride(163)
2754 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2755 }
2756 }
2757
2758 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
2759 TEST_REQUIRES_X86_AVX2;
2760 for (size_t channels = 1; channels <= 160; channels += 31) {
2761 DWConvMicrokernelTester()
2762 .cr(32)
2763 .kr(9)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
2767 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2768 }
2769 }
2770
2771 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
2772 TEST_REQUIRES_X86_AVX2;
2773 for (size_t channels = 1; channels <= 160; channels += 31) {
2774 DWConvMicrokernelTester()
2775 .cr(32)
2776 .kr(9)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
2780 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2781 }
2782 }
2783
2784 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, input_offset) {
2785 TEST_REQUIRES_X86_AVX2;
2786 for (uint32_t channels = 64; channels < 512; channels += 96) {
2787 DWConvMicrokernelTester()
2788 .cr(32)
2789 .kr(9)
2790 .channels(channels)
2791 .input_offset(592)
2792 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2793 }
2794 }
2795
2796 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, zero) {
2797 TEST_REQUIRES_X86_AVX2;
2798 for (uint32_t mz = 0; mz < 9; mz++) {
2799 for (uint32_t channels = 64; channels < 512; channels += 96) {
2800 DWConvMicrokernelTester()
2801 .cr(32)
2802 .kr(9)
2803 .channels(channels)
2804 .input_offset(592)
2805 .zero_index(mz)
2806 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
2807 }
2808 }
2809 }
2810#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2811
2812
2813#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhand65a1522020-08-04 19:28:18 -07002814 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_eq_8) {
2815 TEST_REQUIRES_X86_AVX2;
2816 DWConvMicrokernelTester()
2817 .cr(8)
2818 .kr(9)
2819 .channels(8)
2820 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2821 }
2822
2823 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8) {
2824 TEST_REQUIRES_X86_AVX2;
2825 for (uint32_t channels = 16; channels < 128; channels += 24) {
2826 DWConvMicrokernelTester()
2827 .cr(8)
2828 .kr(9)
2829 .channels(channels)
2830 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2831 }
2832 }
2833
2834 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
2835 TEST_REQUIRES_X86_AVX2;
2836 for (uint32_t channels = 16; channels < 128; channels += 24) {
2837 DWConvMicrokernelTester()
2838 .cr(8)
2839 .kr(9)
2840 .channels(channels)
2841 .qmin(128)
2842 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2843 }
2844 }
2845
2846 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
2847 TEST_REQUIRES_X86_AVX2;
2848 for (uint32_t channels = 16; channels < 128; channels += 24) {
2849 DWConvMicrokernelTester()
2850 .cr(8)
2851 .kr(9)
2852 .channels(channels)
2853 .qmax(128)
2854 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2855 }
2856 }
2857
2858 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_lt_8) {
2859 TEST_REQUIRES_X86_AVX2;
2860 for (uint32_t channels = 1; channels < 8; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(8)
2863 .kr(9)
2864 .channels(channels)
2865 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2866 }
2867 }
2868
2869 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8) {
2870 TEST_REQUIRES_X86_AVX2;
2871 for (uint32_t channels = 9; channels < 16; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(8)
2874 .kr(9)
2875 .channels(channels)
2876 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2877 }
2878 }
2879
2880 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
2881 TEST_REQUIRES_X86_AVX2;
2882 for (uint32_t channels = 9; channels < 16; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(8)
2885 .kr(9)
2886 .channels(channels)
2887 .qmin(128)
2888 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2889 }
2890 }
2891
2892 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
2893 TEST_REQUIRES_X86_AVX2;
2894 for (uint32_t channels = 9; channels < 16; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(8)
2897 .kr(9)
2898 .channels(channels)
2899 .qmax(128)
2900 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2901 }
2902 }
2903
2904 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel) {
2905 TEST_REQUIRES_X86_AVX2;
2906 for (size_t channels = 1; channels <= 40; channels += 7) {
2907 DWConvMicrokernelTester()
2908 .cr(8)
2909 .kr(9)
2910 .channels(channels)
2911 .width(3)
2912 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2913 }
2914 }
2915
2916 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_step) {
2917 TEST_REQUIRES_X86_AVX2;
2918 for (size_t channels = 1; channels <= 40; channels += 7) {
2919 for (size_t step = 2; step <= 9; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(8)
2922 .kr(9)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
2926 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2927 }
2928 }
2929 }
2930
2931 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
2932 TEST_REQUIRES_X86_AVX2;
2933 for (size_t channels = 1; channels <= 40; channels += 7) {
2934 DWConvMicrokernelTester()
2935 .cr(8)
2936 .kr(9)
2937 .channels(8)
2938 .width(5)
2939 .output_stride(43)
2940 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2941 }
2942 }
2943
2944 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
2945 TEST_REQUIRES_X86_AVX2;
2946 for (size_t channels = 1; channels <= 40; channels += 7) {
2947 DWConvMicrokernelTester()
2948 .cr(8)
2949 .kr(9)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
2953 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2954 }
2955 }
2956
2957 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
2958 TEST_REQUIRES_X86_AVX2;
2959 for (size_t channels = 1; channels <= 40; channels += 7) {
2960 DWConvMicrokernelTester()
2961 .cr(8)
2962 .kr(9)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
2966 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2967 }
2968 }
2969
2970 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, input_offset) {
2971 TEST_REQUIRES_X86_AVX2;
2972 for (uint32_t channels = 16; channels < 128; channels += 24) {
2973 DWConvMicrokernelTester()
2974 .cr(8)
2975 .kr(9)
2976 .channels(channels)
2977 .input_offset(176)
2978 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2979 }
2980 }
2981
2982 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, zero) {
2983 TEST_REQUIRES_X86_AVX2;
2984 for (uint32_t mz = 0; mz < 9; mz++) {
2985 for (uint32_t channels = 16; channels < 128; channels += 24) {
2986 DWConvMicrokernelTester()
2987 .cr(8)
2988 .kr(9)
2989 .channels(channels)
2990 .input_offset(176)
2991 .zero_index(mz)
2992 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
2993 }
2994 }
2995 }
2996#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2997
2998
2999#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3000 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_eq_16) {
3001 TEST_REQUIRES_X86_AVX2;
3002 DWConvMicrokernelTester()
3003 .cr(16)
3004 .kr(9)
3005 .channels(16)
3006 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3007 }
3008
3009 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16) {
3010 TEST_REQUIRES_X86_AVX2;
3011 for (uint32_t channels = 32; channels < 256; channels += 48) {
3012 DWConvMicrokernelTester()
3013 .cr(16)
3014 .kr(9)
3015 .channels(channels)
3016 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3017 }
3018 }
3019
3020 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
3021 TEST_REQUIRES_X86_AVX2;
3022 for (uint32_t channels = 32; channels < 256; channels += 48) {
3023 DWConvMicrokernelTester()
3024 .cr(16)
3025 .kr(9)
3026 .channels(channels)
3027 .qmin(128)
3028 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3029 }
3030 }
3031
3032 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
3033 TEST_REQUIRES_X86_AVX2;
3034 for (uint32_t channels = 32; channels < 256; channels += 48) {
3035 DWConvMicrokernelTester()
3036 .cr(16)
3037 .kr(9)
3038 .channels(channels)
3039 .qmax(128)
3040 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3041 }
3042 }
3043
3044 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_lt_16) {
3045 TEST_REQUIRES_X86_AVX2;
3046 for (uint32_t channels = 1; channels < 16; channels++) {
3047 DWConvMicrokernelTester()
3048 .cr(16)
3049 .kr(9)
3050 .channels(channels)
3051 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3052 }
3053 }
3054
3055 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16) {
3056 TEST_REQUIRES_X86_AVX2;
3057 for (uint32_t channels = 17; channels < 32; channels++) {
3058 DWConvMicrokernelTester()
3059 .cr(16)
3060 .kr(9)
3061 .channels(channels)
3062 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3063 }
3064 }
3065
3066 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
3067 TEST_REQUIRES_X86_AVX2;
3068 for (uint32_t channels = 17; channels < 32; channels++) {
3069 DWConvMicrokernelTester()
3070 .cr(16)
3071 .kr(9)
3072 .channels(channels)
3073 .qmin(128)
3074 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3075 }
3076 }
3077
3078 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
3079 TEST_REQUIRES_X86_AVX2;
3080 for (uint32_t channels = 17; channels < 32; channels++) {
3081 DWConvMicrokernelTester()
3082 .cr(16)
3083 .kr(9)
3084 .channels(channels)
3085 .qmax(128)
3086 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3087 }
3088 }
3089
3090 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel) {
3091 TEST_REQUIRES_X86_AVX2;
3092 for (size_t channels = 1; channels <= 80; channels += 15) {
3093 DWConvMicrokernelTester()
3094 .cr(16)
3095 .kr(9)
3096 .channels(channels)
3097 .width(3)
3098 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3099 }
3100 }
3101
3102 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_step) {
3103 TEST_REQUIRES_X86_AVX2;
3104 for (size_t channels = 1; channels <= 80; channels += 15) {
3105 for (size_t step = 2; step <= 9; step++) {
3106 DWConvMicrokernelTester()
3107 .cr(16)
3108 .kr(9)
3109 .channels(channels)
3110 .width(3)
3111 .step(step)
3112 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3113 }
3114 }
3115 }
3116
3117 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
3118 TEST_REQUIRES_X86_AVX2;
3119 for (size_t channels = 1; channels <= 80; channels += 15) {
3120 DWConvMicrokernelTester()
3121 .cr(16)
3122 .kr(9)
3123 .channels(16)
3124 .width(5)
3125 .output_stride(83)
3126 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3127 }
3128 }
3129
3130 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
3131 TEST_REQUIRES_X86_AVX2;
3132 for (size_t channels = 1; channels <= 80; channels += 15) {
3133 DWConvMicrokernelTester()
3134 .cr(16)
3135 .kr(9)
3136 .channels(channels)
3137 .width(3)
3138 .qmin(128)
3139 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3140 }
3141 }
3142
3143 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
3144 TEST_REQUIRES_X86_AVX2;
3145 for (size_t channels = 1; channels <= 80; channels += 15) {
3146 DWConvMicrokernelTester()
3147 .cr(16)
3148 .kr(9)
3149 .channels(channels)
3150 .width(3)
3151 .qmax(128)
3152 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3153 }
3154 }
3155
3156 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, input_offset) {
3157 TEST_REQUIRES_X86_AVX2;
3158 for (uint32_t channels = 32; channels < 256; channels += 48) {
3159 DWConvMicrokernelTester()
3160 .cr(16)
3161 .kr(9)
3162 .channels(channels)
3163 .input_offset(304)
3164 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3165 }
3166 }
3167
3168 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, zero) {
3169 TEST_REQUIRES_X86_AVX2;
3170 for (uint32_t mz = 0; mz < 9; mz++) {
3171 for (uint32_t channels = 32; channels < 256; channels += 48) {
3172 DWConvMicrokernelTester()
3173 .cr(16)
3174 .kr(9)
3175 .channels(channels)
3176 .input_offset(304)
3177 .zero_index(mz)
3178 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
3179 }
3180 }
3181 }
3182#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3183
3184
3185#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3186 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_eq_24) {
3187 TEST_REQUIRES_X86_AVX2;
3188 DWConvMicrokernelTester()
3189 .cr(24)
3190 .kr(9)
3191 .channels(24)
3192 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3193 }
3194
3195 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24) {
3196 TEST_REQUIRES_X86_AVX2;
3197 for (uint32_t channels = 48; channels < 384; channels += 72) {
3198 DWConvMicrokernelTester()
3199 .cr(24)
3200 .kr(9)
3201 .channels(channels)
3202 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3203 }
3204 }
3205
3206 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
3207 TEST_REQUIRES_X86_AVX2;
3208 for (uint32_t channels = 48; channels < 384; channels += 72) {
3209 DWConvMicrokernelTester()
3210 .cr(24)
3211 .kr(9)
3212 .channels(channels)
3213 .qmin(128)
3214 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3215 }
3216 }
3217
3218 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
3219 TEST_REQUIRES_X86_AVX2;
3220 for (uint32_t channels = 48; channels < 384; channels += 72) {
3221 DWConvMicrokernelTester()
3222 .cr(24)
3223 .kr(9)
3224 .channels(channels)
3225 .qmax(128)
3226 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3227 }
3228 }
3229
3230 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_lt_24) {
3231 TEST_REQUIRES_X86_AVX2;
3232 for (uint32_t channels = 1; channels < 24; channels++) {
3233 DWConvMicrokernelTester()
3234 .cr(24)
3235 .kr(9)
3236 .channels(channels)
3237 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3238 }
3239 }
3240
3241 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24) {
3242 TEST_REQUIRES_X86_AVX2;
3243 for (uint32_t channels = 25; channels < 48; channels++) {
3244 DWConvMicrokernelTester()
3245 .cr(24)
3246 .kr(9)
3247 .channels(channels)
3248 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3249 }
3250 }
3251
3252 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
3253 TEST_REQUIRES_X86_AVX2;
3254 for (uint32_t channels = 25; channels < 48; channels++) {
3255 DWConvMicrokernelTester()
3256 .cr(24)
3257 .kr(9)
3258 .channels(channels)
3259 .qmin(128)
3260 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3261 }
3262 }
3263
3264 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
3265 TEST_REQUIRES_X86_AVX2;
3266 for (uint32_t channels = 25; channels < 48; channels++) {
3267 DWConvMicrokernelTester()
3268 .cr(24)
3269 .kr(9)
3270 .channels(channels)
3271 .qmax(128)
3272 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3273 }
3274 }
3275
3276 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel) {
3277 TEST_REQUIRES_X86_AVX2;
3278 for (size_t channels = 1; channels <= 120; channels += 23) {
3279 DWConvMicrokernelTester()
3280 .cr(24)
3281 .kr(9)
3282 .channels(channels)
3283 .width(3)
3284 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3285 }
3286 }
3287
3288 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_step) {
3289 TEST_REQUIRES_X86_AVX2;
3290 for (size_t channels = 1; channels <= 120; channels += 23) {
3291 for (size_t step = 2; step <= 9; step++) {
3292 DWConvMicrokernelTester()
3293 .cr(24)
3294 .kr(9)
3295 .channels(channels)
3296 .width(3)
3297 .step(step)
3298 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3299 }
3300 }
3301 }
3302
3303 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
3304 TEST_REQUIRES_X86_AVX2;
3305 for (size_t channels = 1; channels <= 120; channels += 23) {
3306 DWConvMicrokernelTester()
3307 .cr(24)
3308 .kr(9)
3309 .channels(24)
3310 .width(5)
3311 .output_stride(127)
3312 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3313 }
3314 }
3315
3316 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
3317 TEST_REQUIRES_X86_AVX2;
3318 for (size_t channels = 1; channels <= 120; channels += 23) {
3319 DWConvMicrokernelTester()
3320 .cr(24)
3321 .kr(9)
3322 .channels(channels)
3323 .width(3)
3324 .qmin(128)
3325 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3326 }
3327 }
3328
3329 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
3330 TEST_REQUIRES_X86_AVX2;
3331 for (size_t channels = 1; channels <= 120; channels += 23) {
3332 DWConvMicrokernelTester()
3333 .cr(24)
3334 .kr(9)
3335 .channels(channels)
3336 .width(3)
3337 .qmax(128)
3338 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3339 }
3340 }
3341
3342 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, input_offset) {
3343 TEST_REQUIRES_X86_AVX2;
3344 for (uint32_t channels = 48; channels < 384; channels += 72) {
3345 DWConvMicrokernelTester()
3346 .cr(24)
3347 .kr(9)
3348 .channels(channels)
3349 .input_offset(464)
3350 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3351 }
3352 }
3353
3354 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, zero) {
3355 TEST_REQUIRES_X86_AVX2;
3356 for (uint32_t mz = 0; mz < 9; mz++) {
3357 for (uint32_t channels = 48; channels < 384; channels += 72) {
3358 DWConvMicrokernelTester()
3359 .cr(24)
3360 .kr(9)
3361 .channels(channels)
3362 .input_offset(464)
3363 .zero_index(mz)
3364 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
3365 }
3366 }
3367 }
3368#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3369
3370
3371#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3372 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_eq_32) {
3373 TEST_REQUIRES_X86_AVX2;
3374 DWConvMicrokernelTester()
3375 .cr(32)
3376 .kr(9)
3377 .channels(32)
3378 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3379 }
3380
3381 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32) {
3382 TEST_REQUIRES_X86_AVX2;
3383 for (uint32_t channels = 64; channels < 512; channels += 96) {
3384 DWConvMicrokernelTester()
3385 .cr(32)
3386 .kr(9)
3387 .channels(channels)
3388 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3389 }
3390 }
3391
3392 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
3393 TEST_REQUIRES_X86_AVX2;
3394 for (uint32_t channels = 64; channels < 512; channels += 96) {
3395 DWConvMicrokernelTester()
3396 .cr(32)
3397 .kr(9)
3398 .channels(channels)
3399 .qmin(128)
3400 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3401 }
3402 }
3403
3404 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
3405 TEST_REQUIRES_X86_AVX2;
3406 for (uint32_t channels = 64; channels < 512; channels += 96) {
3407 DWConvMicrokernelTester()
3408 .cr(32)
3409 .kr(9)
3410 .channels(channels)
3411 .qmax(128)
3412 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3413 }
3414 }
3415
3416 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_lt_32) {
3417 TEST_REQUIRES_X86_AVX2;
3418 for (uint32_t channels = 1; channels < 32; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(32)
3421 .kr(9)
3422 .channels(channels)
3423 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3424 }
3425 }
3426
3427 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32) {
3428 TEST_REQUIRES_X86_AVX2;
3429 for (uint32_t channels = 33; channels < 64; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(32)
3432 .kr(9)
3433 .channels(channels)
3434 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3435 }
3436 }
3437
3438 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
3439 TEST_REQUIRES_X86_AVX2;
3440 for (uint32_t channels = 33; channels < 64; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(32)
3443 .kr(9)
3444 .channels(channels)
3445 .qmin(128)
3446 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3447 }
3448 }
3449
3450 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
3451 TEST_REQUIRES_X86_AVX2;
3452 for (uint32_t channels = 33; channels < 64; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(32)
3455 .kr(9)
3456 .channels(channels)
3457 .qmax(128)
3458 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3459 }
3460 }
3461
3462 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel) {
3463 TEST_REQUIRES_X86_AVX2;
3464 for (size_t channels = 1; channels <= 160; channels += 31) {
3465 DWConvMicrokernelTester()
3466 .cr(32)
3467 .kr(9)
3468 .channels(channels)
3469 .width(3)
3470 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3471 }
3472 }
3473
3474 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_step) {
3475 TEST_REQUIRES_X86_AVX2;
3476 for (size_t channels = 1; channels <= 160; channels += 31) {
3477 for (size_t step = 2; step <= 9; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(32)
3480 .kr(9)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
3484 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3485 }
3486 }
3487 }
3488
3489 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
3490 TEST_REQUIRES_X86_AVX2;
3491 for (size_t channels = 1; channels <= 160; channels += 31) {
3492 DWConvMicrokernelTester()
3493 .cr(32)
3494 .kr(9)
3495 .channels(32)
3496 .width(5)
3497 .output_stride(163)
3498 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3499 }
3500 }
3501
3502 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
3503 TEST_REQUIRES_X86_AVX2;
3504 for (size_t channels = 1; channels <= 160; channels += 31) {
3505 DWConvMicrokernelTester()
3506 .cr(32)
3507 .kr(9)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
3511 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3512 }
3513 }
3514
3515 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
3516 TEST_REQUIRES_X86_AVX2;
3517 for (size_t channels = 1; channels <= 160; channels += 31) {
3518 DWConvMicrokernelTester()
3519 .cr(32)
3520 .kr(9)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
3524 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3525 }
3526 }
3527
3528 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, input_offset) {
3529 TEST_REQUIRES_X86_AVX2;
3530 for (uint32_t channels = 64; channels < 512; channels += 96) {
3531 DWConvMicrokernelTester()
3532 .cr(32)
3533 .kr(9)
3534 .channels(channels)
3535 .input_offset(592)
3536 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3537 }
3538 }
3539
3540 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, zero) {
3541 TEST_REQUIRES_X86_AVX2;
3542 for (uint32_t mz = 0; mz < 9; mz++) {
3543 for (uint32_t channels = 64; channels < 512; channels += 96) {
3544 DWConvMicrokernelTester()
3545 .cr(32)
3546 .kr(9)
3547 .channels(channels)
3548 .input_offset(592)
3549 .zero_index(mz)
3550 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
3551 }
3552 }
3553 }
3554#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancc8f34c2020-08-05 16:36:38 -07003555
3556
Marat Dukhan2ffc5e62020-09-06 22:33:38 -07003557#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3558 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_eq_16) {
3559 TEST_REQUIRES_X86_AVX512SKX;
3560 DWConvMicrokernelTester()
3561 .cr(16)
3562 .kr(9)
3563 .channels(16)
3564 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3565 }
3566
3567 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16) {
3568 TEST_REQUIRES_X86_AVX512SKX;
3569 for (uint32_t channels = 32; channels < 256; channels += 48) {
3570 DWConvMicrokernelTester()
3571 .cr(16)
3572 .kr(9)
3573 .channels(channels)
3574 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3575 }
3576 }
3577
3578 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
3579 TEST_REQUIRES_X86_AVX512SKX;
3580 for (uint32_t channels = 32; channels < 256; channels += 48) {
3581 DWConvMicrokernelTester()
3582 .cr(16)
3583 .kr(9)
3584 .channels(channels)
3585 .qmin(128)
3586 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3587 }
3588 }
3589
3590 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
3591 TEST_REQUIRES_X86_AVX512SKX;
3592 for (uint32_t channels = 32; channels < 256; channels += 48) {
3593 DWConvMicrokernelTester()
3594 .cr(16)
3595 .kr(9)
3596 .channels(channels)
3597 .qmax(128)
3598 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3599 }
3600 }
3601
3602 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_lt_16) {
3603 TEST_REQUIRES_X86_AVX512SKX;
3604 for (uint32_t channels = 1; channels < 16; channels++) {
3605 DWConvMicrokernelTester()
3606 .cr(16)
3607 .kr(9)
3608 .channels(channels)
3609 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3610 }
3611 }
3612
3613 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16) {
3614 TEST_REQUIRES_X86_AVX512SKX;
3615 for (uint32_t channels = 17; channels < 32; channels++) {
3616 DWConvMicrokernelTester()
3617 .cr(16)
3618 .kr(9)
3619 .channels(channels)
3620 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3621 }
3622 }
3623
3624 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
3625 TEST_REQUIRES_X86_AVX512SKX;
3626 for (uint32_t channels = 17; channels < 32; channels++) {
3627 DWConvMicrokernelTester()
3628 .cr(16)
3629 .kr(9)
3630 .channels(channels)
3631 .qmin(128)
3632 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3633 }
3634 }
3635
3636 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
3637 TEST_REQUIRES_X86_AVX512SKX;
3638 for (uint32_t channels = 17; channels < 32; channels++) {
3639 DWConvMicrokernelTester()
3640 .cr(16)
3641 .kr(9)
3642 .channels(channels)
3643 .qmax(128)
3644 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3645 }
3646 }
3647
3648 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel) {
3649 TEST_REQUIRES_X86_AVX512SKX;
3650 for (size_t channels = 1; channels <= 80; channels += 15) {
3651 DWConvMicrokernelTester()
3652 .cr(16)
3653 .kr(9)
3654 .channels(channels)
3655 .width(3)
3656 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3657 }
3658 }
3659
3660 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
3661 TEST_REQUIRES_X86_AVX512SKX;
3662 for (size_t channels = 1; channels <= 80; channels += 15) {
3663 for (size_t step = 2; step <= 9; step++) {
3664 DWConvMicrokernelTester()
3665 .cr(16)
3666 .kr(9)
3667 .channels(channels)
3668 .width(3)
3669 .step(step)
3670 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3671 }
3672 }
3673 }
3674
3675 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
3676 TEST_REQUIRES_X86_AVX512SKX;
3677 for (size_t channels = 1; channels <= 80; channels += 15) {
3678 DWConvMicrokernelTester()
3679 .cr(16)
3680 .kr(9)
3681 .channels(16)
3682 .width(5)
3683 .output_stride(83)
3684 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3685 }
3686 }
3687
3688 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
3689 TEST_REQUIRES_X86_AVX512SKX;
3690 for (size_t channels = 1; channels <= 80; channels += 15) {
3691 DWConvMicrokernelTester()
3692 .cr(16)
3693 .kr(9)
3694 .channels(channels)
3695 .width(3)
3696 .qmin(128)
3697 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3698 }
3699 }
3700
3701 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
3702 TEST_REQUIRES_X86_AVX512SKX;
3703 for (size_t channels = 1; channels <= 80; channels += 15) {
3704 DWConvMicrokernelTester()
3705 .cr(16)
3706 .kr(9)
3707 .channels(channels)
3708 .width(3)
3709 .qmax(128)
3710 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3711 }
3712 }
3713
3714 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, input_offset) {
3715 TEST_REQUIRES_X86_AVX512SKX;
3716 for (uint32_t channels = 32; channels < 256; channels += 48) {
3717 DWConvMicrokernelTester()
3718 .cr(16)
3719 .kr(9)
3720 .channels(channels)
3721 .input_offset(304)
3722 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3723 }
3724 }
3725
3726 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, zero) {
3727 TEST_REQUIRES_X86_AVX512SKX;
3728 for (uint32_t mz = 0; mz < 9; mz++) {
3729 for (uint32_t channels = 32; channels < 256; channels += 48) {
3730 DWConvMicrokernelTester()
3731 .cr(16)
3732 .kr(9)
3733 .channels(channels)
3734 .input_offset(304)
3735 .zero_index(mz)
3736 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
3737 }
3738 }
3739 }
3740#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3741
3742
3743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3744 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_eq_32) {
3745 TEST_REQUIRES_X86_AVX512SKX;
3746 DWConvMicrokernelTester()
3747 .cr(32)
3748 .kr(9)
3749 .channels(32)
3750 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3751 }
3752
3753 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32) {
3754 TEST_REQUIRES_X86_AVX512SKX;
3755 for (uint32_t channels = 64; channels < 512; channels += 96) {
3756 DWConvMicrokernelTester()
3757 .cr(32)
3758 .kr(9)
3759 .channels(channels)
3760 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3761 }
3762 }
3763
3764 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
3765 TEST_REQUIRES_X86_AVX512SKX;
3766 for (uint32_t channels = 64; channels < 512; channels += 96) {
3767 DWConvMicrokernelTester()
3768 .cr(32)
3769 .kr(9)
3770 .channels(channels)
3771 .qmin(128)
3772 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3773 }
3774 }
3775
3776 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
3777 TEST_REQUIRES_X86_AVX512SKX;
3778 for (uint32_t channels = 64; channels < 512; channels += 96) {
3779 DWConvMicrokernelTester()
3780 .cr(32)
3781 .kr(9)
3782 .channels(channels)
3783 .qmax(128)
3784 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3785 }
3786 }
3787
3788 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_lt_32) {
3789 TEST_REQUIRES_X86_AVX512SKX;
3790 for (uint32_t channels = 1; channels < 32; channels++) {
3791 DWConvMicrokernelTester()
3792 .cr(32)
3793 .kr(9)
3794 .channels(channels)
3795 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3796 }
3797 }
3798
3799 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32) {
3800 TEST_REQUIRES_X86_AVX512SKX;
3801 for (uint32_t channels = 33; channels < 64; channels++) {
3802 DWConvMicrokernelTester()
3803 .cr(32)
3804 .kr(9)
3805 .channels(channels)
3806 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3807 }
3808 }
3809
3810 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
3811 TEST_REQUIRES_X86_AVX512SKX;
3812 for (uint32_t channels = 33; channels < 64; channels++) {
3813 DWConvMicrokernelTester()
3814 .cr(32)
3815 .kr(9)
3816 .channels(channels)
3817 .qmin(128)
3818 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3819 }
3820 }
3821
3822 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
3823 TEST_REQUIRES_X86_AVX512SKX;
3824 for (uint32_t channels = 33; channels < 64; channels++) {
3825 DWConvMicrokernelTester()
3826 .cr(32)
3827 .kr(9)
3828 .channels(channels)
3829 .qmax(128)
3830 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3831 }
3832 }
3833
3834 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel) {
3835 TEST_REQUIRES_X86_AVX512SKX;
3836 for (size_t channels = 1; channels <= 160; channels += 31) {
3837 DWConvMicrokernelTester()
3838 .cr(32)
3839 .kr(9)
3840 .channels(channels)
3841 .width(3)
3842 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3843 }
3844 }
3845
3846 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
3847 TEST_REQUIRES_X86_AVX512SKX;
3848 for (size_t channels = 1; channels <= 160; channels += 31) {
3849 for (size_t step = 2; step <= 9; step++) {
3850 DWConvMicrokernelTester()
3851 .cr(32)
3852 .kr(9)
3853 .channels(channels)
3854 .width(3)
3855 .step(step)
3856 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3857 }
3858 }
3859 }
3860
3861 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
3862 TEST_REQUIRES_X86_AVX512SKX;
3863 for (size_t channels = 1; channels <= 160; channels += 31) {
3864 DWConvMicrokernelTester()
3865 .cr(32)
3866 .kr(9)
3867 .channels(32)
3868 .width(5)
3869 .output_stride(163)
3870 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3871 }
3872 }
3873
3874 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
3875 TEST_REQUIRES_X86_AVX512SKX;
3876 for (size_t channels = 1; channels <= 160; channels += 31) {
3877 DWConvMicrokernelTester()
3878 .cr(32)
3879 .kr(9)
3880 .channels(channels)
3881 .width(3)
3882 .qmin(128)
3883 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3884 }
3885 }
3886
3887 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
3888 TEST_REQUIRES_X86_AVX512SKX;
3889 for (size_t channels = 1; channels <= 160; channels += 31) {
3890 DWConvMicrokernelTester()
3891 .cr(32)
3892 .kr(9)
3893 .channels(channels)
3894 .width(3)
3895 .qmax(128)
3896 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3897 }
3898 }
3899
3900 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, input_offset) {
3901 TEST_REQUIRES_X86_AVX512SKX;
3902 for (uint32_t channels = 64; channels < 512; channels += 96) {
3903 DWConvMicrokernelTester()
3904 .cr(32)
3905 .kr(9)
3906 .channels(channels)
3907 .input_offset(592)
3908 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3909 }
3910 }
3911
3912 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, zero) {
3913 TEST_REQUIRES_X86_AVX512SKX;
3914 for (uint32_t mz = 0; mz < 9; mz++) {
3915 for (uint32_t channels = 64; channels < 512; channels += 96) {
3916 DWConvMicrokernelTester()
3917 .cr(32)
3918 .kr(9)
3919 .channels(channels)
3920 .input_offset(592)
3921 .zero_index(mz)
3922 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
3923 }
3924 }
3925 }
3926#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927
3928
Marat Dukhancc8f34c2020-08-05 16:36:38 -07003929#if XNN_ARCH_WASMSIMD
3930 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_eq_8) {
3931 DWConvMicrokernelTester()
3932 .cr(8)
3933 .kr(9)
3934 .channels(8)
3935 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3936 }
3937
3938 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8) {
3939 for (uint32_t channels = 16; channels < 128; channels += 24) {
3940 DWConvMicrokernelTester()
3941 .cr(8)
3942 .kr(9)
3943 .channels(channels)
3944 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3945 }
3946 }
3947
3948 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
3949 for (uint32_t channels = 16; channels < 128; channels += 24) {
3950 DWConvMicrokernelTester()
3951 .cr(8)
3952 .kr(9)
3953 .channels(channels)
3954 .qmin(128)
3955 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3956 }
3957 }
3958
3959 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
3960 for (uint32_t channels = 16; channels < 128; channels += 24) {
3961 DWConvMicrokernelTester()
3962 .cr(8)
3963 .kr(9)
3964 .channels(channels)
3965 .qmax(128)
3966 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3967 }
3968 }
3969
3970 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_lt_8) {
3971 for (uint32_t channels = 1; channels < 8; channels++) {
3972 DWConvMicrokernelTester()
3973 .cr(8)
3974 .kr(9)
3975 .channels(channels)
3976 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3977 }
3978 }
3979
3980 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8) {
3981 for (uint32_t channels = 9; channels < 16; channels++) {
3982 DWConvMicrokernelTester()
3983 .cr(8)
3984 .kr(9)
3985 .channels(channels)
3986 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3987 }
3988 }
3989
3990 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
3991 for (uint32_t channels = 9; channels < 16; channels++) {
3992 DWConvMicrokernelTester()
3993 .cr(8)
3994 .kr(9)
3995 .channels(channels)
3996 .qmin(128)
3997 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
3998 }
3999 }
4000
4001 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
4002 for (uint32_t channels = 9; channels < 16; channels++) {
4003 DWConvMicrokernelTester()
4004 .cr(8)
4005 .kr(9)
4006 .channels(channels)
4007 .qmax(128)
4008 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4009 }
4010 }
4011
4012 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel) {
4013 for (size_t channels = 1; channels <= 40; channels += 7) {
4014 DWConvMicrokernelTester()
4015 .cr(8)
4016 .kr(9)
4017 .channels(channels)
4018 .width(3)
4019 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4020 }
4021 }
4022
4023 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
4024 for (size_t channels = 1; channels <= 40; channels += 7) {
4025 for (size_t step = 2; step <= 9; step++) {
4026 DWConvMicrokernelTester()
4027 .cr(8)
4028 .kr(9)
4029 .channels(channels)
4030 .width(3)
4031 .step(step)
4032 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4033 }
4034 }
4035 }
4036
4037 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
4038 for (size_t channels = 1; channels <= 40; channels += 7) {
4039 DWConvMicrokernelTester()
4040 .cr(8)
4041 .kr(9)
4042 .channels(8)
4043 .width(5)
4044 .output_stride(43)
4045 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4046 }
4047 }
4048
4049 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
4050 for (size_t channels = 1; channels <= 40; channels += 7) {
4051 DWConvMicrokernelTester()
4052 .cr(8)
4053 .kr(9)
4054 .channels(channels)
4055 .width(3)
4056 .qmin(128)
4057 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4058 }
4059 }
4060
4061 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
4062 for (size_t channels = 1; channels <= 40; channels += 7) {
4063 DWConvMicrokernelTester()
4064 .cr(8)
4065 .kr(9)
4066 .channels(channels)
4067 .width(3)
4068 .qmax(128)
4069 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4070 }
4071 }
4072
4073 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, input_offset) {
4074 for (uint32_t channels = 16; channels < 128; channels += 24) {
4075 DWConvMicrokernelTester()
4076 .cr(8)
4077 .kr(9)
4078 .channels(channels)
4079 .input_offset(176)
4080 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4081 }
4082 }
4083
4084 TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, zero) {
4085 for (uint32_t mz = 0; mz < 9; mz++) {
4086 for (uint32_t channels = 16; channels < 128; channels += 24) {
4087 DWConvMicrokernelTester()
4088 .cr(8)
4089 .kr(9)
4090 .channels(channels)
4091 .input_offset(176)
4092 .zero_index(mz)
4093 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
4094 }
4095 }
4096 }
4097#endif // XNN_ARCH_WASMSIMD
4098
4099
4100#if XNN_ARCH_WASMSIMD
4101 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_eq_16) {
4102 DWConvMicrokernelTester()
4103 .cr(16)
4104 .kr(9)
4105 .channels(16)
4106 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4107 }
4108
4109 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16) {
4110 for (uint32_t channels = 32; channels < 256; channels += 48) {
4111 DWConvMicrokernelTester()
4112 .cr(16)
4113 .kr(9)
4114 .channels(channels)
4115 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4116 }
4117 }
4118
4119 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
4120 for (uint32_t channels = 32; channels < 256; channels += 48) {
4121 DWConvMicrokernelTester()
4122 .cr(16)
4123 .kr(9)
4124 .channels(channels)
4125 .qmin(128)
4126 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4127 }
4128 }
4129
4130 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
4131 for (uint32_t channels = 32; channels < 256; channels += 48) {
4132 DWConvMicrokernelTester()
4133 .cr(16)
4134 .kr(9)
4135 .channels(channels)
4136 .qmax(128)
4137 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4138 }
4139 }
4140
4141 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_lt_16) {
4142 for (uint32_t channels = 1; channels < 16; channels++) {
4143 DWConvMicrokernelTester()
4144 .cr(16)
4145 .kr(9)
4146 .channels(channels)
4147 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4148 }
4149 }
4150
4151 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16) {
4152 for (uint32_t channels = 17; channels < 32; channels++) {
4153 DWConvMicrokernelTester()
4154 .cr(16)
4155 .kr(9)
4156 .channels(channels)
4157 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4158 }
4159 }
4160
4161 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
4162 for (uint32_t channels = 17; channels < 32; channels++) {
4163 DWConvMicrokernelTester()
4164 .cr(16)
4165 .kr(9)
4166 .channels(channels)
4167 .qmin(128)
4168 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4169 }
4170 }
4171
4172 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
4173 for (uint32_t channels = 17; channels < 32; channels++) {
4174 DWConvMicrokernelTester()
4175 .cr(16)
4176 .kr(9)
4177 .channels(channels)
4178 .qmax(128)
4179 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4180 }
4181 }
4182
4183 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel) {
4184 for (size_t channels = 1; channels <= 80; channels += 15) {
4185 DWConvMicrokernelTester()
4186 .cr(16)
4187 .kr(9)
4188 .channels(channels)
4189 .width(3)
4190 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4191 }
4192 }
4193
4194 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
4195 for (size_t channels = 1; channels <= 80; channels += 15) {
4196 for (size_t step = 2; step <= 9; step++) {
4197 DWConvMicrokernelTester()
4198 .cr(16)
4199 .kr(9)
4200 .channels(channels)
4201 .width(3)
4202 .step(step)
4203 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4204 }
4205 }
4206 }
4207
4208 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
4209 for (size_t channels = 1; channels <= 80; channels += 15) {
4210 DWConvMicrokernelTester()
4211 .cr(16)
4212 .kr(9)
4213 .channels(16)
4214 .width(5)
4215 .output_stride(83)
4216 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4217 }
4218 }
4219
4220 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
4221 for (size_t channels = 1; channels <= 80; channels += 15) {
4222 DWConvMicrokernelTester()
4223 .cr(16)
4224 .kr(9)
4225 .channels(channels)
4226 .width(3)
4227 .qmin(128)
4228 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4229 }
4230 }
4231
4232 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
4233 for (size_t channels = 1; channels <= 80; channels += 15) {
4234 DWConvMicrokernelTester()
4235 .cr(16)
4236 .kr(9)
4237 .channels(channels)
4238 .width(3)
4239 .qmax(128)
4240 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4241 }
4242 }
4243
4244 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, input_offset) {
4245 for (uint32_t channels = 32; channels < 256; channels += 48) {
4246 DWConvMicrokernelTester()
4247 .cr(16)
4248 .kr(9)
4249 .channels(channels)
4250 .input_offset(304)
4251 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4252 }
4253 }
4254
4255 TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, zero) {
4256 for (uint32_t mz = 0; mz < 9; mz++) {
4257 for (uint32_t channels = 32; channels < 256; channels += 48) {
4258 DWConvMicrokernelTester()
4259 .cr(16)
4260 .kr(9)
4261 .channels(channels)
4262 .input_offset(304)
4263 .zero_index(mz)
4264 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
4265 }
4266 }
4267 }
4268#endif // XNN_ARCH_WASMSIMD
4269
4270
4271#if XNN_ARCH_WASMSIMD
4272 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_eq_24) {
4273 DWConvMicrokernelTester()
4274 .cr(24)
4275 .kr(9)
4276 .channels(24)
4277 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4278 }
4279
4280 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24) {
4281 for (uint32_t channels = 48; channels < 384; channels += 72) {
4282 DWConvMicrokernelTester()
4283 .cr(24)
4284 .kr(9)
4285 .channels(channels)
4286 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4287 }
4288 }
4289
4290 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
4291 for (uint32_t channels = 48; channels < 384; channels += 72) {
4292 DWConvMicrokernelTester()
4293 .cr(24)
4294 .kr(9)
4295 .channels(channels)
4296 .qmin(128)
4297 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4298 }
4299 }
4300
4301 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
4302 for (uint32_t channels = 48; channels < 384; channels += 72) {
4303 DWConvMicrokernelTester()
4304 .cr(24)
4305 .kr(9)
4306 .channels(channels)
4307 .qmax(128)
4308 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4309 }
4310 }
4311
4312 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_lt_24) {
4313 for (uint32_t channels = 1; channels < 24; channels++) {
4314 DWConvMicrokernelTester()
4315 .cr(24)
4316 .kr(9)
4317 .channels(channels)
4318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4319 }
4320 }
4321
4322 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24) {
4323 for (uint32_t channels = 25; channels < 48; channels++) {
4324 DWConvMicrokernelTester()
4325 .cr(24)
4326 .kr(9)
4327 .channels(channels)
4328 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4329 }
4330 }
4331
4332 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
4333 for (uint32_t channels = 25; channels < 48; channels++) {
4334 DWConvMicrokernelTester()
4335 .cr(24)
4336 .kr(9)
4337 .channels(channels)
4338 .qmin(128)
4339 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4340 }
4341 }
4342
4343 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
4344 for (uint32_t channels = 25; channels < 48; channels++) {
4345 DWConvMicrokernelTester()
4346 .cr(24)
4347 .kr(9)
4348 .channels(channels)
4349 .qmax(128)
4350 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4351 }
4352 }
4353
4354 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel) {
4355 for (size_t channels = 1; channels <= 120; channels += 23) {
4356 DWConvMicrokernelTester()
4357 .cr(24)
4358 .kr(9)
4359 .channels(channels)
4360 .width(3)
4361 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4362 }
4363 }
4364
4365 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
4366 for (size_t channels = 1; channels <= 120; channels += 23) {
4367 for (size_t step = 2; step <= 9; step++) {
4368 DWConvMicrokernelTester()
4369 .cr(24)
4370 .kr(9)
4371 .channels(channels)
4372 .width(3)
4373 .step(step)
4374 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4375 }
4376 }
4377 }
4378
4379 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
4380 for (size_t channels = 1; channels <= 120; channels += 23) {
4381 DWConvMicrokernelTester()
4382 .cr(24)
4383 .kr(9)
4384 .channels(24)
4385 .width(5)
4386 .output_stride(127)
4387 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4388 }
4389 }
4390
4391 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
4392 for (size_t channels = 1; channels <= 120; channels += 23) {
4393 DWConvMicrokernelTester()
4394 .cr(24)
4395 .kr(9)
4396 .channels(channels)
4397 .width(3)
4398 .qmin(128)
4399 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4400 }
4401 }
4402
4403 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
4404 for (size_t channels = 1; channels <= 120; channels += 23) {
4405 DWConvMicrokernelTester()
4406 .cr(24)
4407 .kr(9)
4408 .channels(channels)
4409 .width(3)
4410 .qmax(128)
4411 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4412 }
4413 }
4414
4415 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, input_offset) {
4416 for (uint32_t channels = 48; channels < 384; channels += 72) {
4417 DWConvMicrokernelTester()
4418 .cr(24)
4419 .kr(9)
4420 .channels(channels)
4421 .input_offset(464)
4422 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4423 }
4424 }
4425
4426 TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, zero) {
4427 for (uint32_t mz = 0; mz < 9; mz++) {
4428 for (uint32_t channels = 48; channels < 384; channels += 72) {
4429 DWConvMicrokernelTester()
4430 .cr(24)
4431 .kr(9)
4432 .channels(channels)
4433 .input_offset(464)
4434 .zero_index(mz)
4435 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
4436 }
4437 }
4438 }
4439#endif // XNN_ARCH_WASMSIMD