blob: 5247715d4105db4174564ed172738b037d777320 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-dwconv-minmax.yaml
//   Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
Marat Dukhan023bcf92020-08-10 12:40:50 -070023#if XNN_ARCH_ARM || XNN_ARCH_ARM64
24 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_eq_8) {
25 TEST_REQUIRES_ARM_NEON;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(9)
29 .channels(8)
30 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
31 }
32
33 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8) {
34 TEST_REQUIRES_ARM_NEON;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(9)
39 .channels(channels)
40 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
41 }
42 }
43
44 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
45 TEST_REQUIRES_ARM_NEON;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
52 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
53 }
54 }
55
56 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
57 TEST_REQUIRES_ARM_NEON;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
64 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
65 }
66 }
67
68 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_lt_8) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(9)
74 .channels(channels)
75 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
76 }
77 }
78
79 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8) {
80 TEST_REQUIRES_ARM_NEON;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(9)
85 .channels(channels)
86 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
87 }
88 }
89
90 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
91 TEST_REQUIRES_ARM_NEON;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
98 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
99 }
100 }
101
102 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
110 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
111 }
112 }
113
114 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(9)
120 .channels(channels)
121 .width(3)
122 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
123 }
124 }
125
126 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
136 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
137 }
138 }
139 }
140
141 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(9)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
150 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
151 }
152 }
153
154 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
163 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
164 }
165 }
166
167 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
176 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
177 }
178 }
179
180 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, input_offset) {
181 TEST_REQUIRES_ARM_NEON;
182 for (uint32_t channels = 16; channels < 128; channels += 24) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(9)
186 .channels(channels)
187 .input_offset(176)
188 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
189 }
190 }
191
192 TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, zero) {
193 TEST_REQUIRES_ARM_NEON;
194 for (uint32_t mz = 0; mz < 9; mz++) {
195 for (uint32_t channels = 16; channels < 128; channels += 24) {
196 DWConvMicrokernelTester()
197 .cr(8)
198 .kr(9)
199 .channels(channels)
200 .input_offset(176)
201 .zero_index(mz)
202 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16);
203 }
204 }
205 }
206#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
207
208
209#if XNN_ARCH_ARM || XNN_ARCH_ARM64
210 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_eq_16) {
211 TEST_REQUIRES_ARM_NEON;
212 DWConvMicrokernelTester()
213 .cr(16)
214 .kr(9)
215 .channels(16)
216 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
217 }
218
219 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16) {
220 TEST_REQUIRES_ARM_NEON;
221 for (uint32_t channels = 32; channels < 256; channels += 48) {
222 DWConvMicrokernelTester()
223 .cr(16)
224 .kr(9)
225 .channels(channels)
226 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
227 }
228 }
229
230 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
231 TEST_REQUIRES_ARM_NEON;
232 for (uint32_t channels = 32; channels < 256; channels += 48) {
233 DWConvMicrokernelTester()
234 .cr(16)
235 .kr(9)
236 .channels(channels)
237 .qmin(128)
238 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
239 }
240 }
241
242 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t channels = 32; channels < 256; channels += 48) {
245 DWConvMicrokernelTester()
246 .cr(16)
247 .kr(9)
248 .channels(channels)
249 .qmax(128)
250 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
251 }
252 }
253
254 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_lt_16) {
255 TEST_REQUIRES_ARM_NEON;
256 for (uint32_t channels = 1; channels < 16; channels++) {
257 DWConvMicrokernelTester()
258 .cr(16)
259 .kr(9)
260 .channels(channels)
261 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
262 }
263 }
264
265 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16) {
266 TEST_REQUIRES_ARM_NEON;
267 for (uint32_t channels = 17; channels < 32; channels++) {
268 DWConvMicrokernelTester()
269 .cr(16)
270 .kr(9)
271 .channels(channels)
272 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
273 }
274 }
275
276 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
277 TEST_REQUIRES_ARM_NEON;
278 for (uint32_t channels = 17; channels < 32; channels++) {
279 DWConvMicrokernelTester()
280 .cr(16)
281 .kr(9)
282 .channels(channels)
283 .qmin(128)
284 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
285 }
286 }
287
288 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t channels = 17; channels < 32; channels++) {
291 DWConvMicrokernelTester()
292 .cr(16)
293 .kr(9)
294 .channels(channels)
295 .qmax(128)
296 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
297 }
298 }
299
300 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel) {
301 TEST_REQUIRES_ARM_NEON;
302 for (size_t channels = 1; channels <= 80; channels += 15) {
303 DWConvMicrokernelTester()
304 .cr(16)
305 .kr(9)
306 .channels(channels)
307 .width(3)
308 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
309 }
310 }
311
312 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_step) {
313 TEST_REQUIRES_ARM_NEON;
314 for (size_t channels = 1; channels <= 80; channels += 15) {
315 for (size_t step = 2; step <= 9; step++) {
316 DWConvMicrokernelTester()
317 .cr(16)
318 .kr(9)
319 .channels(channels)
320 .width(3)
321 .step(step)
322 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
323 }
324 }
325 }
326
327 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
328 TEST_REQUIRES_ARM_NEON;
329 for (size_t channels = 1; channels <= 80; channels += 15) {
330 DWConvMicrokernelTester()
331 .cr(16)
332 .kr(9)
333 .channels(16)
334 .width(5)
335 .output_stride(83)
336 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
337 }
338 }
339
340 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmin) {
341 TEST_REQUIRES_ARM_NEON;
342 for (size_t channels = 1; channels <= 80; channels += 15) {
343 DWConvMicrokernelTester()
344 .cr(16)
345 .kr(9)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
349 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
350 }
351 }
352
353 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmax) {
354 TEST_REQUIRES_ARM_NEON;
355 for (size_t channels = 1; channels <= 80; channels += 15) {
356 DWConvMicrokernelTester()
357 .cr(16)
358 .kr(9)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
362 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
363 }
364 }
365
366 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, input_offset) {
367 TEST_REQUIRES_ARM_NEON;
368 for (uint32_t channels = 32; channels < 256; channels += 48) {
369 DWConvMicrokernelTester()
370 .cr(16)
371 .kr(9)
372 .channels(channels)
373 .input_offset(304)
374 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
375 }
376 }
377
378 TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, zero) {
379 TEST_REQUIRES_ARM_NEON;
380 for (uint32_t mz = 0; mz < 9; mz++) {
381 for (uint32_t channels = 32; channels < 256; channels += 48) {
382 DWConvMicrokernelTester()
383 .cr(16)
384 .kr(9)
385 .channels(channels)
386 .input_offset(304)
387 .zero_index(mz)
388 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16);
389 }
390 }
391 }
392#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
393
394
395#if XNN_ARCH_ARM || XNN_ARCH_ARM64
396 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_eq_24) {
397 TEST_REQUIRES_ARM_NEON;
398 DWConvMicrokernelTester()
399 .cr(24)
400 .kr(9)
401 .channels(24)
402 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
403 }
404
405 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24) {
406 TEST_REQUIRES_ARM_NEON;
407 for (uint32_t channels = 48; channels < 384; channels += 72) {
408 DWConvMicrokernelTester()
409 .cr(24)
410 .kr(9)
411 .channels(channels)
412 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
413 }
414 }
415
416 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
417 TEST_REQUIRES_ARM_NEON;
418 for (uint32_t channels = 48; channels < 384; channels += 72) {
419 DWConvMicrokernelTester()
420 .cr(24)
421 .kr(9)
422 .channels(channels)
423 .qmin(128)
424 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
425 }
426 }
427
428 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
429 TEST_REQUIRES_ARM_NEON;
430 for (uint32_t channels = 48; channels < 384; channels += 72) {
431 DWConvMicrokernelTester()
432 .cr(24)
433 .kr(9)
434 .channels(channels)
435 .qmax(128)
436 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
437 }
438 }
439
440 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_lt_24) {
441 TEST_REQUIRES_ARM_NEON;
442 for (uint32_t channels = 1; channels < 24; channels++) {
443 DWConvMicrokernelTester()
444 .cr(24)
445 .kr(9)
446 .channels(channels)
447 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
448 }
449 }
450
451 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24) {
452 TEST_REQUIRES_ARM_NEON;
453 for (uint32_t channels = 25; channels < 48; channels++) {
454 DWConvMicrokernelTester()
455 .cr(24)
456 .kr(9)
457 .channels(channels)
458 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
459 }
460 }
461
462 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
463 TEST_REQUIRES_ARM_NEON;
464 for (uint32_t channels = 25; channels < 48; channels++) {
465 DWConvMicrokernelTester()
466 .cr(24)
467 .kr(9)
468 .channels(channels)
469 .qmin(128)
470 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
471 }
472 }
473
474 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
475 TEST_REQUIRES_ARM_NEON;
476 for (uint32_t channels = 25; channels < 48; channels++) {
477 DWConvMicrokernelTester()
478 .cr(24)
479 .kr(9)
480 .channels(channels)
481 .qmax(128)
482 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
483 }
484 }
485
486 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel) {
487 TEST_REQUIRES_ARM_NEON;
488 for (size_t channels = 1; channels <= 120; channels += 23) {
489 DWConvMicrokernelTester()
490 .cr(24)
491 .kr(9)
492 .channels(channels)
493 .width(3)
494 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
495 }
496 }
497
498 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_step) {
499 TEST_REQUIRES_ARM_NEON;
500 for (size_t channels = 1; channels <= 120; channels += 23) {
501 for (size_t step = 2; step <= 9; step++) {
502 DWConvMicrokernelTester()
503 .cr(24)
504 .kr(9)
505 .channels(channels)
506 .width(3)
507 .step(step)
508 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
509 }
510 }
511 }
512
513 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
514 TEST_REQUIRES_ARM_NEON;
515 for (size_t channels = 1; channels <= 120; channels += 23) {
516 DWConvMicrokernelTester()
517 .cr(24)
518 .kr(9)
519 .channels(24)
520 .width(5)
521 .output_stride(127)
522 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
523 }
524 }
525
526 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmin) {
527 TEST_REQUIRES_ARM_NEON;
528 for (size_t channels = 1; channels <= 120; channels += 23) {
529 DWConvMicrokernelTester()
530 .cr(24)
531 .kr(9)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
535 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
536 }
537 }
538
539 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmax) {
540 TEST_REQUIRES_ARM_NEON;
541 for (size_t channels = 1; channels <= 120; channels += 23) {
542 DWConvMicrokernelTester()
543 .cr(24)
544 .kr(9)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
548 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
549 }
550 }
551
552 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, input_offset) {
553 TEST_REQUIRES_ARM_NEON;
554 for (uint32_t channels = 48; channels < 384; channels += 72) {
555 DWConvMicrokernelTester()
556 .cr(24)
557 .kr(9)
558 .channels(channels)
559 .input_offset(464)
560 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
561 }
562 }
563
564 TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, zero) {
565 TEST_REQUIRES_ARM_NEON;
566 for (uint32_t mz = 0; mz < 9; mz++) {
567 for (uint32_t channels = 48; channels < 384; channels += 72) {
568 DWConvMicrokernelTester()
569 .cr(24)
570 .kr(9)
571 .channels(channels)
572 .input_offset(464)
573 .zero_index(mz)
574 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16);
575 }
576 }
577 }
578#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
579
580
581#if XNN_ARCH_ARM || XNN_ARCH_ARM64
582 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_eq_32) {
583 TEST_REQUIRES_ARM_NEON;
584 DWConvMicrokernelTester()
585 .cr(32)
586 .kr(9)
587 .channels(32)
588 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
589 }
590
591 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32) {
592 TEST_REQUIRES_ARM_NEON;
593 for (uint32_t channels = 64; channels < 512; channels += 96) {
594 DWConvMicrokernelTester()
595 .cr(32)
596 .kr(9)
597 .channels(channels)
598 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
599 }
600 }
601
602 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
603 TEST_REQUIRES_ARM_NEON;
604 for (uint32_t channels = 64; channels < 512; channels += 96) {
605 DWConvMicrokernelTester()
606 .cr(32)
607 .kr(9)
608 .channels(channels)
609 .qmin(128)
610 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
611 }
612 }
613
614 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
615 TEST_REQUIRES_ARM_NEON;
616 for (uint32_t channels = 64; channels < 512; channels += 96) {
617 DWConvMicrokernelTester()
618 .cr(32)
619 .kr(9)
620 .channels(channels)
621 .qmax(128)
622 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
623 }
624 }
625
626 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_lt_32) {
627 TEST_REQUIRES_ARM_NEON;
628 for (uint32_t channels = 1; channels < 32; channels++) {
629 DWConvMicrokernelTester()
630 .cr(32)
631 .kr(9)
632 .channels(channels)
633 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
634 }
635 }
636
637 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32) {
638 TEST_REQUIRES_ARM_NEON;
639 for (uint32_t channels = 33; channels < 64; channels++) {
640 DWConvMicrokernelTester()
641 .cr(32)
642 .kr(9)
643 .channels(channels)
644 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
645 }
646 }
647
648 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
649 TEST_REQUIRES_ARM_NEON;
650 for (uint32_t channels = 33; channels < 64; channels++) {
651 DWConvMicrokernelTester()
652 .cr(32)
653 .kr(9)
654 .channels(channels)
655 .qmin(128)
656 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
657 }
658 }
659
660 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
661 TEST_REQUIRES_ARM_NEON;
662 for (uint32_t channels = 33; channels < 64; channels++) {
663 DWConvMicrokernelTester()
664 .cr(32)
665 .kr(9)
666 .channels(channels)
667 .qmax(128)
668 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
669 }
670 }
671
672 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel) {
673 TEST_REQUIRES_ARM_NEON;
674 for (size_t channels = 1; channels <= 160; channels += 31) {
675 DWConvMicrokernelTester()
676 .cr(32)
677 .kr(9)
678 .channels(channels)
679 .width(3)
680 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
681 }
682 }
683
684 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_step) {
685 TEST_REQUIRES_ARM_NEON;
686 for (size_t channels = 1; channels <= 160; channels += 31) {
687 for (size_t step = 2; step <= 9; step++) {
688 DWConvMicrokernelTester()
689 .cr(32)
690 .kr(9)
691 .channels(channels)
692 .width(3)
693 .step(step)
694 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
695 }
696 }
697 }
698
699 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
700 TEST_REQUIRES_ARM_NEON;
701 for (size_t channels = 1; channels <= 160; channels += 31) {
702 DWConvMicrokernelTester()
703 .cr(32)
704 .kr(9)
705 .channels(32)
706 .width(5)
707 .output_stride(163)
708 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
709 }
710 }
711
712 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmin) {
713 TEST_REQUIRES_ARM_NEON;
714 for (size_t channels = 1; channels <= 160; channels += 31) {
715 DWConvMicrokernelTester()
716 .cr(32)
717 .kr(9)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
721 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
722 }
723 }
724
725 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmax) {
726 TEST_REQUIRES_ARM_NEON;
727 for (size_t channels = 1; channels <= 160; channels += 31) {
728 DWConvMicrokernelTester()
729 .cr(32)
730 .kr(9)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
734 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
735 }
736 }
737
738 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, input_offset) {
739 TEST_REQUIRES_ARM_NEON;
740 for (uint32_t channels = 64; channels < 512; channels += 96) {
741 DWConvMicrokernelTester()
742 .cr(32)
743 .kr(9)
744 .channels(channels)
745 .input_offset(592)
746 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
747 }
748 }
749
750 TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, zero) {
751 TEST_REQUIRES_ARM_NEON;
752 for (uint32_t mz = 0; mz < 9; mz++) {
753 for (uint32_t channels = 64; channels < 512; channels += 96) {
754 DWConvMicrokernelTester()
755 .cr(32)
756 .kr(9)
757 .channels(channels)
758 .input_offset(592)
759 .zero_index(mz)
760 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16);
761 }
762 }
763 }
764#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
Marat Dukhanf62bbdc2020-08-04 13:59:04 -0700767#if XNN_ARCH_X86 || XNN_ARCH_X86_64
768 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_eq_8) {
769 TEST_REQUIRES_X86_SSE2;
770 DWConvMicrokernelTester()
771 .cr(8)
772 .kr(9)
773 .channels(8)
774 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
775 }
776
777 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8) {
778 TEST_REQUIRES_X86_SSE2;
779 for (uint32_t channels = 16; channels < 128; channels += 24) {
780 DWConvMicrokernelTester()
781 .cr(8)
782 .kr(9)
783 .channels(channels)
784 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
785 }
786 }
787
788 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
789 TEST_REQUIRES_X86_SSE2;
790 for (uint32_t channels = 16; channels < 128; channels += 24) {
791 DWConvMicrokernelTester()
792 .cr(8)
793 .kr(9)
794 .channels(channels)
795 .qmin(128)
796 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
797 }
798 }
799
800 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
801 TEST_REQUIRES_X86_SSE2;
802 for (uint32_t channels = 16; channels < 128; channels += 24) {
803 DWConvMicrokernelTester()
804 .cr(8)
805 .kr(9)
806 .channels(channels)
807 .qmax(128)
808 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
809 }
810 }
811
812 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_lt_8) {
813 TEST_REQUIRES_X86_SSE2;
814 for (uint32_t channels = 1; channels < 8; channels++) {
815 DWConvMicrokernelTester()
816 .cr(8)
817 .kr(9)
818 .channels(channels)
819 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
820 }
821 }
822
823 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8) {
824 TEST_REQUIRES_X86_SSE2;
825 for (uint32_t channels = 9; channels < 16; channels++) {
826 DWConvMicrokernelTester()
827 .cr(8)
828 .kr(9)
829 .channels(channels)
830 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
831 }
832 }
833
834 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
835 TEST_REQUIRES_X86_SSE2;
836 for (uint32_t channels = 9; channels < 16; channels++) {
837 DWConvMicrokernelTester()
838 .cr(8)
839 .kr(9)
840 .channels(channels)
841 .qmin(128)
842 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
843 }
844 }
845
846 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
847 TEST_REQUIRES_X86_SSE2;
848 for (uint32_t channels = 9; channels < 16; channels++) {
849 DWConvMicrokernelTester()
850 .cr(8)
851 .kr(9)
852 .channels(channels)
853 .qmax(128)
854 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
855 }
856 }
857
858 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel) {
859 TEST_REQUIRES_X86_SSE2;
860 for (size_t channels = 1; channels <= 40; channels += 7) {
861 DWConvMicrokernelTester()
862 .cr(8)
863 .kr(9)
864 .channels(channels)
865 .width(3)
866 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
867 }
868 }
869
870 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_step) {
871 TEST_REQUIRES_X86_SSE2;
872 for (size_t channels = 1; channels <= 40; channels += 7) {
873 for (size_t step = 2; step <= 9; step++) {
874 DWConvMicrokernelTester()
875 .cr(8)
876 .kr(9)
877 .channels(channels)
878 .width(3)
879 .step(step)
880 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
881 }
882 }
883 }
884
885 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
886 TEST_REQUIRES_X86_SSE2;
887 for (size_t channels = 1; channels <= 40; channels += 7) {
888 DWConvMicrokernelTester()
889 .cr(8)
890 .kr(9)
891 .channels(8)
892 .width(5)
893 .output_stride(43)
894 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
895 }
896 }
897
898 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
899 TEST_REQUIRES_X86_SSE2;
900 for (size_t channels = 1; channels <= 40; channels += 7) {
901 DWConvMicrokernelTester()
902 .cr(8)
903 .kr(9)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
907 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
908 }
909 }
910
911 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
912 TEST_REQUIRES_X86_SSE2;
913 for (size_t channels = 1; channels <= 40; channels += 7) {
914 DWConvMicrokernelTester()
915 .cr(8)
916 .kr(9)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
920 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
921 }
922 }
923
924 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, input_offset) {
925 TEST_REQUIRES_X86_SSE2;
926 for (uint32_t channels = 16; channels < 128; channels += 24) {
927 DWConvMicrokernelTester()
928 .cr(8)
929 .kr(9)
930 .channels(channels)
931 .input_offset(176)
932 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
933 }
934 }
935
936 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, zero) {
937 TEST_REQUIRES_X86_SSE2;
938 for (uint32_t mz = 0; mz < 9; mz++) {
939 for (uint32_t channels = 16; channels < 128; channels += 24) {
940 DWConvMicrokernelTester()
941 .cr(8)
942 .kr(9)
943 .channels(channels)
944 .input_offset(176)
945 .zero_index(mz)
946 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16);
947 }
948 }
949 }
950#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
951
952
953#if XNN_ARCH_X86 || XNN_ARCH_X86_64
954 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_eq_16) {
955 TEST_REQUIRES_X86_SSE2;
956 DWConvMicrokernelTester()
957 .cr(16)
958 .kr(9)
959 .channels(16)
960 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
961 }
962
963 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16) {
964 TEST_REQUIRES_X86_SSE2;
965 for (uint32_t channels = 32; channels < 256; channels += 48) {
966 DWConvMicrokernelTester()
967 .cr(16)
968 .kr(9)
969 .channels(channels)
970 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
971 }
972 }
973
974 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
975 TEST_REQUIRES_X86_SSE2;
976 for (uint32_t channels = 32; channels < 256; channels += 48) {
977 DWConvMicrokernelTester()
978 .cr(16)
979 .kr(9)
980 .channels(channels)
981 .qmin(128)
982 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
983 }
984 }
985
986 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
987 TEST_REQUIRES_X86_SSE2;
988 for (uint32_t channels = 32; channels < 256; channels += 48) {
989 DWConvMicrokernelTester()
990 .cr(16)
991 .kr(9)
992 .channels(channels)
993 .qmax(128)
994 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
995 }
996 }
997
998 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_lt_16) {
999 TEST_REQUIRES_X86_SSE2;
1000 for (uint32_t channels = 1; channels < 16; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(16)
1003 .kr(9)
1004 .channels(channels)
1005 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1006 }
1007 }
1008
1009 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16) {
1010 TEST_REQUIRES_X86_SSE2;
1011 for (uint32_t channels = 17; channels < 32; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(16)
1014 .kr(9)
1015 .channels(channels)
1016 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1017 }
1018 }
1019
1020 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
1021 TEST_REQUIRES_X86_SSE2;
1022 for (uint32_t channels = 17; channels < 32; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(16)
1025 .kr(9)
1026 .channels(channels)
1027 .qmin(128)
1028 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1029 }
1030 }
1031
1032 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
1033 TEST_REQUIRES_X86_SSE2;
1034 for (uint32_t channels = 17; channels < 32; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(16)
1037 .kr(9)
1038 .channels(channels)
1039 .qmax(128)
1040 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1041 }
1042 }
1043
1044 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel) {
1045 TEST_REQUIRES_X86_SSE2;
1046 for (size_t channels = 1; channels <= 80; channels += 15) {
1047 DWConvMicrokernelTester()
1048 .cr(16)
1049 .kr(9)
1050 .channels(channels)
1051 .width(3)
1052 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1053 }
1054 }
1055
1056 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_step) {
1057 TEST_REQUIRES_X86_SSE2;
1058 for (size_t channels = 1; channels <= 80; channels += 15) {
1059 for (size_t step = 2; step <= 9; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(16)
1062 .kr(9)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
1066 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1067 }
1068 }
1069 }
1070
1071 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
1072 TEST_REQUIRES_X86_SSE2;
1073 for (size_t channels = 1; channels <= 80; channels += 15) {
1074 DWConvMicrokernelTester()
1075 .cr(16)
1076 .kr(9)
1077 .channels(16)
1078 .width(5)
1079 .output_stride(83)
1080 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1081 }
1082 }
1083
1084 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
1085 TEST_REQUIRES_X86_SSE2;
1086 for (size_t channels = 1; channels <= 80; channels += 15) {
1087 DWConvMicrokernelTester()
1088 .cr(16)
1089 .kr(9)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
1093 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1094 }
1095 }
1096
1097 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
1098 TEST_REQUIRES_X86_SSE2;
1099 for (size_t channels = 1; channels <= 80; channels += 15) {
1100 DWConvMicrokernelTester()
1101 .cr(16)
1102 .kr(9)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
1106 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1107 }
1108 }
1109
1110 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, input_offset) {
1111 TEST_REQUIRES_X86_SSE2;
1112 for (uint32_t channels = 32; channels < 256; channels += 48) {
1113 DWConvMicrokernelTester()
1114 .cr(16)
1115 .kr(9)
1116 .channels(channels)
1117 .input_offset(304)
1118 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1119 }
1120 }
1121
1122 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, zero) {
1123 TEST_REQUIRES_X86_SSE2;
1124 for (uint32_t mz = 0; mz < 9; mz++) {
1125 for (uint32_t channels = 32; channels < 256; channels += 48) {
1126 DWConvMicrokernelTester()
1127 .cr(16)
1128 .kr(9)
1129 .channels(channels)
1130 .input_offset(304)
1131 .zero_index(mz)
1132 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16);
1133 }
1134 }
1135 }
1136#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1137
1138
1139#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1140 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_eq_24) {
1141 TEST_REQUIRES_X86_SSE2;
1142 DWConvMicrokernelTester()
1143 .cr(24)
1144 .kr(9)
1145 .channels(24)
1146 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1147 }
1148
1149 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24) {
1150 TEST_REQUIRES_X86_SSE2;
1151 for (uint32_t channels = 48; channels < 384; channels += 72) {
1152 DWConvMicrokernelTester()
1153 .cr(24)
1154 .kr(9)
1155 .channels(channels)
1156 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1157 }
1158 }
1159
1160 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
1161 TEST_REQUIRES_X86_SSE2;
1162 for (uint32_t channels = 48; channels < 384; channels += 72) {
1163 DWConvMicrokernelTester()
1164 .cr(24)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
1168 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1169 }
1170 }
1171
1172 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
1173 TEST_REQUIRES_X86_SSE2;
1174 for (uint32_t channels = 48; channels < 384; channels += 72) {
1175 DWConvMicrokernelTester()
1176 .cr(24)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
1180 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1181 }
1182 }
1183
1184 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_lt_24) {
1185 TEST_REQUIRES_X86_SSE2;
1186 for (uint32_t channels = 1; channels < 24; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(24)
1189 .kr(9)
1190 .channels(channels)
1191 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1192 }
1193 }
1194
1195 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24) {
1196 TEST_REQUIRES_X86_SSE2;
1197 for (uint32_t channels = 25; channels < 48; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(24)
1200 .kr(9)
1201 .channels(channels)
1202 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1203 }
1204 }
1205
1206 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
1207 TEST_REQUIRES_X86_SSE2;
1208 for (uint32_t channels = 25; channels < 48; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(24)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
1214 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1215 }
1216 }
1217
1218 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
1219 TEST_REQUIRES_X86_SSE2;
1220 for (uint32_t channels = 25; channels < 48; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(24)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
1226 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1227 }
1228 }
1229
1230 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel) {
1231 TEST_REQUIRES_X86_SSE2;
1232 for (size_t channels = 1; channels <= 120; channels += 23) {
1233 DWConvMicrokernelTester()
1234 .cr(24)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
1238 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1239 }
1240 }
1241
1242 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_step) {
1243 TEST_REQUIRES_X86_SSE2;
1244 for (size_t channels = 1; channels <= 120; channels += 23) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(24)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
1252 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1253 }
1254 }
1255 }
1256
1257 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
1258 TEST_REQUIRES_X86_SSE2;
1259 for (size_t channels = 1; channels <= 120; channels += 23) {
1260 DWConvMicrokernelTester()
1261 .cr(24)
1262 .kr(9)
1263 .channels(24)
1264 .width(5)
1265 .output_stride(127)
1266 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1267 }
1268 }
1269
1270 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
1271 TEST_REQUIRES_X86_SSE2;
1272 for (size_t channels = 1; channels <= 120; channels += 23) {
1273 DWConvMicrokernelTester()
1274 .cr(24)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
1279 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1280 }
1281 }
1282
1283 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
1284 TEST_REQUIRES_X86_SSE2;
1285 for (size_t channels = 1; channels <= 120; channels += 23) {
1286 DWConvMicrokernelTester()
1287 .cr(24)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
1292 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1293 }
1294 }
1295
1296 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, input_offset) {
1297 TEST_REQUIRES_X86_SSE2;
1298 for (uint32_t channels = 48; channels < 384; channels += 72) {
1299 DWConvMicrokernelTester()
1300 .cr(24)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(464)
1304 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1305 }
1306 }
1307
1308 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, zero) {
1309 TEST_REQUIRES_X86_SSE2;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 48; channels < 384; channels += 72) {
1312 DWConvMicrokernelTester()
1313 .cr(24)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(464)
1317 .zero_index(mz)
1318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16);
1319 }
1320 }
1321 }
1322#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1323
1324
1325#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1326 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_eq_8) {
1327 TEST_REQUIRES_X86_SSSE3;
1328 DWConvMicrokernelTester()
1329 .cr(8)
1330 .kr(9)
1331 .channels(8)
1332 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1333 }
1334
1335 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8) {
1336 TEST_REQUIRES_X86_SSSE3;
1337 for (uint32_t channels = 16; channels < 128; channels += 24) {
1338 DWConvMicrokernelTester()
1339 .cr(8)
1340 .kr(9)
1341 .channels(channels)
1342 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1343 }
1344 }
1345
1346 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmin) {
1347 TEST_REQUIRES_X86_SSSE3;
1348 for (uint32_t channels = 16; channels < 128; channels += 24) {
1349 DWConvMicrokernelTester()
1350 .cr(8)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
1354 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1355 }
1356 }
1357
1358 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmax) {
1359 TEST_REQUIRES_X86_SSSE3;
1360 for (uint32_t channels = 16; channels < 128; channels += 24) {
1361 DWConvMicrokernelTester()
1362 .cr(8)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
1366 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1367 }
1368 }
1369
1370 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_lt_8) {
1371 TEST_REQUIRES_X86_SSSE3;
1372 for (uint32_t channels = 1; channels < 8; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(8)
1375 .kr(9)
1376 .channels(channels)
1377 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1378 }
1379 }
1380
1381 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8) {
1382 TEST_REQUIRES_X86_SSSE3;
1383 for (uint32_t channels = 9; channels < 16; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(8)
1386 .kr(9)
1387 .channels(channels)
1388 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1389 }
1390 }
1391
1392 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmin) {
1393 TEST_REQUIRES_X86_SSSE3;
1394 for (uint32_t channels = 9; channels < 16; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(8)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
1400 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1401 }
1402 }
1403
1404 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmax) {
1405 TEST_REQUIRES_X86_SSSE3;
1406 for (uint32_t channels = 9; channels < 16; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(8)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
1412 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1413 }
1414 }
1415
1416 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel) {
1417 TEST_REQUIRES_X86_SSSE3;
1418 for (size_t channels = 1; channels <= 40; channels += 7) {
1419 DWConvMicrokernelTester()
1420 .cr(8)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
1424 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1425 }
1426 }
1427
1428 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_step) {
1429 TEST_REQUIRES_X86_SSSE3;
1430 for (size_t channels = 1; channels <= 40; channels += 7) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(8)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
1438 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1439 }
1440 }
1441 }
1442
1443 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_output_stride) {
1444 TEST_REQUIRES_X86_SSSE3;
1445 for (size_t channels = 1; channels <= 40; channels += 7) {
1446 DWConvMicrokernelTester()
1447 .cr(8)
1448 .kr(9)
1449 .channels(8)
1450 .width(5)
1451 .output_stride(43)
1452 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1453 }
1454 }
1455
1456 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmin) {
1457 TEST_REQUIRES_X86_SSSE3;
1458 for (size_t channels = 1; channels <= 40; channels += 7) {
1459 DWConvMicrokernelTester()
1460 .cr(8)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
1465 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1466 }
1467 }
1468
1469 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmax) {
1470 TEST_REQUIRES_X86_SSSE3;
1471 for (size_t channels = 1; channels <= 40; channels += 7) {
1472 DWConvMicrokernelTester()
1473 .cr(8)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
1478 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1479 }
1480 }
1481
1482 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, input_offset) {
1483 TEST_REQUIRES_X86_SSSE3;
1484 for (uint32_t channels = 16; channels < 128; channels += 24) {
1485 DWConvMicrokernelTester()
1486 .cr(8)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(176)
1490 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1491 }
1492 }
1493
1494 TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, zero) {
1495 TEST_REQUIRES_X86_SSSE3;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 16; channels < 128; channels += 24) {
1498 DWConvMicrokernelTester()
1499 .cr(8)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(176)
1503 .zero_index(mz)
1504 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16);
1505 }
1506 }
1507 }
1508#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1509
1510
1511#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1512 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_eq_16) {
1513 TEST_REQUIRES_X86_SSSE3;
1514 DWConvMicrokernelTester()
1515 .cr(16)
1516 .kr(9)
1517 .channels(16)
1518 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1519 }
1520
1521 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16) {
1522 TEST_REQUIRES_X86_SSSE3;
1523 for (uint32_t channels = 32; channels < 256; channels += 48) {
1524 DWConvMicrokernelTester()
1525 .cr(16)
1526 .kr(9)
1527 .channels(channels)
1528 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1529 }
1530 }
1531
1532 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmin) {
1533 TEST_REQUIRES_X86_SSSE3;
1534 for (uint32_t channels = 32; channels < 256; channels += 48) {
1535 DWConvMicrokernelTester()
1536 .cr(16)
1537 .kr(9)
1538 .channels(channels)
1539 .qmin(128)
1540 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1541 }
1542 }
1543
1544 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmax) {
1545 TEST_REQUIRES_X86_SSSE3;
1546 for (uint32_t channels = 32; channels < 256; channels += 48) {
1547 DWConvMicrokernelTester()
1548 .cr(16)
1549 .kr(9)
1550 .channels(channels)
1551 .qmax(128)
1552 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1553 }
1554 }
1555
1556 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_lt_16) {
1557 TEST_REQUIRES_X86_SSSE3;
1558 for (uint32_t channels = 1; channels < 16; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(16)
1561 .kr(9)
1562 .channels(channels)
1563 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1564 }
1565 }
1566
1567 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16) {
1568 TEST_REQUIRES_X86_SSSE3;
1569 for (uint32_t channels = 17; channels < 32; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(16)
1572 .kr(9)
1573 .channels(channels)
1574 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1575 }
1576 }
1577
1578 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmin) {
1579 TEST_REQUIRES_X86_SSSE3;
1580 for (uint32_t channels = 17; channels < 32; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(16)
1583 .kr(9)
1584 .channels(channels)
1585 .qmin(128)
1586 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1587 }
1588 }
1589
1590 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmax) {
1591 TEST_REQUIRES_X86_SSSE3;
1592 for (uint32_t channels = 17; channels < 32; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(16)
1595 .kr(9)
1596 .channels(channels)
1597 .qmax(128)
1598 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1599 }
1600 }
1601
1602 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel) {
1603 TEST_REQUIRES_X86_SSSE3;
1604 for (size_t channels = 1; channels <= 80; channels += 15) {
1605 DWConvMicrokernelTester()
1606 .cr(16)
1607 .kr(9)
1608 .channels(channels)
1609 .width(3)
1610 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1611 }
1612 }
1613
1614 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_step) {
1615 TEST_REQUIRES_X86_SSSE3;
1616 for (size_t channels = 1; channels <= 80; channels += 15) {
1617 for (size_t step = 2; step <= 9; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(16)
1620 .kr(9)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
1624 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1625 }
1626 }
1627 }
1628
1629 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_output_stride) {
1630 TEST_REQUIRES_X86_SSSE3;
1631 for (size_t channels = 1; channels <= 80; channels += 15) {
1632 DWConvMicrokernelTester()
1633 .cr(16)
1634 .kr(9)
1635 .channels(16)
1636 .width(5)
1637 .output_stride(83)
1638 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1639 }
1640 }
1641
1642 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmin) {
1643 TEST_REQUIRES_X86_SSSE3;
1644 for (size_t channels = 1; channels <= 80; channels += 15) {
1645 DWConvMicrokernelTester()
1646 .cr(16)
1647 .kr(9)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
1651 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1652 }
1653 }
1654
1655 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmax) {
1656 TEST_REQUIRES_X86_SSSE3;
1657 for (size_t channels = 1; channels <= 80; channels += 15) {
1658 DWConvMicrokernelTester()
1659 .cr(16)
1660 .kr(9)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
1664 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1665 }
1666 }
1667
1668 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, input_offset) {
1669 TEST_REQUIRES_X86_SSSE3;
1670 for (uint32_t channels = 32; channels < 256; channels += 48) {
1671 DWConvMicrokernelTester()
1672 .cr(16)
1673 .kr(9)
1674 .channels(channels)
1675 .input_offset(304)
1676 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1677 }
1678 }
1679
1680 TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, zero) {
1681 TEST_REQUIRES_X86_SSSE3;
1682 for (uint32_t mz = 0; mz < 9; mz++) {
1683 for (uint32_t channels = 32; channels < 256; channels += 48) {
1684 DWConvMicrokernelTester()
1685 .cr(16)
1686 .kr(9)
1687 .channels(channels)
1688 .input_offset(304)
1689 .zero_index(mz)
1690 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16);
1691 }
1692 }
1693 }
1694#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1695
1696
1697#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1698 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_eq_24) {
1699 TEST_REQUIRES_X86_SSSE3;
1700 DWConvMicrokernelTester()
1701 .cr(24)
1702 .kr(9)
1703 .channels(24)
1704 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1705 }
1706
1707 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24) {
1708 TEST_REQUIRES_X86_SSSE3;
1709 for (uint32_t channels = 48; channels < 384; channels += 72) {
1710 DWConvMicrokernelTester()
1711 .cr(24)
1712 .kr(9)
1713 .channels(channels)
1714 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1715 }
1716 }
1717
1718 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmin) {
1719 TEST_REQUIRES_X86_SSSE3;
1720 for (uint32_t channels = 48; channels < 384; channels += 72) {
1721 DWConvMicrokernelTester()
1722 .cr(24)
1723 .kr(9)
1724 .channels(channels)
1725 .qmin(128)
1726 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1727 }
1728 }
1729
1730 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmax) {
1731 TEST_REQUIRES_X86_SSSE3;
1732 for (uint32_t channels = 48; channels < 384; channels += 72) {
1733 DWConvMicrokernelTester()
1734 .cr(24)
1735 .kr(9)
1736 .channels(channels)
1737 .qmax(128)
1738 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1739 }
1740 }
1741
1742 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_lt_24) {
1743 TEST_REQUIRES_X86_SSSE3;
1744 for (uint32_t channels = 1; channels < 24; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(24)
1747 .kr(9)
1748 .channels(channels)
1749 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1750 }
1751 }
1752
1753 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24) {
1754 TEST_REQUIRES_X86_SSSE3;
1755 for (uint32_t channels = 25; channels < 48; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(24)
1758 .kr(9)
1759 .channels(channels)
1760 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1761 }
1762 }
1763
1764 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmin) {
1765 TEST_REQUIRES_X86_SSSE3;
1766 for (uint32_t channels = 25; channels < 48; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(24)
1769 .kr(9)
1770 .channels(channels)
1771 .qmin(128)
1772 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1773 }
1774 }
1775
1776 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmax) {
1777 TEST_REQUIRES_X86_SSSE3;
1778 for (uint32_t channels = 25; channels < 48; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(24)
1781 .kr(9)
1782 .channels(channels)
1783 .qmax(128)
1784 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1785 }
1786 }
1787
1788 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel) {
1789 TEST_REQUIRES_X86_SSSE3;
1790 for (size_t channels = 1; channels <= 120; channels += 23) {
1791 DWConvMicrokernelTester()
1792 .cr(24)
1793 .kr(9)
1794 .channels(channels)
1795 .width(3)
1796 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1797 }
1798 }
1799
1800 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_step) {
1801 TEST_REQUIRES_X86_SSSE3;
1802 for (size_t channels = 1; channels <= 120; channels += 23) {
1803 for (size_t step = 2; step <= 9; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(24)
1806 .kr(9)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
1810 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1811 }
1812 }
1813 }
1814
1815 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_output_stride) {
1816 TEST_REQUIRES_X86_SSSE3;
1817 for (size_t channels = 1; channels <= 120; channels += 23) {
1818 DWConvMicrokernelTester()
1819 .cr(24)
1820 .kr(9)
1821 .channels(24)
1822 .width(5)
1823 .output_stride(127)
1824 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1825 }
1826 }
1827
1828 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmin) {
1829 TEST_REQUIRES_X86_SSSE3;
1830 for (size_t channels = 1; channels <= 120; channels += 23) {
1831 DWConvMicrokernelTester()
1832 .cr(24)
1833 .kr(9)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
1837 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1838 }
1839 }
1840
1841 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmax) {
1842 TEST_REQUIRES_X86_SSSE3;
1843 for (size_t channels = 1; channels <= 120; channels += 23) {
1844 DWConvMicrokernelTester()
1845 .cr(24)
1846 .kr(9)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
1850 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1851 }
1852 }
1853
1854 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, input_offset) {
1855 TEST_REQUIRES_X86_SSSE3;
1856 for (uint32_t channels = 48; channels < 384; channels += 72) {
1857 DWConvMicrokernelTester()
1858 .cr(24)
1859 .kr(9)
1860 .channels(channels)
1861 .input_offset(464)
1862 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1863 }
1864 }
1865
1866 TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, zero) {
1867 TEST_REQUIRES_X86_SSSE3;
1868 for (uint32_t mz = 0; mz < 9; mz++) {
1869 for (uint32_t channels = 48; channels < 384; channels += 72) {
1870 DWConvMicrokernelTester()
1871 .cr(24)
1872 .kr(9)
1873 .channels(channels)
1874 .input_offset(464)
1875 .zero_index(mz)
1876 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16);
1877 }
1878 }
1879 }
1880#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1881
1882
1883#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1884 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_eq_8) {
1885 TEST_REQUIRES_X86_SSE41;
1886 DWConvMicrokernelTester()
1887 .cr(8)
1888 .kr(9)
1889 .channels(8)
1890 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1891 }
1892
1893 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8) {
1894 TEST_REQUIRES_X86_SSE41;
1895 for (uint32_t channels = 16; channels < 128; channels += 24) {
1896 DWConvMicrokernelTester()
1897 .cr(8)
1898 .kr(9)
1899 .channels(channels)
1900 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1901 }
1902 }
1903
1904 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
1905 TEST_REQUIRES_X86_SSE41;
1906 for (uint32_t channels = 16; channels < 128; channels += 24) {
1907 DWConvMicrokernelTester()
1908 .cr(8)
1909 .kr(9)
1910 .channels(channels)
1911 .qmin(128)
1912 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1913 }
1914 }
1915
1916 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
1917 TEST_REQUIRES_X86_SSE41;
1918 for (uint32_t channels = 16; channels < 128; channels += 24) {
1919 DWConvMicrokernelTester()
1920 .cr(8)
1921 .kr(9)
1922 .channels(channels)
1923 .qmax(128)
1924 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1925 }
1926 }
1927
1928 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_lt_8) {
1929 TEST_REQUIRES_X86_SSE41;
1930 for (uint32_t channels = 1; channels < 8; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(8)
1933 .kr(9)
1934 .channels(channels)
1935 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1936 }
1937 }
1938
1939 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8) {
1940 TEST_REQUIRES_X86_SSE41;
1941 for (uint32_t channels = 9; channels < 16; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(8)
1944 .kr(9)
1945 .channels(channels)
1946 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1947 }
1948 }
1949
1950 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
1951 TEST_REQUIRES_X86_SSE41;
1952 for (uint32_t channels = 9; channels < 16; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(8)
1955 .kr(9)
1956 .channels(channels)
1957 .qmin(128)
1958 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1959 }
1960 }
1961
1962 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
1963 TEST_REQUIRES_X86_SSE41;
1964 for (uint32_t channels = 9; channels < 16; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(8)
1967 .kr(9)
1968 .channels(channels)
1969 .qmax(128)
1970 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1971 }
1972 }
1973
1974 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel) {
1975 TEST_REQUIRES_X86_SSE41;
1976 for (size_t channels = 1; channels <= 40; channels += 7) {
1977 DWConvMicrokernelTester()
1978 .cr(8)
1979 .kr(9)
1980 .channels(channels)
1981 .width(3)
1982 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1983 }
1984 }
1985
1986 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_step) {
1987 TEST_REQUIRES_X86_SSE41;
1988 for (size_t channels = 1; channels <= 40; channels += 7) {
1989 for (size_t step = 2; step <= 9; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(8)
1992 .kr(9)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
1996 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
1997 }
1998 }
1999 }
2000
2001 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
2002 TEST_REQUIRES_X86_SSE41;
2003 for (size_t channels = 1; channels <= 40; channels += 7) {
2004 DWConvMicrokernelTester()
2005 .cr(8)
2006 .kr(9)
2007 .channels(8)
2008 .width(5)
2009 .output_stride(43)
2010 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2011 }
2012 }
2013
2014 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
2015 TEST_REQUIRES_X86_SSE41;
2016 for (size_t channels = 1; channels <= 40; channels += 7) {
2017 DWConvMicrokernelTester()
2018 .cr(8)
2019 .kr(9)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
2023 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2024 }
2025 }
2026
2027 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
2028 TEST_REQUIRES_X86_SSE41;
2029 for (size_t channels = 1; channels <= 40; channels += 7) {
2030 DWConvMicrokernelTester()
2031 .cr(8)
2032 .kr(9)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
2036 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2037 }
2038 }
2039
2040 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, input_offset) {
2041 TEST_REQUIRES_X86_SSE41;
2042 for (uint32_t channels = 16; channels < 128; channels += 24) {
2043 DWConvMicrokernelTester()
2044 .cr(8)
2045 .kr(9)
2046 .channels(channels)
2047 .input_offset(176)
2048 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2049 }
2050 }
2051
2052 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, zero) {
2053 TEST_REQUIRES_X86_SSE41;
2054 for (uint32_t mz = 0; mz < 9; mz++) {
2055 for (uint32_t channels = 16; channels < 128; channels += 24) {
2056 DWConvMicrokernelTester()
2057 .cr(8)
2058 .kr(9)
2059 .channels(channels)
2060 .input_offset(176)
2061 .zero_index(mz)
2062 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16);
2063 }
2064 }
2065 }
2066#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2067
2068
2069#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2070 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_eq_16) {
2071 TEST_REQUIRES_X86_SSE41;
2072 DWConvMicrokernelTester()
2073 .cr(16)
2074 .kr(9)
2075 .channels(16)
2076 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2077 }
2078
2079 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16) {
2080 TEST_REQUIRES_X86_SSE41;
2081 for (uint32_t channels = 32; channels < 256; channels += 48) {
2082 DWConvMicrokernelTester()
2083 .cr(16)
2084 .kr(9)
2085 .channels(channels)
2086 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2087 }
2088 }
2089
2090 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
2091 TEST_REQUIRES_X86_SSE41;
2092 for (uint32_t channels = 32; channels < 256; channels += 48) {
2093 DWConvMicrokernelTester()
2094 .cr(16)
2095 .kr(9)
2096 .channels(channels)
2097 .qmin(128)
2098 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2099 }
2100 }
2101
2102 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
2103 TEST_REQUIRES_X86_SSE41;
2104 for (uint32_t channels = 32; channels < 256; channels += 48) {
2105 DWConvMicrokernelTester()
2106 .cr(16)
2107 .kr(9)
2108 .channels(channels)
2109 .qmax(128)
2110 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2111 }
2112 }
2113
2114 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_lt_16) {
2115 TEST_REQUIRES_X86_SSE41;
2116 for (uint32_t channels = 1; channels < 16; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(16)
2119 .kr(9)
2120 .channels(channels)
2121 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2122 }
2123 }
2124
2125 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16) {
2126 TEST_REQUIRES_X86_SSE41;
2127 for (uint32_t channels = 17; channels < 32; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(16)
2130 .kr(9)
2131 .channels(channels)
2132 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2133 }
2134 }
2135
2136 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
2137 TEST_REQUIRES_X86_SSE41;
2138 for (uint32_t channels = 17; channels < 32; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(16)
2141 .kr(9)
2142 .channels(channels)
2143 .qmin(128)
2144 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2145 }
2146 }
2147
2148 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
2149 TEST_REQUIRES_X86_SSE41;
2150 for (uint32_t channels = 17; channels < 32; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(16)
2153 .kr(9)
2154 .channels(channels)
2155 .qmax(128)
2156 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2157 }
2158 }
2159
2160 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel) {
2161 TEST_REQUIRES_X86_SSE41;
2162 for (size_t channels = 1; channels <= 80; channels += 15) {
2163 DWConvMicrokernelTester()
2164 .cr(16)
2165 .kr(9)
2166 .channels(channels)
2167 .width(3)
2168 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2169 }
2170 }
2171
2172 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_step) {
2173 TEST_REQUIRES_X86_SSE41;
2174 for (size_t channels = 1; channels <= 80; channels += 15) {
2175 for (size_t step = 2; step <= 9; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(16)
2178 .kr(9)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
2182 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2183 }
2184 }
2185 }
2186
2187 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
2188 TEST_REQUIRES_X86_SSE41;
2189 for (size_t channels = 1; channels <= 80; channels += 15) {
2190 DWConvMicrokernelTester()
2191 .cr(16)
2192 .kr(9)
2193 .channels(16)
2194 .width(5)
2195 .output_stride(83)
2196 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2197 }
2198 }
2199
2200 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
2201 TEST_REQUIRES_X86_SSE41;
2202 for (size_t channels = 1; channels <= 80; channels += 15) {
2203 DWConvMicrokernelTester()
2204 .cr(16)
2205 .kr(9)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
2209 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2210 }
2211 }
2212
2213 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
2214 TEST_REQUIRES_X86_SSE41;
2215 for (size_t channels = 1; channels <= 80; channels += 15) {
2216 DWConvMicrokernelTester()
2217 .cr(16)
2218 .kr(9)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
2222 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2223 }
2224 }
2225
2226 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, input_offset) {
2227 TEST_REQUIRES_X86_SSE41;
2228 for (uint32_t channels = 32; channels < 256; channels += 48) {
2229 DWConvMicrokernelTester()
2230 .cr(16)
2231 .kr(9)
2232 .channels(channels)
2233 .input_offset(304)
2234 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2235 }
2236 }
2237
2238 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, zero) {
2239 TEST_REQUIRES_X86_SSE41;
2240 for (uint32_t mz = 0; mz < 9; mz++) {
2241 for (uint32_t channels = 32; channels < 256; channels += 48) {
2242 DWConvMicrokernelTester()
2243 .cr(16)
2244 .kr(9)
2245 .channels(channels)
2246 .input_offset(304)
2247 .zero_index(mz)
2248 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16);
2249 }
2250 }
2251 }
2252#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2253
2254
2255#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2256 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_eq_24) {
2257 TEST_REQUIRES_X86_SSE41;
2258 DWConvMicrokernelTester()
2259 .cr(24)
2260 .kr(9)
2261 .channels(24)
2262 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2263 }
2264
2265 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24) {
2266 TEST_REQUIRES_X86_SSE41;
2267 for (uint32_t channels = 48; channels < 384; channels += 72) {
2268 DWConvMicrokernelTester()
2269 .cr(24)
2270 .kr(9)
2271 .channels(channels)
2272 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2273 }
2274 }
2275
2276 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
2277 TEST_REQUIRES_X86_SSE41;
2278 for (uint32_t channels = 48; channels < 384; channels += 72) {
2279 DWConvMicrokernelTester()
2280 .cr(24)
2281 .kr(9)
2282 .channels(channels)
2283 .qmin(128)
2284 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2285 }
2286 }
2287
2288 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
2289 TEST_REQUIRES_X86_SSE41;
2290 for (uint32_t channels = 48; channels < 384; channels += 72) {
2291 DWConvMicrokernelTester()
2292 .cr(24)
2293 .kr(9)
2294 .channels(channels)
2295 .qmax(128)
2296 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2297 }
2298 }
2299
2300 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_lt_24) {
2301 TEST_REQUIRES_X86_SSE41;
2302 for (uint32_t channels = 1; channels < 24; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(24)
2305 .kr(9)
2306 .channels(channels)
2307 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2308 }
2309 }
2310
2311 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24) {
2312 TEST_REQUIRES_X86_SSE41;
2313 for (uint32_t channels = 25; channels < 48; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(24)
2316 .kr(9)
2317 .channels(channels)
2318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2319 }
2320 }
2321
2322 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
2323 TEST_REQUIRES_X86_SSE41;
2324 for (uint32_t channels = 25; channels < 48; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(24)
2327 .kr(9)
2328 .channels(channels)
2329 .qmin(128)
2330 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2331 }
2332 }
2333
2334 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
2335 TEST_REQUIRES_X86_SSE41;
2336 for (uint32_t channels = 25; channels < 48; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(24)
2339 .kr(9)
2340 .channels(channels)
2341 .qmax(128)
2342 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2343 }
2344 }
2345
2346 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel) {
2347 TEST_REQUIRES_X86_SSE41;
2348 for (size_t channels = 1; channels <= 120; channels += 23) {
2349 DWConvMicrokernelTester()
2350 .cr(24)
2351 .kr(9)
2352 .channels(channels)
2353 .width(3)
2354 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2355 }
2356 }
2357
2358 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_step) {
2359 TEST_REQUIRES_X86_SSE41;
2360 for (size_t channels = 1; channels <= 120; channels += 23) {
2361 for (size_t step = 2; step <= 9; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(24)
2364 .kr(9)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
2368 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2369 }
2370 }
2371 }
2372
2373 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
2374 TEST_REQUIRES_X86_SSE41;
2375 for (size_t channels = 1; channels <= 120; channels += 23) {
2376 DWConvMicrokernelTester()
2377 .cr(24)
2378 .kr(9)
2379 .channels(24)
2380 .width(5)
2381 .output_stride(127)
2382 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2383 }
2384 }
2385
2386 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
2387 TEST_REQUIRES_X86_SSE41;
2388 for (size_t channels = 1; channels <= 120; channels += 23) {
2389 DWConvMicrokernelTester()
2390 .cr(24)
2391 .kr(9)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
2395 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2396 }
2397 }
2398
2399 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
2400 TEST_REQUIRES_X86_SSE41;
2401 for (size_t channels = 1; channels <= 120; channels += 23) {
2402 DWConvMicrokernelTester()
2403 .cr(24)
2404 .kr(9)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
2408 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2409 }
2410 }
2411
2412 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, input_offset) {
2413 TEST_REQUIRES_X86_SSE41;
2414 for (uint32_t channels = 48; channels < 384; channels += 72) {
2415 DWConvMicrokernelTester()
2416 .cr(24)
2417 .kr(9)
2418 .channels(channels)
2419 .input_offset(464)
2420 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2421 }
2422 }
2423
2424 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, zero) {
2425 TEST_REQUIRES_X86_SSE41;
2426 for (uint32_t mz = 0; mz < 9; mz++) {
2427 for (uint32_t channels = 48; channels < 384; channels += 72) {
2428 DWConvMicrokernelTester()
2429 .cr(24)
2430 .kr(9)
2431 .channels(channels)
2432 .input_offset(464)
2433 .zero_index(mz)
2434 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16);
2435 }
2436 }
2437 }
2438#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2439
2440
2441#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanfa0ab852021-04-02 17:30:49 -07002442 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_eq_8) {
2443 TEST_REQUIRES_X86_AVX;
2444 DWConvMicrokernelTester()
2445 .cr(8)
2446 .kr(9)
2447 .channels(8)
2448 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2449 }
2450
2451 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8) {
2452 TEST_REQUIRES_X86_AVX;
2453 for (uint32_t channels = 16; channels < 128; channels += 24) {
2454 DWConvMicrokernelTester()
2455 .cr(8)
2456 .kr(9)
2457 .channels(channels)
2458 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2459 }
2460 }
2461
2462 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
2463 TEST_REQUIRES_X86_AVX;
2464 for (uint32_t channels = 16; channels < 128; channels += 24) {
2465 DWConvMicrokernelTester()
2466 .cr(8)
2467 .kr(9)
2468 .channels(channels)
2469 .qmin(128)
2470 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2471 }
2472 }
2473
2474 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
2475 TEST_REQUIRES_X86_AVX;
2476 for (uint32_t channels = 16; channels < 128; channels += 24) {
2477 DWConvMicrokernelTester()
2478 .cr(8)
2479 .kr(9)
2480 .channels(channels)
2481 .qmax(128)
2482 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2483 }
2484 }
2485
2486 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_lt_8) {
2487 TEST_REQUIRES_X86_AVX;
2488 for (uint32_t channels = 1; channels < 8; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(8)
2491 .kr(9)
2492 .channels(channels)
2493 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2494 }
2495 }
2496
2497 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8) {
2498 TEST_REQUIRES_X86_AVX;
2499 for (uint32_t channels = 9; channels < 16; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(8)
2502 .kr(9)
2503 .channels(channels)
2504 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2505 }
2506 }
2507
2508 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
2509 TEST_REQUIRES_X86_AVX;
2510 for (uint32_t channels = 9; channels < 16; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(8)
2513 .kr(9)
2514 .channels(channels)
2515 .qmin(128)
2516 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2517 }
2518 }
2519
2520 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
2521 TEST_REQUIRES_X86_AVX;
2522 for (uint32_t channels = 9; channels < 16; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(8)
2525 .kr(9)
2526 .channels(channels)
2527 .qmax(128)
2528 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2529 }
2530 }
2531
2532 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel) {
2533 TEST_REQUIRES_X86_AVX;
2534 for (size_t channels = 1; channels <= 40; channels += 7) {
2535 DWConvMicrokernelTester()
2536 .cr(8)
2537 .kr(9)
2538 .channels(channels)
2539 .width(3)
2540 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2541 }
2542 }
2543
2544 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_step) {
2545 TEST_REQUIRES_X86_AVX;
2546 for (size_t channels = 1; channels <= 40; channels += 7) {
2547 for (size_t step = 2; step <= 9; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(8)
2550 .kr(9)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
2554 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2555 }
2556 }
2557 }
2558
2559 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
2560 TEST_REQUIRES_X86_AVX;
2561 for (size_t channels = 1; channels <= 40; channels += 7) {
2562 DWConvMicrokernelTester()
2563 .cr(8)
2564 .kr(9)
2565 .channels(8)
2566 .width(5)
2567 .output_stride(43)
2568 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2569 }
2570 }
2571
2572 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_qmin) {
2573 TEST_REQUIRES_X86_AVX;
2574 for (size_t channels = 1; channels <= 40; channels += 7) {
2575 DWConvMicrokernelTester()
2576 .cr(8)
2577 .kr(9)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
2581 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2582 }
2583 }
2584
2585 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_qmax) {
2586 TEST_REQUIRES_X86_AVX;
2587 for (size_t channels = 1; channels <= 40; channels += 7) {
2588 DWConvMicrokernelTester()
2589 .cr(8)
2590 .kr(9)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
2594 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2595 }
2596 }
2597
2598 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, input_offset) {
2599 TEST_REQUIRES_X86_AVX;
2600 for (uint32_t channels = 16; channels < 128; channels += 24) {
2601 DWConvMicrokernelTester()
2602 .cr(8)
2603 .kr(9)
2604 .channels(channels)
2605 .input_offset(176)
2606 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2607 }
2608 }
2609
2610 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, zero) {
2611 TEST_REQUIRES_X86_AVX;
2612 for (uint32_t mz = 0; mz < 9; mz++) {
2613 for (uint32_t channels = 16; channels < 128; channels += 24) {
2614 DWConvMicrokernelTester()
2615 .cr(8)
2616 .kr(9)
2617 .channels(channels)
2618 .input_offset(176)
2619 .zero_index(mz)
2620 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16);
2621 }
2622 }
2623 }
2624#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2625
2626
2627#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2628 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_eq_16) {
2629 TEST_REQUIRES_X86_AVX;
2630 DWConvMicrokernelTester()
2631 .cr(16)
2632 .kr(9)
2633 .channels(16)
2634 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2635 }
2636
2637 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16) {
2638 TEST_REQUIRES_X86_AVX;
2639 for (uint32_t channels = 32; channels < 256; channels += 48) {
2640 DWConvMicrokernelTester()
2641 .cr(16)
2642 .kr(9)
2643 .channels(channels)
2644 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2645 }
2646 }
2647
2648 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
2649 TEST_REQUIRES_X86_AVX;
2650 for (uint32_t channels = 32; channels < 256; channels += 48) {
2651 DWConvMicrokernelTester()
2652 .cr(16)
2653 .kr(9)
2654 .channels(channels)
2655 .qmin(128)
2656 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2657 }
2658 }
2659
2660 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
2661 TEST_REQUIRES_X86_AVX;
2662 for (uint32_t channels = 32; channels < 256; channels += 48) {
2663 DWConvMicrokernelTester()
2664 .cr(16)
2665 .kr(9)
2666 .channels(channels)
2667 .qmax(128)
2668 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2669 }
2670 }
2671
2672 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_lt_16) {
2673 TEST_REQUIRES_X86_AVX;
2674 for (uint32_t channels = 1; channels < 16; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(16)
2677 .kr(9)
2678 .channels(channels)
2679 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2680 }
2681 }
2682
2683 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16) {
2684 TEST_REQUIRES_X86_AVX;
2685 for (uint32_t channels = 17; channels < 32; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(16)
2688 .kr(9)
2689 .channels(channels)
2690 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2691 }
2692 }
2693
2694 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
2695 TEST_REQUIRES_X86_AVX;
2696 for (uint32_t channels = 17; channels < 32; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(16)
2699 .kr(9)
2700 .channels(channels)
2701 .qmin(128)
2702 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2703 }
2704 }
2705
2706 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
2707 TEST_REQUIRES_X86_AVX;
2708 for (uint32_t channels = 17; channels < 32; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(16)
2711 .kr(9)
2712 .channels(channels)
2713 .qmax(128)
2714 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2715 }
2716 }
2717
2718 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel) {
2719 TEST_REQUIRES_X86_AVX;
2720 for (size_t channels = 1; channels <= 80; channels += 15) {
2721 DWConvMicrokernelTester()
2722 .cr(16)
2723 .kr(9)
2724 .channels(channels)
2725 .width(3)
2726 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2727 }
2728 }
2729
2730 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_step) {
2731 TEST_REQUIRES_X86_AVX;
2732 for (size_t channels = 1; channels <= 80; channels += 15) {
2733 for (size_t step = 2; step <= 9; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(16)
2736 .kr(9)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
2740 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2741 }
2742 }
2743 }
2744
2745 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
2746 TEST_REQUIRES_X86_AVX;
2747 for (size_t channels = 1; channels <= 80; channels += 15) {
2748 DWConvMicrokernelTester()
2749 .cr(16)
2750 .kr(9)
2751 .channels(16)
2752 .width(5)
2753 .output_stride(83)
2754 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2755 }
2756 }
2757
2758 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_qmin) {
2759 TEST_REQUIRES_X86_AVX;
2760 for (size_t channels = 1; channels <= 80; channels += 15) {
2761 DWConvMicrokernelTester()
2762 .cr(16)
2763 .kr(9)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
2767 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2768 }
2769 }
2770
2771 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_qmax) {
2772 TEST_REQUIRES_X86_AVX;
2773 for (size_t channels = 1; channels <= 80; channels += 15) {
2774 DWConvMicrokernelTester()
2775 .cr(16)
2776 .kr(9)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
2780 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2781 }
2782 }
2783
2784 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, input_offset) {
2785 TEST_REQUIRES_X86_AVX;
2786 for (uint32_t channels = 32; channels < 256; channels += 48) {
2787 DWConvMicrokernelTester()
2788 .cr(16)
2789 .kr(9)
2790 .channels(channels)
2791 .input_offset(304)
2792 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2793 }
2794 }
2795
2796 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, zero) {
2797 TEST_REQUIRES_X86_AVX;
2798 for (uint32_t mz = 0; mz < 9; mz++) {
2799 for (uint32_t channels = 32; channels < 256; channels += 48) {
2800 DWConvMicrokernelTester()
2801 .cr(16)
2802 .kr(9)
2803 .channels(channels)
2804 .input_offset(304)
2805 .zero_index(mz)
2806 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16);
2807 }
2808 }
2809 }
2810#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2811
2812
2813#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2814 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_eq_24) {
2815 TEST_REQUIRES_X86_AVX;
2816 DWConvMicrokernelTester()
2817 .cr(24)
2818 .kr(9)
2819 .channels(24)
2820 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2821 }
2822
2823 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24) {
2824 TEST_REQUIRES_X86_AVX;
2825 for (uint32_t channels = 48; channels < 384; channels += 72) {
2826 DWConvMicrokernelTester()
2827 .cr(24)
2828 .kr(9)
2829 .channels(channels)
2830 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2831 }
2832 }
2833
2834 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
2835 TEST_REQUIRES_X86_AVX;
2836 for (uint32_t channels = 48; channels < 384; channels += 72) {
2837 DWConvMicrokernelTester()
2838 .cr(24)
2839 .kr(9)
2840 .channels(channels)
2841 .qmin(128)
2842 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2843 }
2844 }
2845
2846 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
2847 TEST_REQUIRES_X86_AVX;
2848 for (uint32_t channels = 48; channels < 384; channels += 72) {
2849 DWConvMicrokernelTester()
2850 .cr(24)
2851 .kr(9)
2852 .channels(channels)
2853 .qmax(128)
2854 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2855 }
2856 }
2857
2858 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_lt_24) {
2859 TEST_REQUIRES_X86_AVX;
2860 for (uint32_t channels = 1; channels < 24; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(24)
2863 .kr(9)
2864 .channels(channels)
2865 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2866 }
2867 }
2868
2869 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24) {
2870 TEST_REQUIRES_X86_AVX;
2871 for (uint32_t channels = 25; channels < 48; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(24)
2874 .kr(9)
2875 .channels(channels)
2876 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2877 }
2878 }
2879
2880 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
2881 TEST_REQUIRES_X86_AVX;
2882 for (uint32_t channels = 25; channels < 48; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(24)
2885 .kr(9)
2886 .channels(channels)
2887 .qmin(128)
2888 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2889 }
2890 }
2891
2892 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
2893 TEST_REQUIRES_X86_AVX;
2894 for (uint32_t channels = 25; channels < 48; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(24)
2897 .kr(9)
2898 .channels(channels)
2899 .qmax(128)
2900 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2901 }
2902 }
2903
2904 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel) {
2905 TEST_REQUIRES_X86_AVX;
2906 for (size_t channels = 1; channels <= 120; channels += 23) {
2907 DWConvMicrokernelTester()
2908 .cr(24)
2909 .kr(9)
2910 .channels(channels)
2911 .width(3)
2912 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2913 }
2914 }
2915
2916 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_step) {
2917 TEST_REQUIRES_X86_AVX;
2918 for (size_t channels = 1; channels <= 120; channels += 23) {
2919 for (size_t step = 2; step <= 9; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(24)
2922 .kr(9)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
2926 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2927 }
2928 }
2929 }
2930
2931 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
2932 TEST_REQUIRES_X86_AVX;
2933 for (size_t channels = 1; channels <= 120; channels += 23) {
2934 DWConvMicrokernelTester()
2935 .cr(24)
2936 .kr(9)
2937 .channels(24)
2938 .width(5)
2939 .output_stride(127)
2940 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2941 }
2942 }
2943
2944 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_qmin) {
2945 TEST_REQUIRES_X86_AVX;
2946 for (size_t channels = 1; channels <= 120; channels += 23) {
2947 DWConvMicrokernelTester()
2948 .cr(24)
2949 .kr(9)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
2953 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2954 }
2955 }
2956
2957 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_qmax) {
2958 TEST_REQUIRES_X86_AVX;
2959 for (size_t channels = 1; channels <= 120; channels += 23) {
2960 DWConvMicrokernelTester()
2961 .cr(24)
2962 .kr(9)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
2966 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2967 }
2968 }
2969
2970 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, input_offset) {
2971 TEST_REQUIRES_X86_AVX;
2972 for (uint32_t channels = 48; channels < 384; channels += 72) {
2973 DWConvMicrokernelTester()
2974 .cr(24)
2975 .kr(9)
2976 .channels(channels)
2977 .input_offset(464)
2978 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2979 }
2980 }
2981
2982 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, zero) {
2983 TEST_REQUIRES_X86_AVX;
2984 for (uint32_t mz = 0; mz < 9; mz++) {
2985 for (uint32_t channels = 48; channels < 384; channels += 72) {
2986 DWConvMicrokernelTester()
2987 .cr(24)
2988 .kr(9)
2989 .channels(channels)
2990 .input_offset(464)
2991 .zero_index(mz)
2992 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16);
2993 }
2994 }
2995 }
2996#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2997
2998
2999#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan40135522020-08-07 01:21:00 -07003000 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_eq_16) {
3001 TEST_REQUIRES_X86_AVX2;
3002 DWConvMicrokernelTester()
3003 .cr(16)
3004 .kr(9)
3005 .channels(16)
3006 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3007 }
3008
3009 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16) {
3010 TEST_REQUIRES_X86_AVX2;
3011 for (uint32_t channels = 32; channels < 256; channels += 48) {
3012 DWConvMicrokernelTester()
3013 .cr(16)
3014 .kr(9)
3015 .channels(channels)
3016 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3017 }
3018 }
3019
3020 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
3021 TEST_REQUIRES_X86_AVX2;
3022 for (uint32_t channels = 32; channels < 256; channels += 48) {
3023 DWConvMicrokernelTester()
3024 .cr(16)
3025 .kr(9)
3026 .channels(channels)
3027 .qmin(128)
3028 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3029 }
3030 }
3031
3032 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
3033 TEST_REQUIRES_X86_AVX2;
3034 for (uint32_t channels = 32; channels < 256; channels += 48) {
3035 DWConvMicrokernelTester()
3036 .cr(16)
3037 .kr(9)
3038 .channels(channels)
3039 .qmax(128)
3040 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3041 }
3042 }
3043
3044 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_lt_16) {
3045 TEST_REQUIRES_X86_AVX2;
3046 for (uint32_t channels = 1; channels < 16; channels++) {
3047 DWConvMicrokernelTester()
3048 .cr(16)
3049 .kr(9)
3050 .channels(channels)
3051 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3052 }
3053 }
3054
3055 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16) {
3056 TEST_REQUIRES_X86_AVX2;
3057 for (uint32_t channels = 17; channels < 32; channels++) {
3058 DWConvMicrokernelTester()
3059 .cr(16)
3060 .kr(9)
3061 .channels(channels)
3062 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3063 }
3064 }
3065
3066 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
3067 TEST_REQUIRES_X86_AVX2;
3068 for (uint32_t channels = 17; channels < 32; channels++) {
3069 DWConvMicrokernelTester()
3070 .cr(16)
3071 .kr(9)
3072 .channels(channels)
3073 .qmin(128)
3074 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3075 }
3076 }
3077
3078 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
3079 TEST_REQUIRES_X86_AVX2;
3080 for (uint32_t channels = 17; channels < 32; channels++) {
3081 DWConvMicrokernelTester()
3082 .cr(16)
3083 .kr(9)
3084 .channels(channels)
3085 .qmax(128)
3086 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3087 }
3088 }
3089
3090 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel) {
3091 TEST_REQUIRES_X86_AVX2;
3092 for (size_t channels = 1; channels <= 80; channels += 15) {
3093 DWConvMicrokernelTester()
3094 .cr(16)
3095 .kr(9)
3096 .channels(channels)
3097 .width(3)
3098 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3099 }
3100 }
3101
3102 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_step) {
3103 TEST_REQUIRES_X86_AVX2;
3104 for (size_t channels = 1; channels <= 80; channels += 15) {
3105 for (size_t step = 2; step <= 9; step++) {
3106 DWConvMicrokernelTester()
3107 .cr(16)
3108 .kr(9)
3109 .channels(channels)
3110 .width(3)
3111 .step(step)
3112 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3113 }
3114 }
3115 }
3116
3117 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
3118 TEST_REQUIRES_X86_AVX2;
3119 for (size_t channels = 1; channels <= 80; channels += 15) {
3120 DWConvMicrokernelTester()
3121 .cr(16)
3122 .kr(9)
3123 .channels(16)
3124 .width(5)
3125 .output_stride(83)
3126 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3127 }
3128 }
3129
3130 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
3131 TEST_REQUIRES_X86_AVX2;
3132 for (size_t channels = 1; channels <= 80; channels += 15) {
3133 DWConvMicrokernelTester()
3134 .cr(16)
3135 .kr(9)
3136 .channels(channels)
3137 .width(3)
3138 .qmin(128)
3139 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3140 }
3141 }
3142
3143 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
3144 TEST_REQUIRES_X86_AVX2;
3145 for (size_t channels = 1; channels <= 80; channels += 15) {
3146 DWConvMicrokernelTester()
3147 .cr(16)
3148 .kr(9)
3149 .channels(channels)
3150 .width(3)
3151 .qmax(128)
3152 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3153 }
3154 }
3155
3156 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, input_offset) {
3157 TEST_REQUIRES_X86_AVX2;
3158 for (uint32_t channels = 32; channels < 256; channels += 48) {
3159 DWConvMicrokernelTester()
3160 .cr(16)
3161 .kr(9)
3162 .channels(channels)
3163 .input_offset(304)
3164 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3165 }
3166 }
3167
3168 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, zero) {
3169 TEST_REQUIRES_X86_AVX2;
3170 for (uint32_t mz = 0; mz < 9; mz++) {
3171 for (uint32_t channels = 32; channels < 256; channels += 48) {
3172 DWConvMicrokernelTester()
3173 .cr(16)
3174 .kr(9)
3175 .channels(channels)
3176 .input_offset(304)
3177 .zero_index(mz)
3178 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16);
3179 }
3180 }
3181 }
3182#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3183
3184
3185#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3186 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_eq_32) {
3187 TEST_REQUIRES_X86_AVX2;
3188 DWConvMicrokernelTester()
3189 .cr(32)
3190 .kr(9)
3191 .channels(32)
3192 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3193 }
3194
3195 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32) {
3196 TEST_REQUIRES_X86_AVX2;
3197 for (uint32_t channels = 64; channels < 512; channels += 96) {
3198 DWConvMicrokernelTester()
3199 .cr(32)
3200 .kr(9)
3201 .channels(channels)
3202 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3203 }
3204 }
3205
3206 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
3207 TEST_REQUIRES_X86_AVX2;
3208 for (uint32_t channels = 64; channels < 512; channels += 96) {
3209 DWConvMicrokernelTester()
3210 .cr(32)
3211 .kr(9)
3212 .channels(channels)
3213 .qmin(128)
3214 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3215 }
3216 }
3217
3218 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
3219 TEST_REQUIRES_X86_AVX2;
3220 for (uint32_t channels = 64; channels < 512; channels += 96) {
3221 DWConvMicrokernelTester()
3222 .cr(32)
3223 .kr(9)
3224 .channels(channels)
3225 .qmax(128)
3226 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3227 }
3228 }
3229
3230 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_lt_32) {
3231 TEST_REQUIRES_X86_AVX2;
3232 for (uint32_t channels = 1; channels < 32; channels++) {
3233 DWConvMicrokernelTester()
3234 .cr(32)
3235 .kr(9)
3236 .channels(channels)
3237 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3238 }
3239 }
3240
3241 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32) {
3242 TEST_REQUIRES_X86_AVX2;
3243 for (uint32_t channels = 33; channels < 64; channels++) {
3244 DWConvMicrokernelTester()
3245 .cr(32)
3246 .kr(9)
3247 .channels(channels)
3248 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3249 }
3250 }
3251
3252 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
3253 TEST_REQUIRES_X86_AVX2;
3254 for (uint32_t channels = 33; channels < 64; channels++) {
3255 DWConvMicrokernelTester()
3256 .cr(32)
3257 .kr(9)
3258 .channels(channels)
3259 .qmin(128)
3260 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3261 }
3262 }
3263
3264 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
3265 TEST_REQUIRES_X86_AVX2;
3266 for (uint32_t channels = 33; channels < 64; channels++) {
3267 DWConvMicrokernelTester()
3268 .cr(32)
3269 .kr(9)
3270 .channels(channels)
3271 .qmax(128)
3272 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3273 }
3274 }
3275
3276 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel) {
3277 TEST_REQUIRES_X86_AVX2;
3278 for (size_t channels = 1; channels <= 160; channels += 31) {
3279 DWConvMicrokernelTester()
3280 .cr(32)
3281 .kr(9)
3282 .channels(channels)
3283 .width(3)
3284 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3285 }
3286 }
3287
3288 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_step) {
3289 TEST_REQUIRES_X86_AVX2;
3290 for (size_t channels = 1; channels <= 160; channels += 31) {
3291 for (size_t step = 2; step <= 9; step++) {
3292 DWConvMicrokernelTester()
3293 .cr(32)
3294 .kr(9)
3295 .channels(channels)
3296 .width(3)
3297 .step(step)
3298 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3299 }
3300 }
3301 }
3302
3303 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
3304 TEST_REQUIRES_X86_AVX2;
3305 for (size_t channels = 1; channels <= 160; channels += 31) {
3306 DWConvMicrokernelTester()
3307 .cr(32)
3308 .kr(9)
3309 .channels(32)
3310 .width(5)
3311 .output_stride(163)
3312 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3313 }
3314 }
3315
3316 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
3317 TEST_REQUIRES_X86_AVX2;
3318 for (size_t channels = 1; channels <= 160; channels += 31) {
3319 DWConvMicrokernelTester()
3320 .cr(32)
3321 .kr(9)
3322 .channels(channels)
3323 .width(3)
3324 .qmin(128)
3325 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3326 }
3327 }
3328
3329 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
3330 TEST_REQUIRES_X86_AVX2;
3331 for (size_t channels = 1; channels <= 160; channels += 31) {
3332 DWConvMicrokernelTester()
3333 .cr(32)
3334 .kr(9)
3335 .channels(channels)
3336 .width(3)
3337 .qmax(128)
3338 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3339 }
3340 }
3341
3342 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, input_offset) {
3343 TEST_REQUIRES_X86_AVX2;
3344 for (uint32_t channels = 64; channels < 512; channels += 96) {
3345 DWConvMicrokernelTester()
3346 .cr(32)
3347 .kr(9)
3348 .channels(channels)
3349 .input_offset(592)
3350 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3351 }
3352 }
3353
3354 TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, zero) {
3355 TEST_REQUIRES_X86_AVX2;
3356 for (uint32_t mz = 0; mz < 9; mz++) {
3357 for (uint32_t channels = 64; channels < 512; channels += 96) {
3358 DWConvMicrokernelTester()
3359 .cr(32)
3360 .kr(9)
3361 .channels(channels)
3362 .input_offset(592)
3363 .zero_index(mz)
3364 .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16);
3365 }
3366 }
3367 }
3368#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3369
3370
3371#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan07feec82021-04-02 22:41:15 -07003372 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_eq_8) {
3373 TEST_REQUIRES_X86_SSE41;
3374 DWConvMicrokernelTester()
3375 .cr(8)
3376 .kr(9)
3377 .channels(8)
3378 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3379 }
3380
3381 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8) {
3382 TEST_REQUIRES_X86_SSE41;
3383 for (uint32_t channels = 16; channels < 128; channels += 24) {
3384 DWConvMicrokernelTester()
3385 .cr(8)
3386 .kr(9)
3387 .channels(channels)
3388 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3389 }
3390 }
3391
3392 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
3393 TEST_REQUIRES_X86_SSE41;
3394 for (uint32_t channels = 16; channels < 128; channels += 24) {
3395 DWConvMicrokernelTester()
3396 .cr(8)
3397 .kr(9)
3398 .channels(channels)
3399 .qmin(128)
3400 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3401 }
3402 }
3403
3404 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
3405 TEST_REQUIRES_X86_SSE41;
3406 for (uint32_t channels = 16; channels < 128; channels += 24) {
3407 DWConvMicrokernelTester()
3408 .cr(8)
3409 .kr(9)
3410 .channels(channels)
3411 .qmax(128)
3412 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3413 }
3414 }
3415
3416 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_lt_8) {
3417 TEST_REQUIRES_X86_SSE41;
3418 for (uint32_t channels = 1; channels < 8; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(8)
3421 .kr(9)
3422 .channels(channels)
3423 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3424 }
3425 }
3426
3427 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8) {
3428 TEST_REQUIRES_X86_SSE41;
3429 for (uint32_t channels = 9; channels < 16; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(8)
3432 .kr(9)
3433 .channels(channels)
3434 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3435 }
3436 }
3437
3438 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
3439 TEST_REQUIRES_X86_SSE41;
3440 for (uint32_t channels = 9; channels < 16; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(8)
3443 .kr(9)
3444 .channels(channels)
3445 .qmin(128)
3446 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3447 }
3448 }
3449
3450 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
3451 TEST_REQUIRES_X86_SSE41;
3452 for (uint32_t channels = 9; channels < 16; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(8)
3455 .kr(9)
3456 .channels(channels)
3457 .qmax(128)
3458 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3459 }
3460 }
3461
3462 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel) {
3463 TEST_REQUIRES_X86_SSE41;
3464 for (size_t channels = 1; channels <= 40; channels += 7) {
3465 DWConvMicrokernelTester()
3466 .cr(8)
3467 .kr(9)
3468 .channels(channels)
3469 .width(3)
3470 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3471 }
3472 }
3473
3474 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_step) {
3475 TEST_REQUIRES_X86_SSE41;
3476 for (size_t channels = 1; channels <= 40; channels += 7) {
3477 for (size_t step = 2; step <= 9; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(8)
3480 .kr(9)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
3484 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3485 }
3486 }
3487 }
3488
3489 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
3490 TEST_REQUIRES_X86_SSE41;
3491 for (size_t channels = 1; channels <= 40; channels += 7) {
3492 DWConvMicrokernelTester()
3493 .cr(8)
3494 .kr(9)
3495 .channels(8)
3496 .width(5)
3497 .output_stride(43)
3498 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3499 }
3500 }
3501
3502 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
3503 TEST_REQUIRES_X86_SSE41;
3504 for (size_t channels = 1; channels <= 40; channels += 7) {
3505 DWConvMicrokernelTester()
3506 .cr(8)
3507 .kr(9)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
3511 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3512 }
3513 }
3514
3515 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
3516 TEST_REQUIRES_X86_SSE41;
3517 for (size_t channels = 1; channels <= 40; channels += 7) {
3518 DWConvMicrokernelTester()
3519 .cr(8)
3520 .kr(9)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
3524 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3525 }
3526 }
3527
3528 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, input_offset) {
3529 TEST_REQUIRES_X86_SSE41;
3530 for (uint32_t channels = 16; channels < 128; channels += 24) {
3531 DWConvMicrokernelTester()
3532 .cr(8)
3533 .kr(9)
3534 .channels(channels)
3535 .input_offset(176)
3536 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3537 }
3538 }
3539
3540 TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, zero) {
3541 TEST_REQUIRES_X86_SSE41;
3542 for (uint32_t mz = 0; mz < 9; mz++) {
3543 for (uint32_t channels = 16; channels < 128; channels += 24) {
3544 DWConvMicrokernelTester()
3545 .cr(8)
3546 .kr(9)
3547 .channels(channels)
3548 .input_offset(176)
3549 .zero_index(mz)
3550 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32);
3551 }
3552 }
3553 }
3554#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3555
3556
3557#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3558 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_eq_16) {
3559 TEST_REQUIRES_X86_SSE41;
3560 DWConvMicrokernelTester()
3561 .cr(16)
3562 .kr(9)
3563 .channels(16)
3564 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3565 }
3566
3567 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16) {
3568 TEST_REQUIRES_X86_SSE41;
3569 for (uint32_t channels = 32; channels < 256; channels += 48) {
3570 DWConvMicrokernelTester()
3571 .cr(16)
3572 .kr(9)
3573 .channels(channels)
3574 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3575 }
3576 }
3577
3578 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
3579 TEST_REQUIRES_X86_SSE41;
3580 for (uint32_t channels = 32; channels < 256; channels += 48) {
3581 DWConvMicrokernelTester()
3582 .cr(16)
3583 .kr(9)
3584 .channels(channels)
3585 .qmin(128)
3586 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3587 }
3588 }
3589
3590 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
3591 TEST_REQUIRES_X86_SSE41;
3592 for (uint32_t channels = 32; channels < 256; channels += 48) {
3593 DWConvMicrokernelTester()
3594 .cr(16)
3595 .kr(9)
3596 .channels(channels)
3597 .qmax(128)
3598 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3599 }
3600 }
3601
3602 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_lt_16) {
3603 TEST_REQUIRES_X86_SSE41;
3604 for (uint32_t channels = 1; channels < 16; channels++) {
3605 DWConvMicrokernelTester()
3606 .cr(16)
3607 .kr(9)
3608 .channels(channels)
3609 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3610 }
3611 }
3612
3613 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16) {
3614 TEST_REQUIRES_X86_SSE41;
3615 for (uint32_t channels = 17; channels < 32; channels++) {
3616 DWConvMicrokernelTester()
3617 .cr(16)
3618 .kr(9)
3619 .channels(channels)
3620 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3621 }
3622 }
3623
3624 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
3625 TEST_REQUIRES_X86_SSE41;
3626 for (uint32_t channels = 17; channels < 32; channels++) {
3627 DWConvMicrokernelTester()
3628 .cr(16)
3629 .kr(9)
3630 .channels(channels)
3631 .qmin(128)
3632 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3633 }
3634 }
3635
3636 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
3637 TEST_REQUIRES_X86_SSE41;
3638 for (uint32_t channels = 17; channels < 32; channels++) {
3639 DWConvMicrokernelTester()
3640 .cr(16)
3641 .kr(9)
3642 .channels(channels)
3643 .qmax(128)
3644 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3645 }
3646 }
3647
3648 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel) {
3649 TEST_REQUIRES_X86_SSE41;
3650 for (size_t channels = 1; channels <= 80; channels += 15) {
3651 DWConvMicrokernelTester()
3652 .cr(16)
3653 .kr(9)
3654 .channels(channels)
3655 .width(3)
3656 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3657 }
3658 }
3659
3660 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_step) {
3661 TEST_REQUIRES_X86_SSE41;
3662 for (size_t channels = 1; channels <= 80; channels += 15) {
3663 for (size_t step = 2; step <= 9; step++) {
3664 DWConvMicrokernelTester()
3665 .cr(16)
3666 .kr(9)
3667 .channels(channels)
3668 .width(3)
3669 .step(step)
3670 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3671 }
3672 }
3673 }
3674
3675 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
3676 TEST_REQUIRES_X86_SSE41;
3677 for (size_t channels = 1; channels <= 80; channels += 15) {
3678 DWConvMicrokernelTester()
3679 .cr(16)
3680 .kr(9)
3681 .channels(16)
3682 .width(5)
3683 .output_stride(83)
3684 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3685 }
3686 }
3687
3688 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
3689 TEST_REQUIRES_X86_SSE41;
3690 for (size_t channels = 1; channels <= 80; channels += 15) {
3691 DWConvMicrokernelTester()
3692 .cr(16)
3693 .kr(9)
3694 .channels(channels)
3695 .width(3)
3696 .qmin(128)
3697 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3698 }
3699 }
3700
3701 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
3702 TEST_REQUIRES_X86_SSE41;
3703 for (size_t channels = 1; channels <= 80; channels += 15) {
3704 DWConvMicrokernelTester()
3705 .cr(16)
3706 .kr(9)
3707 .channels(channels)
3708 .width(3)
3709 .qmax(128)
3710 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3711 }
3712 }
3713
3714 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, input_offset) {
3715 TEST_REQUIRES_X86_SSE41;
3716 for (uint32_t channels = 32; channels < 256; channels += 48) {
3717 DWConvMicrokernelTester()
3718 .cr(16)
3719 .kr(9)
3720 .channels(channels)
3721 .input_offset(304)
3722 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3723 }
3724 }
3725
3726 TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, zero) {
3727 TEST_REQUIRES_X86_SSE41;
3728 for (uint32_t mz = 0; mz < 9; mz++) {
3729 for (uint32_t channels = 32; channels < 256; channels += 48) {
3730 DWConvMicrokernelTester()
3731 .cr(16)
3732 .kr(9)
3733 .channels(channels)
3734 .input_offset(304)
3735 .zero_index(mz)
3736 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32);
3737 }
3738 }
3739 }
3740#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3741
3742
3743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3744 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_eq_24) {
3745 TEST_REQUIRES_X86_SSE41;
3746 DWConvMicrokernelTester()
3747 .cr(24)
3748 .kr(9)
3749 .channels(24)
3750 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3751 }
3752
3753 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24) {
3754 TEST_REQUIRES_X86_SSE41;
3755 for (uint32_t channels = 48; channels < 384; channels += 72) {
3756 DWConvMicrokernelTester()
3757 .cr(24)
3758 .kr(9)
3759 .channels(channels)
3760 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3761 }
3762 }
3763
3764 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
3765 TEST_REQUIRES_X86_SSE41;
3766 for (uint32_t channels = 48; channels < 384; channels += 72) {
3767 DWConvMicrokernelTester()
3768 .cr(24)
3769 .kr(9)
3770 .channels(channels)
3771 .qmin(128)
3772 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3773 }
3774 }
3775
3776 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
3777 TEST_REQUIRES_X86_SSE41;
3778 for (uint32_t channels = 48; channels < 384; channels += 72) {
3779 DWConvMicrokernelTester()
3780 .cr(24)
3781 .kr(9)
3782 .channels(channels)
3783 .qmax(128)
3784 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3785 }
3786 }
3787
3788 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_lt_24) {
3789 TEST_REQUIRES_X86_SSE41;
3790 for (uint32_t channels = 1; channels < 24; channels++) {
3791 DWConvMicrokernelTester()
3792 .cr(24)
3793 .kr(9)
3794 .channels(channels)
3795 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3796 }
3797 }
3798
3799 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24) {
3800 TEST_REQUIRES_X86_SSE41;
3801 for (uint32_t channels = 25; channels < 48; channels++) {
3802 DWConvMicrokernelTester()
3803 .cr(24)
3804 .kr(9)
3805 .channels(channels)
3806 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3807 }
3808 }
3809
3810 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
3811 TEST_REQUIRES_X86_SSE41;
3812 for (uint32_t channels = 25; channels < 48; channels++) {
3813 DWConvMicrokernelTester()
3814 .cr(24)
3815 .kr(9)
3816 .channels(channels)
3817 .qmin(128)
3818 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3819 }
3820 }
3821
3822 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
3823 TEST_REQUIRES_X86_SSE41;
3824 for (uint32_t channels = 25; channels < 48; channels++) {
3825 DWConvMicrokernelTester()
3826 .cr(24)
3827 .kr(9)
3828 .channels(channels)
3829 .qmax(128)
3830 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3831 }
3832 }
3833
3834 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel) {
3835 TEST_REQUIRES_X86_SSE41;
3836 for (size_t channels = 1; channels <= 120; channels += 23) {
3837 DWConvMicrokernelTester()
3838 .cr(24)
3839 .kr(9)
3840 .channels(channels)
3841 .width(3)
3842 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3843 }
3844 }
3845
3846 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_step) {
3847 TEST_REQUIRES_X86_SSE41;
3848 for (size_t channels = 1; channels <= 120; channels += 23) {
3849 for (size_t step = 2; step <= 9; step++) {
3850 DWConvMicrokernelTester()
3851 .cr(24)
3852 .kr(9)
3853 .channels(channels)
3854 .width(3)
3855 .step(step)
3856 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3857 }
3858 }
3859 }
3860
3861 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
3862 TEST_REQUIRES_X86_SSE41;
3863 for (size_t channels = 1; channels <= 120; channels += 23) {
3864 DWConvMicrokernelTester()
3865 .cr(24)
3866 .kr(9)
3867 .channels(24)
3868 .width(5)
3869 .output_stride(127)
3870 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3871 }
3872 }
3873
3874 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
3875 TEST_REQUIRES_X86_SSE41;
3876 for (size_t channels = 1; channels <= 120; channels += 23) {
3877 DWConvMicrokernelTester()
3878 .cr(24)
3879 .kr(9)
3880 .channels(channels)
3881 .width(3)
3882 .qmin(128)
3883 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3884 }
3885 }
3886
3887 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
3888 TEST_REQUIRES_X86_SSE41;
3889 for (size_t channels = 1; channels <= 120; channels += 23) {
3890 DWConvMicrokernelTester()
3891 .cr(24)
3892 .kr(9)
3893 .channels(channels)
3894 .width(3)
3895 .qmax(128)
3896 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3897 }
3898 }
3899
3900 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, input_offset) {
3901 TEST_REQUIRES_X86_SSE41;
3902 for (uint32_t channels = 48; channels < 384; channels += 72) {
3903 DWConvMicrokernelTester()
3904 .cr(24)
3905 .kr(9)
3906 .channels(channels)
3907 .input_offset(464)
3908 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3909 }
3910 }
3911
3912 TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, zero) {
3913 TEST_REQUIRES_X86_SSE41;
3914 for (uint32_t mz = 0; mz < 9; mz++) {
3915 for (uint32_t channels = 48; channels < 384; channels += 72) {
3916 DWConvMicrokernelTester()
3917 .cr(24)
3918 .kr(9)
3919 .channels(channels)
3920 .input_offset(464)
3921 .zero_index(mz)
3922 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32);
3923 }
3924 }
3925 }
3926#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927
3928
3929#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3930 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_eq_8) {
3931 TEST_REQUIRES_X86_AVX;
3932 DWConvMicrokernelTester()
3933 .cr(8)
3934 .kr(9)
3935 .channels(8)
3936 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3937 }
3938
3939 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8) {
3940 TEST_REQUIRES_X86_AVX;
3941 for (uint32_t channels = 16; channels < 128; channels += 24) {
3942 DWConvMicrokernelTester()
3943 .cr(8)
3944 .kr(9)
3945 .channels(channels)
3946 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3947 }
3948 }
3949
3950 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
3951 TEST_REQUIRES_X86_AVX;
3952 for (uint32_t channels = 16; channels < 128; channels += 24) {
3953 DWConvMicrokernelTester()
3954 .cr(8)
3955 .kr(9)
3956 .channels(channels)
3957 .qmin(128)
3958 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3959 }
3960 }
3961
3962 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
3963 TEST_REQUIRES_X86_AVX;
3964 for (uint32_t channels = 16; channels < 128; channels += 24) {
3965 DWConvMicrokernelTester()
3966 .cr(8)
3967 .kr(9)
3968 .channels(channels)
3969 .qmax(128)
3970 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3971 }
3972 }
3973
3974 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_lt_8) {
3975 TEST_REQUIRES_X86_AVX;
3976 for (uint32_t channels = 1; channels < 8; channels++) {
3977 DWConvMicrokernelTester()
3978 .cr(8)
3979 .kr(9)
3980 .channels(channels)
3981 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3982 }
3983 }
3984
3985 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8) {
3986 TEST_REQUIRES_X86_AVX;
3987 for (uint32_t channels = 9; channels < 16; channels++) {
3988 DWConvMicrokernelTester()
3989 .cr(8)
3990 .kr(9)
3991 .channels(channels)
3992 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
3993 }
3994 }
3995
3996 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
3997 TEST_REQUIRES_X86_AVX;
3998 for (uint32_t channels = 9; channels < 16; channels++) {
3999 DWConvMicrokernelTester()
4000 .cr(8)
4001 .kr(9)
4002 .channels(channels)
4003 .qmin(128)
4004 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4005 }
4006 }
4007
4008 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
4009 TEST_REQUIRES_X86_AVX;
4010 for (uint32_t channels = 9; channels < 16; channels++) {
4011 DWConvMicrokernelTester()
4012 .cr(8)
4013 .kr(9)
4014 .channels(channels)
4015 .qmax(128)
4016 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4017 }
4018 }
4019
4020 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel) {
4021 TEST_REQUIRES_X86_AVX;
4022 for (size_t channels = 1; channels <= 40; channels += 7) {
4023 DWConvMicrokernelTester()
4024 .cr(8)
4025 .kr(9)
4026 .channels(channels)
4027 .width(3)
4028 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4029 }
4030 }
4031
4032 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_step) {
4033 TEST_REQUIRES_X86_AVX;
4034 for (size_t channels = 1; channels <= 40; channels += 7) {
4035 for (size_t step = 2; step <= 9; step++) {
4036 DWConvMicrokernelTester()
4037 .cr(8)
4038 .kr(9)
4039 .channels(channels)
4040 .width(3)
4041 .step(step)
4042 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4043 }
4044 }
4045 }
4046
4047 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
4048 TEST_REQUIRES_X86_AVX;
4049 for (size_t channels = 1; channels <= 40; channels += 7) {
4050 DWConvMicrokernelTester()
4051 .cr(8)
4052 .kr(9)
4053 .channels(8)
4054 .width(5)
4055 .output_stride(43)
4056 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4057 }
4058 }
4059
4060 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_qmin) {
4061 TEST_REQUIRES_X86_AVX;
4062 for (size_t channels = 1; channels <= 40; channels += 7) {
4063 DWConvMicrokernelTester()
4064 .cr(8)
4065 .kr(9)
4066 .channels(channels)
4067 .width(3)
4068 .qmin(128)
4069 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4070 }
4071 }
4072
4073 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_qmax) {
4074 TEST_REQUIRES_X86_AVX;
4075 for (size_t channels = 1; channels <= 40; channels += 7) {
4076 DWConvMicrokernelTester()
4077 .cr(8)
4078 .kr(9)
4079 .channels(channels)
4080 .width(3)
4081 .qmax(128)
4082 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4083 }
4084 }
4085
4086 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, input_offset) {
4087 TEST_REQUIRES_X86_AVX;
4088 for (uint32_t channels = 16; channels < 128; channels += 24) {
4089 DWConvMicrokernelTester()
4090 .cr(8)
4091 .kr(9)
4092 .channels(channels)
4093 .input_offset(176)
4094 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4095 }
4096 }
4097
4098 TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, zero) {
4099 TEST_REQUIRES_X86_AVX;
4100 for (uint32_t mz = 0; mz < 9; mz++) {
4101 for (uint32_t channels = 16; channels < 128; channels += 24) {
4102 DWConvMicrokernelTester()
4103 .cr(8)
4104 .kr(9)
4105 .channels(channels)
4106 .input_offset(176)
4107 .zero_index(mz)
4108 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32);
4109 }
4110 }
4111 }
4112#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113
4114
4115#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4116 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_eq_16) {
4117 TEST_REQUIRES_X86_AVX;
4118 DWConvMicrokernelTester()
4119 .cr(16)
4120 .kr(9)
4121 .channels(16)
4122 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4123 }
4124
4125 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16) {
4126 TEST_REQUIRES_X86_AVX;
4127 for (uint32_t channels = 32; channels < 256; channels += 48) {
4128 DWConvMicrokernelTester()
4129 .cr(16)
4130 .kr(9)
4131 .channels(channels)
4132 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4133 }
4134 }
4135
4136 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
4137 TEST_REQUIRES_X86_AVX;
4138 for (uint32_t channels = 32; channels < 256; channels += 48) {
4139 DWConvMicrokernelTester()
4140 .cr(16)
4141 .kr(9)
4142 .channels(channels)
4143 .qmin(128)
4144 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4145 }
4146 }
4147
4148 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
4149 TEST_REQUIRES_X86_AVX;
4150 for (uint32_t channels = 32; channels < 256; channels += 48) {
4151 DWConvMicrokernelTester()
4152 .cr(16)
4153 .kr(9)
4154 .channels(channels)
4155 .qmax(128)
4156 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4157 }
4158 }
4159
4160 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_lt_16) {
4161 TEST_REQUIRES_X86_AVX;
4162 for (uint32_t channels = 1; channels < 16; channels++) {
4163 DWConvMicrokernelTester()
4164 .cr(16)
4165 .kr(9)
4166 .channels(channels)
4167 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4168 }
4169 }
4170
4171 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16) {
4172 TEST_REQUIRES_X86_AVX;
4173 for (uint32_t channels = 17; channels < 32; channels++) {
4174 DWConvMicrokernelTester()
4175 .cr(16)
4176 .kr(9)
4177 .channels(channels)
4178 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4179 }
4180 }
4181
4182 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
4183 TEST_REQUIRES_X86_AVX;
4184 for (uint32_t channels = 17; channels < 32; channels++) {
4185 DWConvMicrokernelTester()
4186 .cr(16)
4187 .kr(9)
4188 .channels(channels)
4189 .qmin(128)
4190 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4191 }
4192 }
4193
4194 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
4195 TEST_REQUIRES_X86_AVX;
4196 for (uint32_t channels = 17; channels < 32; channels++) {
4197 DWConvMicrokernelTester()
4198 .cr(16)
4199 .kr(9)
4200 .channels(channels)
4201 .qmax(128)
4202 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4203 }
4204 }
4205
4206 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel) {
4207 TEST_REQUIRES_X86_AVX;
4208 for (size_t channels = 1; channels <= 80; channels += 15) {
4209 DWConvMicrokernelTester()
4210 .cr(16)
4211 .kr(9)
4212 .channels(channels)
4213 .width(3)
4214 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4215 }
4216 }
4217
4218 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_step) {
4219 TEST_REQUIRES_X86_AVX;
4220 for (size_t channels = 1; channels <= 80; channels += 15) {
4221 for (size_t step = 2; step <= 9; step++) {
4222 DWConvMicrokernelTester()
4223 .cr(16)
4224 .kr(9)
4225 .channels(channels)
4226 .width(3)
4227 .step(step)
4228 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4229 }
4230 }
4231 }
4232
4233 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
4234 TEST_REQUIRES_X86_AVX;
4235 for (size_t channels = 1; channels <= 80; channels += 15) {
4236 DWConvMicrokernelTester()
4237 .cr(16)
4238 .kr(9)
4239 .channels(16)
4240 .width(5)
4241 .output_stride(83)
4242 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4243 }
4244 }
4245
4246 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_qmin) {
4247 TEST_REQUIRES_X86_AVX;
4248 for (size_t channels = 1; channels <= 80; channels += 15) {
4249 DWConvMicrokernelTester()
4250 .cr(16)
4251 .kr(9)
4252 .channels(channels)
4253 .width(3)
4254 .qmin(128)
4255 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4256 }
4257 }
4258
4259 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_qmax) {
4260 TEST_REQUIRES_X86_AVX;
4261 for (size_t channels = 1; channels <= 80; channels += 15) {
4262 DWConvMicrokernelTester()
4263 .cr(16)
4264 .kr(9)
4265 .channels(channels)
4266 .width(3)
4267 .qmax(128)
4268 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4269 }
4270 }
4271
4272 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, input_offset) {
4273 TEST_REQUIRES_X86_AVX;
4274 for (uint32_t channels = 32; channels < 256; channels += 48) {
4275 DWConvMicrokernelTester()
4276 .cr(16)
4277 .kr(9)
4278 .channels(channels)
4279 .input_offset(304)
4280 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4281 }
4282 }
4283
4284 TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, zero) {
4285 TEST_REQUIRES_X86_AVX;
4286 for (uint32_t mz = 0; mz < 9; mz++) {
4287 for (uint32_t channels = 32; channels < 256; channels += 48) {
4288 DWConvMicrokernelTester()
4289 .cr(16)
4290 .kr(9)
4291 .channels(channels)
4292 .input_offset(304)
4293 .zero_index(mz)
4294 .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32);
4295 }
4296 }
4297 }
4298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4299
4300
4301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4302 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_eq_24) {
4303 TEST_REQUIRES_X86_AVX;
4304 DWConvMicrokernelTester()
4305 .cr(24)
4306 .kr(9)
4307 .channels(24)
4308 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4309 }
4310
4311 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24) {
4312 TEST_REQUIRES_X86_AVX;
4313 for (uint32_t channels = 48; channels < 384; channels += 72) {
4314 DWConvMicrokernelTester()
4315 .cr(24)
4316 .kr(9)
4317 .channels(channels)
4318 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4319 }
4320 }
4321
4322 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
4323 TEST_REQUIRES_X86_AVX;
4324 for (uint32_t channels = 48; channels < 384; channels += 72) {
4325 DWConvMicrokernelTester()
4326 .cr(24)
4327 .kr(9)
4328 .channels(channels)
4329 .qmin(128)
4330 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4331 }
4332 }
4333
4334 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
4335 TEST_REQUIRES_X86_AVX;
4336 for (uint32_t channels = 48; channels < 384; channels += 72) {
4337 DWConvMicrokernelTester()
4338 .cr(24)
4339 .kr(9)
4340 .channels(channels)
4341 .qmax(128)
4342 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4343 }
4344 }
4345
4346 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_lt_24) {
4347 TEST_REQUIRES_X86_AVX;
4348 for (uint32_t channels = 1; channels < 24; channels++) {
4349 DWConvMicrokernelTester()
4350 .cr(24)
4351 .kr(9)
4352 .channels(channels)
4353 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4354 }
4355 }
4356
4357 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24) {
4358 TEST_REQUIRES_X86_AVX;
4359 for (uint32_t channels = 25; channels < 48; channels++) {
4360 DWConvMicrokernelTester()
4361 .cr(24)
4362 .kr(9)
4363 .channels(channels)
4364 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4365 }
4366 }
4367
4368 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
4369 TEST_REQUIRES_X86_AVX;
4370 for (uint32_t channels = 25; channels < 48; channels++) {
4371 DWConvMicrokernelTester()
4372 .cr(24)
4373 .kr(9)
4374 .channels(channels)
4375 .qmin(128)
4376 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4377 }
4378 }
4379
4380 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
4381 TEST_REQUIRES_X86_AVX;
4382 for (uint32_t channels = 25; channels < 48; channels++) {
4383 DWConvMicrokernelTester()
4384 .cr(24)
4385 .kr(9)
4386 .channels(channels)
4387 .qmax(128)
4388 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4389 }
4390 }
4391
4392 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel) {
4393 TEST_REQUIRES_X86_AVX;
4394 for (size_t channels = 1; channels <= 120; channels += 23) {
4395 DWConvMicrokernelTester()
4396 .cr(24)
4397 .kr(9)
4398 .channels(channels)
4399 .width(3)
4400 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4401 }
4402 }
4403
4404 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_step) {
4405 TEST_REQUIRES_X86_AVX;
4406 for (size_t channels = 1; channels <= 120; channels += 23) {
4407 for (size_t step = 2; step <= 9; step++) {
4408 DWConvMicrokernelTester()
4409 .cr(24)
4410 .kr(9)
4411 .channels(channels)
4412 .width(3)
4413 .step(step)
4414 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4415 }
4416 }
4417 }
4418
4419 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
4420 TEST_REQUIRES_X86_AVX;
4421 for (size_t channels = 1; channels <= 120; channels += 23) {
4422 DWConvMicrokernelTester()
4423 .cr(24)
4424 .kr(9)
4425 .channels(24)
4426 .width(5)
4427 .output_stride(127)
4428 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4429 }
4430 }
4431
4432 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_qmin) {
4433 TEST_REQUIRES_X86_AVX;
4434 for (size_t channels = 1; channels <= 120; channels += 23) {
4435 DWConvMicrokernelTester()
4436 .cr(24)
4437 .kr(9)
4438 .channels(channels)
4439 .width(3)
4440 .qmin(128)
4441 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4442 }
4443 }
4444
4445 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_qmax) {
4446 TEST_REQUIRES_X86_AVX;
4447 for (size_t channels = 1; channels <= 120; channels += 23) {
4448 DWConvMicrokernelTester()
4449 .cr(24)
4450 .kr(9)
4451 .channels(channels)
4452 .width(3)
4453 .qmax(128)
4454 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4455 }
4456 }
4457
4458 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, input_offset) {
4459 TEST_REQUIRES_X86_AVX;
4460 for (uint32_t channels = 48; channels < 384; channels += 72) {
4461 DWConvMicrokernelTester()
4462 .cr(24)
4463 .kr(9)
4464 .channels(channels)
4465 .input_offset(464)
4466 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4467 }
4468 }
4469
4470 TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, zero) {
4471 TEST_REQUIRES_X86_AVX;
4472 for (uint32_t mz = 0; mz < 9; mz++) {
4473 for (uint32_t channels = 48; channels < 384; channels += 72) {
4474 DWConvMicrokernelTester()
4475 .cr(24)
4476 .kr(9)
4477 .channels(channels)
4478 .input_offset(464)
4479 .zero_index(mz)
4480 .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32);
4481 }
4482 }
4483 }
4484#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4485
4486
4487#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3fd4e272021-04-10 11:16:42 -07004488 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_eq_8) {
4489 TEST_REQUIRES_X86_XOP;
4490 DWConvMicrokernelTester()
4491 .cr(8)
4492 .kr(9)
4493 .channels(8)
4494 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4495 }
4496
4497 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8) {
4498 TEST_REQUIRES_X86_XOP;
4499 for (uint32_t channels = 16; channels < 128; channels += 24) {
4500 DWConvMicrokernelTester()
4501 .cr(8)
4502 .kr(9)
4503 .channels(channels)
4504 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4505 }
4506 }
4507
4508 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
4509 TEST_REQUIRES_X86_XOP;
4510 for (uint32_t channels = 16; channels < 128; channels += 24) {
4511 DWConvMicrokernelTester()
4512 .cr(8)
4513 .kr(9)
4514 .channels(channels)
4515 .qmin(128)
4516 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4517 }
4518 }
4519
4520 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
4521 TEST_REQUIRES_X86_XOP;
4522 for (uint32_t channels = 16; channels < 128; channels += 24) {
4523 DWConvMicrokernelTester()
4524 .cr(8)
4525 .kr(9)
4526 .channels(channels)
4527 .qmax(128)
4528 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4529 }
4530 }
4531
4532 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_lt_8) {
4533 TEST_REQUIRES_X86_XOP;
4534 for (uint32_t channels = 1; channels < 8; channels++) {
4535 DWConvMicrokernelTester()
4536 .cr(8)
4537 .kr(9)
4538 .channels(channels)
4539 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4540 }
4541 }
4542
4543 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8) {
4544 TEST_REQUIRES_X86_XOP;
4545 for (uint32_t channels = 9; channels < 16; channels++) {
4546 DWConvMicrokernelTester()
4547 .cr(8)
4548 .kr(9)
4549 .channels(channels)
4550 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4551 }
4552 }
4553
4554 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
4555 TEST_REQUIRES_X86_XOP;
4556 for (uint32_t channels = 9; channels < 16; channels++) {
4557 DWConvMicrokernelTester()
4558 .cr(8)
4559 .kr(9)
4560 .channels(channels)
4561 .qmin(128)
4562 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4563 }
4564 }
4565
4566 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
4567 TEST_REQUIRES_X86_XOP;
4568 for (uint32_t channels = 9; channels < 16; channels++) {
4569 DWConvMicrokernelTester()
4570 .cr(8)
4571 .kr(9)
4572 .channels(channels)
4573 .qmax(128)
4574 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4575 }
4576 }
4577
4578 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel) {
4579 TEST_REQUIRES_X86_XOP;
4580 for (size_t channels = 1; channels <= 40; channels += 7) {
4581 DWConvMicrokernelTester()
4582 .cr(8)
4583 .kr(9)
4584 .channels(channels)
4585 .width(3)
4586 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4587 }
4588 }
4589
4590 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_step) {
4591 TEST_REQUIRES_X86_XOP;
4592 for (size_t channels = 1; channels <= 40; channels += 7) {
4593 for (size_t step = 2; step <= 9; step++) {
4594 DWConvMicrokernelTester()
4595 .cr(8)
4596 .kr(9)
4597 .channels(channels)
4598 .width(3)
4599 .step(step)
4600 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4601 }
4602 }
4603 }
4604
4605 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
4606 TEST_REQUIRES_X86_XOP;
4607 for (size_t channels = 1; channels <= 40; channels += 7) {
4608 DWConvMicrokernelTester()
4609 .cr(8)
4610 .kr(9)
4611 .channels(8)
4612 .width(5)
4613 .output_stride(43)
4614 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4615 }
4616 }
4617
4618 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_qmin) {
4619 TEST_REQUIRES_X86_XOP;
4620 for (size_t channels = 1; channels <= 40; channels += 7) {
4621 DWConvMicrokernelTester()
4622 .cr(8)
4623 .kr(9)
4624 .channels(channels)
4625 .width(3)
4626 .qmin(128)
4627 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4628 }
4629 }
4630
4631 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_qmax) {
4632 TEST_REQUIRES_X86_XOP;
4633 for (size_t channels = 1; channels <= 40; channels += 7) {
4634 DWConvMicrokernelTester()
4635 .cr(8)
4636 .kr(9)
4637 .channels(channels)
4638 .width(3)
4639 .qmax(128)
4640 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4641 }
4642 }
4643
4644 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, input_offset) {
4645 TEST_REQUIRES_X86_XOP;
4646 for (uint32_t channels = 16; channels < 128; channels += 24) {
4647 DWConvMicrokernelTester()
4648 .cr(8)
4649 .kr(9)
4650 .channels(channels)
4651 .input_offset(176)
4652 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4653 }
4654 }
4655
4656 TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, zero) {
4657 TEST_REQUIRES_X86_XOP;
4658 for (uint32_t mz = 0; mz < 9; mz++) {
4659 for (uint32_t channels = 16; channels < 128; channels += 24) {
4660 DWConvMicrokernelTester()
4661 .cr(8)
4662 .kr(9)
4663 .channels(channels)
4664 .input_offset(176)
4665 .zero_index(mz)
4666 .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32);
4667 }
4668 }
4669 }
4670#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671
4672
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32
  // (cr = 16, kr = 9). Every test begins with TEST_REQUIRES_X86_XOP.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_eq_16) {
    TEST_REQUIRES_X86_XOP;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(16)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
  }

  // Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Same multiples-of-16 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Same multiples-of-16 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Every channel count below cr: 1 through 15.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_lt_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 17 through 31.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel counts 17 through 31 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel counts 17 through 31 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel counts 1, 16, 31, 46, 61, 76 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(83), which exceeds the
  // largest swept channel count (76). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Multiples-of-16 channel sweep with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, input_offset) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-16 channel
  // sweep, with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, zero) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857
4858
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32
  // (cr = 24, kr = 9). Every test begins with TEST_REQUIRES_X86_XOP.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_eq_24) {
    TEST_REQUIRES_X86_XOP;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(24)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
  }

  // Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Same multiples-of-24 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Same multiples-of-24 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Every channel count below cr: 1 through 23.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_lt_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 1; channels < 24; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 25 through 47.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel counts 25 through 47 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel counts 25 through 47 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel counts 1, 24, 47, 70, 93, 116 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(127), which exceeds the
  // largest swept channel count (116). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(127)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Multiples-of-24 channel sweep with input_offset(464).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, input_offset) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .input_offset(464)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-24 channel
  // sweep, with input_offset(464).
  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, zero) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 48; channels < 384; channels += 72) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .input_offset(464)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043
5044
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32
  // (cr = 8, kr = 9). Every test begins with TEST_REQUIRES_X86_AVX2.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_eq_8) {
    TEST_REQUIRES_X86_AVX2;
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(8)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
  }

  // Channel counts that are multiples of 8: 16, 40, 64, 88, 112.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Same multiples-of-8 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Same multiples-of-8 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Every channel count below cr: 1 through 7.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_lt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 1; channels < 8; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 9 through 15.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel counts 9 through 15 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel counts 9 through 15 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel counts 1, 8, 15, 22, 29, 36 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(43), which exceeds the
  // largest swept channel count (36). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Multiples-of-8 channel sweep with input_offset(176).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .input_offset(176)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-8 channel
  // sweep, with input_offset(176).
  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, zero) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 16; channels < 128; channels += 24) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(channels)
          .input_offset(176)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229
5230
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32
  // (cr = 16, kr = 9). Every test begins with TEST_REQUIRES_X86_AVX2.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_eq_16) {
    TEST_REQUIRES_X86_AVX2;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(16)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
  }

  // Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Same multiples-of-16 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Same multiples-of-16 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Every channel count below cr: 1 through 15.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_lt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 17 through 31.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel counts 17 through 31 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel counts 17 through 31 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel counts 1, 16, 31, 46, 61, 76 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(83), which exceeds the
  // largest swept channel count (76). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Multiples-of-16 channel sweep with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-16 channel
  // sweep, with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, zero) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415
5416
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32
  // (cr = 24, kr = 9). Every test begins with TEST_REQUIRES_X86_AVX2.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_eq_24) {
    TEST_REQUIRES_X86_AVX2;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(24)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
  }

  // Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Same multiples-of-24 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Same multiples-of-24 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Every channel count below cr: 1 through 23.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_lt_24) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 1; channels < 24; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 25 through 47.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel counts 25 through 47 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel counts 25 through 47 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel counts 1, 24, 47, 70, 93, 116 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(127), which exceeds the
  // largest swept channel count (116). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(127)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Multiples-of-24 channel sweep with input_offset(464).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .input_offset(464)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-24 channel
  // sweep, with input_offset(464).
  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, zero) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 48; channels < 384; channels += 72) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .input_offset(464)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601
5602
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32
  // (cr = 32, kr = 9). Every test begins with TEST_REQUIRES_X86_AVX2.

  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_eq_32) {
    TEST_REQUIRES_X86_AVX2;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(32)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
  }

  // Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Same multiples-of-32 sweep with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Same multiples-of-32 sweep with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Every channel count below cr: 1 through 31.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_lt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 1; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel counts strictly between cr and 2*cr: 33 through 63.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel counts 33 through 63 with qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel counts 33 through 63 with qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel counts 1, 32, 63, 94, 125, 156 with width(3).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Same channel sweep with width(3), trying every step from 2 through 9.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(32)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
      }
    }
  }

  // Channel sweep with width(5) and output_stride(163), which exceeds the
  // largest swept channel count (156). The loop variable is passed to
  // channels() so each iteration covers a distinct configuration.
  // NOTE(review): this file is auto-generated; apply the same change to the
  // generator template so it is not lost on regeneration.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(163)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmin(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Channel sweep with width(3) and qmax(128).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Multiples-of-32 channel sweep with input_offset(592).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .input_offset(592)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
    }
  }

  // Each zero_index from 0 through 8 across the multiples-of-32 channel
  // sweep, with input_offset(592).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, zero) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 64; channels < 512; channels += 96) {
        DWConvMicrokernelTester()
          .cr(32)
          .kr(9)
          .channels(channels)
          .input_offset(592)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancc8f34c2020-08-05 16:36:38 -07005787
5788
// Test suite for xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32.
// Every case configures DWConvMicrokernelTester with cr(16) and kr(9) and is
// gated on TEST_REQUIRES_X86_AVX512SKX.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_eq_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(16)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
  }

  // Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_div_16 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_div_16 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Channel counts 1..15 (below cr).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_lt_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Channel counts 17..31 (above cr, below 2 * cr).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_gt_16 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_gt_16 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // width(3) run for channel counts 1, 16, 31, 46, 61, 76.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // width(3) run for each multipixel channel count combined with step 2..9.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
      }
    }
  }

  // width(5) run with output_stride(83) while sweeping the channel count over
  // 1, 16, 31, 46, 61, 76.
  //
  // The sweep previously passed a fixed .channels(16), so the loop variable was
  // unused and the identical configuration ran six times. The loop variable is
  // now forwarded to the tester. The largest swept value (76) stays below the
  // configured output stride of 83.
  //
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // mirror this change in the generator template so regeneration keeps it.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as multipixel with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Same sweep as multipixel with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
    }
  }

  // For each zero_index 0..8, repeats the input_offset sweep with
  // zero_index(mz) set.
  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, zero) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973
5974
// Test suite for xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32.
// Every case configures DWConvMicrokernelTester with cr(32) and kr(9) and is
// gated on TEST_REQUIRES_X86_AVX512SKX.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_eq_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(32)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
  }

  // Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_div_32 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_div_32 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Channel counts 1..31 (below cr).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_lt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 1; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Channel counts 33..63 (above cr, below 2 * cr).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_gt_32 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as c_gt_32 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 33; channels < 64; channels++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // width(3) run for channel counts 1, 32, 63, 94, 125, 156.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // width(3) run for each multipixel channel count combined with step 2..9.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(32)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
      }
    }
  }

  // width(5) run with output_stride(163) while sweeping the channel count over
  // 1, 32, 63, 94, 125, 156.
  //
  // The sweep previously passed a fixed .channels(32), so the loop variable was
  // unused and the identical configuration ran six times. The loop variable is
  // now forwarded to the tester. The largest swept value (156) stays below the
  // configured output stride of 163.
  //
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // mirror this change in the generator template so regeneration keeps it.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(163)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as multipixel with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Same sweep as multipixel with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t channels = 1; channels <= 160; channels += 31) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // Channel counts 64, 160, 256, 352, 448 with input_offset(592).
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t channels = 64; channels < 512; channels += 96) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(channels)
        .input_offset(592)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
    }
  }

  // For each zero_index 0..8, repeats the input_offset sweep with
  // zero_index(mz) set.
  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, zero) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 64; channels < 512; channels += 96) {
        DWConvMicrokernelTester()
          .cr(32)
          .kr(9)
          .channels(channels)
          .input_offset(592)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159
6160
// Test suite for xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16.
// Every case configures DWConvMicrokernelTester with cr(8) and kr(9); the
// suite is compiled only when XNN_ARCH_WASMSIMD is set.
#if XNN_ARCH_WASMSIMD
  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_eq_8) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(8)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
  }

  // Channel counts that are multiples of 8: 16, 40, 64, 88, 112.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8) {
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_8 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_8 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Channel counts 1..7 (below cr).
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_lt_8) {
    for (uint32_t channels = 1; channels < 8; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Channel counts 9..15 (above cr, below 2 * cr).
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8) {
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_8 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_8 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // width(3) run for channel counts 1, 8, 15, 22, 29, 36.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // width(3) run for each multipixel channel count combined with step 2..9.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
      }
    }
  }

  // width(5) run with output_stride(43) while sweeping the channel count over
  // 1, 8, 15, 22, 29, 36.
  //
  // The sweep previously passed a fixed .channels(8), so the loop variable was
  // unused and the identical configuration ran six times. The loop variable is
  // now forwarded to the tester. The largest swept value (36) stays below the
  // configured output stride of 43.
  //
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // mirror this change in the generator template so regeneration keeps it.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // Channel counts 16, 40, 64, 88, 112 with input_offset(176).
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, input_offset) {
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(channels)
        .input_offset(176)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
    }
  }

  // For each zero_index 0..8, repeats the input_offset sweep with
  // zero_index(mz) set.
  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, zero) {
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 16; channels < 128; channels += 24) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(channels)
          .input_offset(176)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
6330
6331
// Test suite for xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16.
// Every case configures DWConvMicrokernelTester with cr(16) and kr(9); the
// suite is compiled only when XNN_ARCH_WASMSIMD is set.
#if XNN_ARCH_WASMSIMD
  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_eq_16) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(16)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
  }

  // Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_16 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_16 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Channel counts 1..15 (below cr).
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_lt_16) {
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Channel counts 17..31 (above cr, below 2 * cr).
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16) {
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_16 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_16 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // width(3) run for channel counts 1, 16, 31, 46, 61, 76.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // width(3) run for each multipixel channel count combined with step 2..9.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
      }
    }
  }

  // width(5) run with output_stride(83) while sweeping the channel count over
  // 1, 16, 31, 46, 61, 76.
  //
  // The sweep previously passed a fixed .channels(16), so the loop variable was
  // unused and the identical configuration ran six times. The loop variable is
  // now forwarded to the tester. The largest swept value (76) stays below the
  // configured output stride of 83.
  //
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // mirror this change in the generator template so regeneration keeps it.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, input_offset) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
    }
  }

  // For each zero_index 0..8, repeats the input_offset sweep with
  // zero_index(mz) set.
  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, zero) {
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
6501
6502
// Test suite for xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16.
// Every case configures DWConvMicrokernelTester with cr(24) and kr(9); the
// suite is compiled only when XNN_ARCH_WASMSIMD is set.
#if XNN_ARCH_WASMSIMD
  // Channel count equal to cr.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_eq_24) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(24)
      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
  }

  // Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24) {
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_24 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_div_24 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Channel counts 1..23 (below cr).
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_lt_24) {
    for (uint32_t channels = 1; channels < 24; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Channel counts 25..47 (above cr, below 2 * cr).
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24) {
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_24 with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as c_gt_24 with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // width(3) run for channel counts 1, 24, 47, 70, 93, 116.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel) {
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // width(3) run for each multipixel channel count combined with step 2..9.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
    for (size_t channels = 1; channels <= 120; channels += 23) {
      for (size_t step = 2; step <= 9; step++) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
      }
    }
  }

  // width(5) run with output_stride(127) while sweeping the channel count over
  // 1, 24, 47, 70, 93, 116.
  //
  // The sweep previously passed a fixed .channels(24), so the loop variable was
  // unused and the identical configuration ran six times. The loop variable is
  // now forwarded to the tester. The largest swept value (116) stays below the
  // configured output stride of 127.
  //
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // mirror this change in the generator template so regeneration keeps it.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(127)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmin(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Same sweep as multipixel with qmax(128) configured.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // Channel counts 48, 120, 192, 264, 336 with input_offset(464).
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, input_offset) {
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(9)
        .channels(channels)
        .input_offset(464)
        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
    }
  }

  // For each zero_index 0..8, repeats the input_offset sweep with
  // zero_index(mz) set.
  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, zero) {
    for (uint32_t mz = 0; mz < 9; mz++) {
      for (uint32_t channels = 48; channels < 384; channels += 72) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(9)
          .channels(channels)
          .input_offset(464)
          .zero_index(mz)
          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD