blob: cb54d3ef8435830232257abb3428b5d247e1ab57 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-dwconv-minmax-fp32.yaml
// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
23#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_eq_16) {
25 TEST_REQUIRES_X86_AVX2;
26 DWConvMicrokernelTester()
27 .cr(16)
28 .kr(9)
29 .channels(16)
30 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
31 }
32
33 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16) {
34 TEST_REQUIRES_X86_AVX2;
35 for (uint32_t channels = 32; channels < 256; channels += 48) {
36 DWConvMicrokernelTester()
37 .cr(16)
38 .kr(9)
39 .channels(channels)
40 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
41 }
42 }
43
44 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
45 TEST_REQUIRES_X86_AVX2;
46 for (uint32_t channels = 32; channels < 256; channels += 48) {
47 DWConvMicrokernelTester()
48 .cr(16)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
52 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
53 }
54 }
55
56 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
57 TEST_REQUIRES_X86_AVX2;
58 for (uint32_t channels = 32; channels < 256; channels += 48) {
59 DWConvMicrokernelTester()
60 .cr(16)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
64 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
65 }
66 }
67
68 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_lt_16) {
69 TEST_REQUIRES_X86_AVX2;
70 for (uint32_t channels = 1; channels < 16; channels++) {
71 DWConvMicrokernelTester()
72 .cr(16)
73 .kr(9)
74 .channels(channels)
75 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
76 }
77 }
78
79 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16) {
80 TEST_REQUIRES_X86_AVX2;
81 for (uint32_t channels = 17; channels < 32; channels++) {
82 DWConvMicrokernelTester()
83 .cr(16)
84 .kr(9)
85 .channels(channels)
86 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
87 }
88 }
89
90 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
91 TEST_REQUIRES_X86_AVX2;
92 for (uint32_t channels = 17; channels < 32; channels++) {
93 DWConvMicrokernelTester()
94 .cr(16)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
98 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
99 }
100 }
101
102 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
103 TEST_REQUIRES_X86_AVX2;
104 for (uint32_t channels = 17; channels < 32; channels++) {
105 DWConvMicrokernelTester()
106 .cr(16)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
110 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
111 }
112 }
113
114 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel) {
115 TEST_REQUIRES_X86_AVX2;
116 for (size_t channels = 1; channels <= 80; channels += 15) {
117 DWConvMicrokernelTester()
118 .cr(16)
119 .kr(9)
120 .channels(channels)
121 .width(3)
122 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
123 }
124 }
125
126 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_step) {
127 TEST_REQUIRES_X86_AVX2;
128 for (size_t channels = 1; channels <= 80; channels += 15) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(16)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
136 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
137 }
138 }
139 }
140
141 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
142 TEST_REQUIRES_X86_AVX2;
143 for (size_t channels = 1; channels <= 80; channels += 15) {
144 DWConvMicrokernelTester()
145 .cr(16)
146 .kr(9)
147 .channels(16)
148 .width(5)
149 .output_stride(83)
150 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
151 }
152 }
153
154 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
155 TEST_REQUIRES_X86_AVX2;
156 for (size_t channels = 1; channels <= 80; channels += 15) {
157 DWConvMicrokernelTester()
158 .cr(16)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
163 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
164 }
165 }
166
167 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
168 TEST_REQUIRES_X86_AVX2;
169 for (size_t channels = 1; channels <= 80; channels += 15) {
170 DWConvMicrokernelTester()
171 .cr(16)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
176 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
177 }
178 }
179
180 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, input_offset) {
181 TEST_REQUIRES_X86_AVX2;
182 for (uint32_t channels = 32; channels < 256; channels += 48) {
183 DWConvMicrokernelTester()
184 .cr(16)
185 .kr(9)
186 .channels(channels)
187 .input_offset(304)
188 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
189 }
190 }
191
192 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, zero) {
193 TEST_REQUIRES_X86_AVX2;
194 for (uint32_t mz = 0; mz < 9; mz++) {
195 for (uint32_t channels = 32; channels < 256; channels += 48) {
196 DWConvMicrokernelTester()
197 .cr(16)
198 .kr(9)
199 .channels(channels)
200 .input_offset(304)
201 .zero_index(mz)
202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
203 }
204 }
205 }
206#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
207
208
209#if XNN_ARCH_X86 || XNN_ARCH_X86_64
210 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_eq_32) {
211 TEST_REQUIRES_X86_AVX2;
212 DWConvMicrokernelTester()
213 .cr(32)
214 .kr(9)
215 .channels(32)
216 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
217 }
218
219 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32) {
220 TEST_REQUIRES_X86_AVX2;
221 for (uint32_t channels = 64; channels < 512; channels += 96) {
222 DWConvMicrokernelTester()
223 .cr(32)
224 .kr(9)
225 .channels(channels)
226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
227 }
228 }
229
230 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
231 TEST_REQUIRES_X86_AVX2;
232 for (uint32_t channels = 64; channels < 512; channels += 96) {
233 DWConvMicrokernelTester()
234 .cr(32)
235 .kr(9)
236 .channels(channels)
237 .qmin(128)
238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
239 }
240 }
241
242 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
243 TEST_REQUIRES_X86_AVX2;
244 for (uint32_t channels = 64; channels < 512; channels += 96) {
245 DWConvMicrokernelTester()
246 .cr(32)
247 .kr(9)
248 .channels(channels)
249 .qmax(128)
250 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
251 }
252 }
253
254 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_lt_32) {
255 TEST_REQUIRES_X86_AVX2;
256 for (uint32_t channels = 1; channels < 32; channels++) {
257 DWConvMicrokernelTester()
258 .cr(32)
259 .kr(9)
260 .channels(channels)
261 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
262 }
263 }
264
265 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32) {
266 TEST_REQUIRES_X86_AVX2;
267 for (uint32_t channels = 33; channels < 64; channels++) {
268 DWConvMicrokernelTester()
269 .cr(32)
270 .kr(9)
271 .channels(channels)
272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
273 }
274 }
275
276 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
277 TEST_REQUIRES_X86_AVX2;
278 for (uint32_t channels = 33; channels < 64; channels++) {
279 DWConvMicrokernelTester()
280 .cr(32)
281 .kr(9)
282 .channels(channels)
283 .qmin(128)
284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
285 }
286 }
287
288 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
289 TEST_REQUIRES_X86_AVX2;
290 for (uint32_t channels = 33; channels < 64; channels++) {
291 DWConvMicrokernelTester()
292 .cr(32)
293 .kr(9)
294 .channels(channels)
295 .qmax(128)
296 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
297 }
298 }
299
300 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel) {
301 TEST_REQUIRES_X86_AVX2;
302 for (size_t channels = 1; channels <= 160; channels += 31) {
303 DWConvMicrokernelTester()
304 .cr(32)
305 .kr(9)
306 .channels(channels)
307 .width(3)
308 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
309 }
310 }
311
312 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_step) {
313 TEST_REQUIRES_X86_AVX2;
314 for (size_t channels = 1; channels <= 160; channels += 31) {
315 for (size_t step = 2; step <= 9; step++) {
316 DWConvMicrokernelTester()
317 .cr(32)
318 .kr(9)
319 .channels(channels)
320 .width(3)
321 .step(step)
322 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
323 }
324 }
325 }
326
327 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
328 TEST_REQUIRES_X86_AVX2;
329 for (size_t channels = 1; channels <= 160; channels += 31) {
330 DWConvMicrokernelTester()
331 .cr(32)
332 .kr(9)
333 .channels(32)
334 .width(5)
335 .output_stride(163)
336 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
337 }
338 }
339
340 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
341 TEST_REQUIRES_X86_AVX2;
342 for (size_t channels = 1; channels <= 160; channels += 31) {
343 DWConvMicrokernelTester()
344 .cr(32)
345 .kr(9)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
349 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
350 }
351 }
352
353 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
354 TEST_REQUIRES_X86_AVX2;
355 for (size_t channels = 1; channels <= 160; channels += 31) {
356 DWConvMicrokernelTester()
357 .cr(32)
358 .kr(9)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
362 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
363 }
364 }
365
366 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, input_offset) {
367 TEST_REQUIRES_X86_AVX2;
368 for (uint32_t channels = 64; channels < 512; channels += 96) {
369 DWConvMicrokernelTester()
370 .cr(32)
371 .kr(9)
372 .channels(channels)
373 .input_offset(592)
374 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
375 }
376 }
377
378 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, zero) {
379 TEST_REQUIRES_X86_AVX2;
380 for (uint32_t mz = 0; mz < 9; mz++) {
381 for (uint32_t channels = 64; channels < 512; channels += 96) {
382 DWConvMicrokernelTester()
383 .cr(32)
384 .kr(9)
385 .channels(channels)
386 .input_offset(592)
387 .zero_index(mz)
388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
389 }
390 }
391 }
392#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
393
394
395#if XNN_ARCH_X86 || XNN_ARCH_X86_64
396 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
397 TEST_REQUIRES_X86_AVX2;
398 DWConvMicrokernelTester()
399 .cr(8)
400 .kr(9)
401 .channels(8)
402 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
403 }
404
405 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
406 TEST_REQUIRES_X86_AVX2;
407 for (uint32_t channels = 16; channels < 128; channels += 24) {
408 DWConvMicrokernelTester()
409 .cr(8)
410 .kr(9)
411 .channels(channels)
412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
413 }
414 }
415
416 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
417 TEST_REQUIRES_X86_AVX2;
418 for (uint32_t channels = 16; channels < 128; channels += 24) {
419 DWConvMicrokernelTester()
420 .cr(8)
421 .kr(9)
422 .channels(channels)
423 .qmin(128)
424 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
425 }
426 }
427
428 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
429 TEST_REQUIRES_X86_AVX2;
430 for (uint32_t channels = 16; channels < 128; channels += 24) {
431 DWConvMicrokernelTester()
432 .cr(8)
433 .kr(9)
434 .channels(channels)
435 .qmax(128)
436 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
437 }
438 }
439
440 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
441 TEST_REQUIRES_X86_AVX2;
442 for (uint32_t channels = 1; channels < 8; channels++) {
443 DWConvMicrokernelTester()
444 .cr(8)
445 .kr(9)
446 .channels(channels)
447 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
448 }
449 }
450
451 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
452 TEST_REQUIRES_X86_AVX2;
453 for (uint32_t channels = 9; channels < 16; channels++) {
454 DWConvMicrokernelTester()
455 .cr(8)
456 .kr(9)
457 .channels(channels)
458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
459 }
460 }
461
462 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
463 TEST_REQUIRES_X86_AVX2;
464 for (uint32_t channels = 9; channels < 16; channels++) {
465 DWConvMicrokernelTester()
466 .cr(8)
467 .kr(9)
468 .channels(channels)
469 .qmin(128)
470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
471 }
472 }
473
474 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
475 TEST_REQUIRES_X86_AVX2;
476 for (uint32_t channels = 9; channels < 16; channels++) {
477 DWConvMicrokernelTester()
478 .cr(8)
479 .kr(9)
480 .channels(channels)
481 .qmax(128)
482 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
483 }
484 }
485
486 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
487 TEST_REQUIRES_X86_AVX2;
488 for (size_t channels = 1; channels <= 40; channels += 7) {
489 DWConvMicrokernelTester()
490 .cr(8)
491 .kr(9)
492 .channels(channels)
493 .width(3)
494 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
495 }
496 }
497
498 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
499 TEST_REQUIRES_X86_AVX2;
500 for (size_t channels = 1; channels <= 40; channels += 7) {
501 for (size_t step = 2; step <= 9; step++) {
502 DWConvMicrokernelTester()
503 .cr(8)
504 .kr(9)
505 .channels(channels)
506 .width(3)
507 .step(step)
508 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
509 }
510 }
511 }
512
513 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
514 TEST_REQUIRES_X86_AVX2;
515 for (size_t channels = 1; channels <= 40; channels += 7) {
516 DWConvMicrokernelTester()
517 .cr(8)
518 .kr(9)
519 .channels(8)
520 .width(5)
521 .output_stride(43)
522 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
523 }
524 }
525
526 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
527 TEST_REQUIRES_X86_AVX2;
528 for (size_t channels = 1; channels <= 40; channels += 7) {
529 DWConvMicrokernelTester()
530 .cr(8)
531 .kr(9)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
535 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
536 }
537 }
538
539 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
540 TEST_REQUIRES_X86_AVX2;
541 for (size_t channels = 1; channels <= 40; channels += 7) {
542 DWConvMicrokernelTester()
543 .cr(8)
544 .kr(9)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
548 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
549 }
550 }
551
552 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
553 TEST_REQUIRES_X86_AVX2;
554 for (uint32_t channels = 16; channels < 128; channels += 24) {
555 DWConvMicrokernelTester()
556 .cr(8)
557 .kr(9)
558 .channels(channels)
559 .input_offset(176)
560 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
561 }
562 }
563
564 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
565 TEST_REQUIRES_X86_AVX2;
566 for (uint32_t mz = 0; mz < 9; mz++) {
567 for (uint32_t channels = 16; channels < 128; channels += 24) {
568 DWConvMicrokernelTester()
569 .cr(8)
570 .kr(9)
571 .channels(channels)
572 .input_offset(176)
573 .zero_index(mz)
574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
575 }
576 }
577 }
578#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
579
580
581#if XNN_ARCH_X86 || XNN_ARCH_X86_64
582 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
583 TEST_REQUIRES_X86_AVX2;
584 DWConvMicrokernelTester()
585 .cr(16)
586 .kr(9)
587 .channels(16)
588 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
589 }
590
591 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
592 TEST_REQUIRES_X86_AVX2;
593 for (uint32_t channels = 32; channels < 256; channels += 48) {
594 DWConvMicrokernelTester()
595 .cr(16)
596 .kr(9)
597 .channels(channels)
598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
599 }
600 }
601
602 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
603 TEST_REQUIRES_X86_AVX2;
604 for (uint32_t channels = 32; channels < 256; channels += 48) {
605 DWConvMicrokernelTester()
606 .cr(16)
607 .kr(9)
608 .channels(channels)
609 .qmin(128)
610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
611 }
612 }
613
614 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
615 TEST_REQUIRES_X86_AVX2;
616 for (uint32_t channels = 32; channels < 256; channels += 48) {
617 DWConvMicrokernelTester()
618 .cr(16)
619 .kr(9)
620 .channels(channels)
621 .qmax(128)
622 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
623 }
624 }
625
626 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
627 TEST_REQUIRES_X86_AVX2;
628 for (uint32_t channels = 1; channels < 16; channels++) {
629 DWConvMicrokernelTester()
630 .cr(16)
631 .kr(9)
632 .channels(channels)
633 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
634 }
635 }
636
637 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
638 TEST_REQUIRES_X86_AVX2;
639 for (uint32_t channels = 17; channels < 32; channels++) {
640 DWConvMicrokernelTester()
641 .cr(16)
642 .kr(9)
643 .channels(channels)
644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
645 }
646 }
647
648 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
649 TEST_REQUIRES_X86_AVX2;
650 for (uint32_t channels = 17; channels < 32; channels++) {
651 DWConvMicrokernelTester()
652 .cr(16)
653 .kr(9)
654 .channels(channels)
655 .qmin(128)
656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
657 }
658 }
659
660 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
661 TEST_REQUIRES_X86_AVX2;
662 for (uint32_t channels = 17; channels < 32; channels++) {
663 DWConvMicrokernelTester()
664 .cr(16)
665 .kr(9)
666 .channels(channels)
667 .qmax(128)
668 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
669 }
670 }
671
672 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
673 TEST_REQUIRES_X86_AVX2;
674 for (size_t channels = 1; channels <= 80; channels += 15) {
675 DWConvMicrokernelTester()
676 .cr(16)
677 .kr(9)
678 .channels(channels)
679 .width(3)
680 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
681 }
682 }
683
684 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
685 TEST_REQUIRES_X86_AVX2;
686 for (size_t channels = 1; channels <= 80; channels += 15) {
687 for (size_t step = 2; step <= 9; step++) {
688 DWConvMicrokernelTester()
689 .cr(16)
690 .kr(9)
691 .channels(channels)
692 .width(3)
693 .step(step)
694 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
695 }
696 }
697 }
698
699 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
700 TEST_REQUIRES_X86_AVX2;
701 for (size_t channels = 1; channels <= 80; channels += 15) {
702 DWConvMicrokernelTester()
703 .cr(16)
704 .kr(9)
705 .channels(16)
706 .width(5)
707 .output_stride(83)
708 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
709 }
710 }
711
712 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
713 TEST_REQUIRES_X86_AVX2;
714 for (size_t channels = 1; channels <= 80; channels += 15) {
715 DWConvMicrokernelTester()
716 .cr(16)
717 .kr(9)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
721 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
722 }
723 }
724
725 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
726 TEST_REQUIRES_X86_AVX2;
727 for (size_t channels = 1; channels <= 80; channels += 15) {
728 DWConvMicrokernelTester()
729 .cr(16)
730 .kr(9)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
734 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
735 }
736 }
737
738 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
739 TEST_REQUIRES_X86_AVX2;
740 for (uint32_t channels = 32; channels < 256; channels += 48) {
741 DWConvMicrokernelTester()
742 .cr(16)
743 .kr(9)
744 .channels(channels)
745 .input_offset(304)
746 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
747 }
748 }
749
750 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
751 TEST_REQUIRES_X86_AVX2;
752 for (uint32_t mz = 0; mz < 9; mz++) {
753 for (uint32_t channels = 32; channels < 256; channels += 48) {
754 DWConvMicrokernelTester()
755 .cr(16)
756 .kr(9)
757 .channels(channels)
758 .input_offset(304)
759 .zero_index(mz)
760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
761 }
762 }
763 }
764#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
765
766
767#if XNN_ARCH_X86 || XNN_ARCH_X86_64
768 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
769 TEST_REQUIRES_X86_AVX2;
770 DWConvMicrokernelTester()
771 .cr(24)
772 .kr(9)
773 .channels(24)
774 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
775 }
776
777 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
778 TEST_REQUIRES_X86_AVX2;
779 for (uint32_t channels = 48; channels < 384; channels += 72) {
780 DWConvMicrokernelTester()
781 .cr(24)
782 .kr(9)
783 .channels(channels)
784 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
785 }
786 }
787
788 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
789 TEST_REQUIRES_X86_AVX2;
790 for (uint32_t channels = 48; channels < 384; channels += 72) {
791 DWConvMicrokernelTester()
792 .cr(24)
793 .kr(9)
794 .channels(channels)
795 .qmin(128)
796 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
797 }
798 }
799
800 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
801 TEST_REQUIRES_X86_AVX2;
802 for (uint32_t channels = 48; channels < 384; channels += 72) {
803 DWConvMicrokernelTester()
804 .cr(24)
805 .kr(9)
806 .channels(channels)
807 .qmax(128)
808 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
809 }
810 }
811
812 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
813 TEST_REQUIRES_X86_AVX2;
814 for (uint32_t channels = 1; channels < 24; channels++) {
815 DWConvMicrokernelTester()
816 .cr(24)
817 .kr(9)
818 .channels(channels)
819 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
820 }
821 }
822
823 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
824 TEST_REQUIRES_X86_AVX2;
825 for (uint32_t channels = 25; channels < 48; channels++) {
826 DWConvMicrokernelTester()
827 .cr(24)
828 .kr(9)
829 .channels(channels)
830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
831 }
832 }
833
834 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
835 TEST_REQUIRES_X86_AVX2;
836 for (uint32_t channels = 25; channels < 48; channels++) {
837 DWConvMicrokernelTester()
838 .cr(24)
839 .kr(9)
840 .channels(channels)
841 .qmin(128)
842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
843 }
844 }
845
846 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
847 TEST_REQUIRES_X86_AVX2;
848 for (uint32_t channels = 25; channels < 48; channels++) {
849 DWConvMicrokernelTester()
850 .cr(24)
851 .kr(9)
852 .channels(channels)
853 .qmax(128)
854 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
855 }
856 }
857
858 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
859 TEST_REQUIRES_X86_AVX2;
860 for (size_t channels = 1; channels <= 120; channels += 23) {
861 DWConvMicrokernelTester()
862 .cr(24)
863 .kr(9)
864 .channels(channels)
865 .width(3)
866 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
867 }
868 }
869
870 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
871 TEST_REQUIRES_X86_AVX2;
872 for (size_t channels = 1; channels <= 120; channels += 23) {
873 for (size_t step = 2; step <= 9; step++) {
874 DWConvMicrokernelTester()
875 .cr(24)
876 .kr(9)
877 .channels(channels)
878 .width(3)
879 .step(step)
880 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
881 }
882 }
883 }
884
885 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
886 TEST_REQUIRES_X86_AVX2;
887 for (size_t channels = 1; channels <= 120; channels += 23) {
888 DWConvMicrokernelTester()
889 .cr(24)
890 .kr(9)
891 .channels(24)
892 .width(5)
893 .output_stride(127)
894 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
895 }
896 }
897
898 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
899 TEST_REQUIRES_X86_AVX2;
900 for (size_t channels = 1; channels <= 120; channels += 23) {
901 DWConvMicrokernelTester()
902 .cr(24)
903 .kr(9)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
907 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
908 }
909 }
910
911 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
912 TEST_REQUIRES_X86_AVX2;
913 for (size_t channels = 1; channels <= 120; channels += 23) {
914 DWConvMicrokernelTester()
915 .cr(24)
916 .kr(9)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
920 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
921 }
922 }
923
924 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
925 TEST_REQUIRES_X86_AVX2;
926 for (uint32_t channels = 48; channels < 384; channels += 72) {
927 DWConvMicrokernelTester()
928 .cr(24)
929 .kr(9)
930 .channels(channels)
931 .input_offset(464)
932 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
933 }
934 }
935
936 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
937 TEST_REQUIRES_X86_AVX2;
938 for (uint32_t mz = 0; mz < 9; mz++) {
939 for (uint32_t channels = 48; channels < 384; channels += 72) {
940 DWConvMicrokernelTester()
941 .cr(24)
942 .kr(9)
943 .channels(channels)
944 .input_offset(464)
945 .zero_index(mz)
946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
947 }
948 }
949 }
950#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
951
952
953#if XNN_ARCH_X86 || XNN_ARCH_X86_64
954 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
955 TEST_REQUIRES_X86_AVX2;
956 DWConvMicrokernelTester()
957 .cr(32)
958 .kr(9)
959 .channels(32)
960 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
961 }
962
963 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
964 TEST_REQUIRES_X86_AVX2;
965 for (uint32_t channels = 64; channels < 512; channels += 96) {
966 DWConvMicrokernelTester()
967 .cr(32)
968 .kr(9)
969 .channels(channels)
970 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
971 }
972 }
973
974 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
975 TEST_REQUIRES_X86_AVX2;
976 for (uint32_t channels = 64; channels < 512; channels += 96) {
977 DWConvMicrokernelTester()
978 .cr(32)
979 .kr(9)
980 .channels(channels)
981 .qmin(128)
982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
983 }
984 }
985
986 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
987 TEST_REQUIRES_X86_AVX2;
988 for (uint32_t channels = 64; channels < 512; channels += 96) {
989 DWConvMicrokernelTester()
990 .cr(32)
991 .kr(9)
992 .channels(channels)
993 .qmax(128)
994 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
995 }
996 }
997
998 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
999 TEST_REQUIRES_X86_AVX2;
1000 for (uint32_t channels = 1; channels < 32; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(32)
1003 .kr(9)
1004 .channels(channels)
1005 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1006 }
1007 }
1008
1009 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
1010 TEST_REQUIRES_X86_AVX2;
1011 for (uint32_t channels = 33; channels < 64; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(32)
1014 .kr(9)
1015 .channels(channels)
1016 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1017 }
1018 }
1019
1020 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
1021 TEST_REQUIRES_X86_AVX2;
1022 for (uint32_t channels = 33; channels < 64; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(32)
1025 .kr(9)
1026 .channels(channels)
1027 .qmin(128)
1028 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1029 }
1030 }
1031
1032 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
1033 TEST_REQUIRES_X86_AVX2;
1034 for (uint32_t channels = 33; channels < 64; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(32)
1037 .kr(9)
1038 .channels(channels)
1039 .qmax(128)
1040 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1041 }
1042 }
1043
1044 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
1045 TEST_REQUIRES_X86_AVX2;
1046 for (size_t channels = 1; channels <= 160; channels += 31) {
1047 DWConvMicrokernelTester()
1048 .cr(32)
1049 .kr(9)
1050 .channels(channels)
1051 .width(3)
1052 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1053 }
1054 }
1055
1056 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
1057 TEST_REQUIRES_X86_AVX2;
1058 for (size_t channels = 1; channels <= 160; channels += 31) {
1059 for (size_t step = 2; step <= 9; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(32)
1062 .kr(9)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
1066 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1067 }
1068 }
1069 }
1070
1071 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
1072 TEST_REQUIRES_X86_AVX2;
1073 for (size_t channels = 1; channels <= 160; channels += 31) {
1074 DWConvMicrokernelTester()
1075 .cr(32)
1076 .kr(9)
1077 .channels(32)
1078 .width(5)
1079 .output_stride(163)
1080 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1081 }
1082 }
1083
1084 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
1085 TEST_REQUIRES_X86_AVX2;
1086 for (size_t channels = 1; channels <= 160; channels += 31) {
1087 DWConvMicrokernelTester()
1088 .cr(32)
1089 .kr(9)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
1093 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1094 }
1095 }
1096
1097 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
1098 TEST_REQUIRES_X86_AVX2;
1099 for (size_t channels = 1; channels <= 160; channels += 31) {
1100 DWConvMicrokernelTester()
1101 .cr(32)
1102 .kr(9)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
1106 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1107 }
1108 }
1109
1110 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
1111 TEST_REQUIRES_X86_AVX2;
1112 for (uint32_t channels = 64; channels < 512; channels += 96) {
1113 DWConvMicrokernelTester()
1114 .cr(32)
1115 .kr(9)
1116 .channels(channels)
1117 .input_offset(592)
1118 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1119 }
1120 }
1121
1122 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
1123 TEST_REQUIRES_X86_AVX2;
1124 for (uint32_t mz = 0; mz < 9; mz++) {
1125 for (uint32_t channels = 64; channels < 512; channels += 96) {
1126 DWConvMicrokernelTester()
1127 .cr(32)
1128 .kr(9)
1129 .channels(channels)
1130 .input_offset(592)
1131 .zero_index(mz)
1132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1133 }
1134 }
1135 }
1136#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1137
1138
1139#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -07001140 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
1141 TEST_REQUIRES_X86_AVX512SKX;
1142 DWConvMicrokernelTester()
1143 .cr(16)
1144 .kr(9)
1145 .channels(16)
1146 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1147 }
1148
1149 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
1150 TEST_REQUIRES_X86_AVX512SKX;
1151 for (uint32_t channels = 32; channels < 256; channels += 48) {
1152 DWConvMicrokernelTester()
1153 .cr(16)
1154 .kr(9)
1155 .channels(channels)
1156 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1157 }
1158 }
1159
1160 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
1161 TEST_REQUIRES_X86_AVX512SKX;
1162 for (uint32_t channels = 32; channels < 256; channels += 48) {
1163 DWConvMicrokernelTester()
1164 .cr(16)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
1168 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1169 }
1170 }
1171
1172 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
1173 TEST_REQUIRES_X86_AVX512SKX;
1174 for (uint32_t channels = 32; channels < 256; channels += 48) {
1175 DWConvMicrokernelTester()
1176 .cr(16)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
1180 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1181 }
1182 }
1183
1184 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
1185 TEST_REQUIRES_X86_AVX512SKX;
1186 for (uint32_t channels = 1; channels < 16; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(16)
1189 .kr(9)
1190 .channels(channels)
1191 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1192 }
1193 }
1194
1195 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
1196 TEST_REQUIRES_X86_AVX512SKX;
1197 for (uint32_t channels = 17; channels < 32; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(16)
1200 .kr(9)
1201 .channels(channels)
1202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1203 }
1204 }
1205
1206 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
1207 TEST_REQUIRES_X86_AVX512SKX;
1208 for (uint32_t channels = 17; channels < 32; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(16)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
1214 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1215 }
1216 }
1217
1218 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
1219 TEST_REQUIRES_X86_AVX512SKX;
1220 for (uint32_t channels = 17; channels < 32; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(16)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
1226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1227 }
1228 }
1229
1230 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
1231 TEST_REQUIRES_X86_AVX512SKX;
1232 for (size_t channels = 1; channels <= 80; channels += 15) {
1233 DWConvMicrokernelTester()
1234 .cr(16)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
1238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1239 }
1240 }
1241
1242 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
1243 TEST_REQUIRES_X86_AVX512SKX;
1244 for (size_t channels = 1; channels <= 80; channels += 15) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(16)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
1252 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1253 }
1254 }
1255 }
1256
1257 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
1258 TEST_REQUIRES_X86_AVX512SKX;
1259 for (size_t channels = 1; channels <= 80; channels += 15) {
1260 DWConvMicrokernelTester()
1261 .cr(16)
1262 .kr(9)
1263 .channels(16)
1264 .width(5)
1265 .output_stride(83)
1266 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1267 }
1268 }
1269
1270 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
1271 TEST_REQUIRES_X86_AVX512SKX;
1272 for (size_t channels = 1; channels <= 80; channels += 15) {
1273 DWConvMicrokernelTester()
1274 .cr(16)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
1279 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1280 }
1281 }
1282
1283 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
1284 TEST_REQUIRES_X86_AVX512SKX;
1285 for (size_t channels = 1; channels <= 80; channels += 15) {
1286 DWConvMicrokernelTester()
1287 .cr(16)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
1292 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1293 }
1294 }
1295
1296 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
1297 TEST_REQUIRES_X86_AVX512SKX;
1298 for (uint32_t channels = 32; channels < 256; channels += 48) {
1299 DWConvMicrokernelTester()
1300 .cr(16)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(304)
1304 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1305 }
1306 }
1307
1308 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
1309 TEST_REQUIRES_X86_AVX512SKX;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 32; channels < 256; channels += 48) {
1312 DWConvMicrokernelTester()
1313 .cr(16)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(304)
1317 .zero_index(mz)
1318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1319 }
1320 }
1321 }
1322#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1323
1324
1325#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1326 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
1327 TEST_REQUIRES_X86_AVX512SKX;
1328 DWConvMicrokernelTester()
1329 .cr(32)
1330 .kr(9)
1331 .channels(32)
1332 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1333 }
1334
1335 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
1336 TEST_REQUIRES_X86_AVX512SKX;
1337 for (uint32_t channels = 64; channels < 512; channels += 96) {
1338 DWConvMicrokernelTester()
1339 .cr(32)
1340 .kr(9)
1341 .channels(channels)
1342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1343 }
1344 }
1345
1346 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
1347 TEST_REQUIRES_X86_AVX512SKX;
1348 for (uint32_t channels = 64; channels < 512; channels += 96) {
1349 DWConvMicrokernelTester()
1350 .cr(32)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
1354 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1355 }
1356 }
1357
1358 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
1359 TEST_REQUIRES_X86_AVX512SKX;
1360 for (uint32_t channels = 64; channels < 512; channels += 96) {
1361 DWConvMicrokernelTester()
1362 .cr(32)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
1366 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1367 }
1368 }
1369
1370 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
1371 TEST_REQUIRES_X86_AVX512SKX;
1372 for (uint32_t channels = 1; channels < 32; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(32)
1375 .kr(9)
1376 .channels(channels)
1377 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1378 }
1379 }
1380
1381 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
1382 TEST_REQUIRES_X86_AVX512SKX;
1383 for (uint32_t channels = 33; channels < 64; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(32)
1386 .kr(9)
1387 .channels(channels)
1388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1389 }
1390 }
1391
1392 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
1393 TEST_REQUIRES_X86_AVX512SKX;
1394 for (uint32_t channels = 33; channels < 64; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(32)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
1400 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1401 }
1402 }
1403
1404 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
1405 TEST_REQUIRES_X86_AVX512SKX;
1406 for (uint32_t channels = 33; channels < 64; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(32)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
1412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1413 }
1414 }
1415
1416 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
1417 TEST_REQUIRES_X86_AVX512SKX;
1418 for (size_t channels = 1; channels <= 160; channels += 31) {
1419 DWConvMicrokernelTester()
1420 .cr(32)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
1424 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1425 }
1426 }
1427
1428 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
1429 TEST_REQUIRES_X86_AVX512SKX;
1430 for (size_t channels = 1; channels <= 160; channels += 31) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(32)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
1438 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1439 }
1440 }
1441 }
1442
1443 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
1444 TEST_REQUIRES_X86_AVX512SKX;
1445 for (size_t channels = 1; channels <= 160; channels += 31) {
1446 DWConvMicrokernelTester()
1447 .cr(32)
1448 .kr(9)
1449 .channels(32)
1450 .width(5)
1451 .output_stride(163)
1452 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1453 }
1454 }
1455
1456 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
1457 TEST_REQUIRES_X86_AVX512SKX;
1458 for (size_t channels = 1; channels <= 160; channels += 31) {
1459 DWConvMicrokernelTester()
1460 .cr(32)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
1465 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1466 }
1467 }
1468
1469 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
1470 TEST_REQUIRES_X86_AVX512SKX;
1471 for (size_t channels = 1; channels <= 160; channels += 31) {
1472 DWConvMicrokernelTester()
1473 .cr(32)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
1478 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1479 }
1480 }
1481
1482 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
1483 TEST_REQUIRES_X86_AVX512SKX;
1484 for (uint32_t channels = 64; channels < 512; channels += 96) {
1485 DWConvMicrokernelTester()
1486 .cr(32)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(592)
1490 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1491 }
1492 }
1493
1494 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
1495 TEST_REQUIRES_X86_AVX512SKX;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 64; channels < 512; channels += 96) {
1498 DWConvMicrokernelTester()
1499 .cr(32)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(592)
1503 .zero_index(mz)
1504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1505 }
1506 }
1507 }
1508#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1509
1510
1511#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9b474cf2021-05-25 16:37:48 -07001512 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_eq_16) {
1513 TEST_REQUIRES_X86_AVX2;
1514 DWConvMicrokernelTester()
1515 .cr(16)
1516 .kr(25)
1517 .channels(16)
1518 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1519 }
1520
1521 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16) {
1522 TEST_REQUIRES_X86_AVX2;
1523 for (uint32_t channels = 32; channels < 256; channels += 48) {
1524 DWConvMicrokernelTester()
1525 .cr(16)
1526 .kr(25)
1527 .channels(channels)
1528 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1529 }
1530 }
1531
1532 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16_with_qmin) {
1533 TEST_REQUIRES_X86_AVX2;
1534 for (uint32_t channels = 32; channels < 256; channels += 48) {
1535 DWConvMicrokernelTester()
1536 .cr(16)
1537 .kr(25)
1538 .channels(channels)
1539 .qmin(128)
1540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1541 }
1542 }
1543
1544 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16_with_qmax) {
1545 TEST_REQUIRES_X86_AVX2;
1546 for (uint32_t channels = 32; channels < 256; channels += 48) {
1547 DWConvMicrokernelTester()
1548 .cr(16)
1549 .kr(25)
1550 .channels(channels)
1551 .qmax(128)
1552 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1553 }
1554 }
1555
1556 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_lt_16) {
1557 TEST_REQUIRES_X86_AVX2;
1558 for (uint32_t channels = 1; channels < 16; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(16)
1561 .kr(25)
1562 .channels(channels)
1563 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1564 }
1565 }
1566
1567 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16) {
1568 TEST_REQUIRES_X86_AVX2;
1569 for (uint32_t channels = 17; channels < 32; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(16)
1572 .kr(25)
1573 .channels(channels)
1574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1575 }
1576 }
1577
1578 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16_with_qmin) {
1579 TEST_REQUIRES_X86_AVX2;
1580 for (uint32_t channels = 17; channels < 32; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(16)
1583 .kr(25)
1584 .channels(channels)
1585 .qmin(128)
1586 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1587 }
1588 }
1589
1590 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16_with_qmax) {
1591 TEST_REQUIRES_X86_AVX2;
1592 for (uint32_t channels = 17; channels < 32; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(16)
1595 .kr(25)
1596 .channels(channels)
1597 .qmax(128)
1598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1599 }
1600 }
1601
1602 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel) {
1603 TEST_REQUIRES_X86_AVX2;
1604 for (size_t channels = 1; channels <= 80; channels += 15) {
1605 DWConvMicrokernelTester()
1606 .cr(16)
1607 .kr(25)
1608 .channels(channels)
1609 .width(3)
1610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1611 }
1612 }
1613
1614 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_step) {
1615 TEST_REQUIRES_X86_AVX2;
1616 for (size_t channels = 1; channels <= 80; channels += 15) {
1617 for (size_t step = 2; step <= 25; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(16)
1620 .kr(25)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
1624 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1625 }
1626 }
1627 }
1628
1629 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_output_stride) {
1630 TEST_REQUIRES_X86_AVX2;
1631 for (size_t channels = 1; channels <= 80; channels += 15) {
1632 DWConvMicrokernelTester()
1633 .cr(16)
1634 .kr(25)
1635 .channels(16)
1636 .width(5)
1637 .output_stride(83)
1638 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1639 }
1640 }
1641
1642 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_qmin) {
1643 TEST_REQUIRES_X86_AVX2;
1644 for (size_t channels = 1; channels <= 80; channels += 15) {
1645 DWConvMicrokernelTester()
1646 .cr(16)
1647 .kr(25)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
1651 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1652 }
1653 }
1654
1655 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_qmax) {
1656 TEST_REQUIRES_X86_AVX2;
1657 for (size_t channels = 1; channels <= 80; channels += 15) {
1658 DWConvMicrokernelTester()
1659 .cr(16)
1660 .kr(25)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
1664 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1665 }
1666 }
1667
1668 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, input_offset) {
1669 TEST_REQUIRES_X86_AVX2;
1670 for (uint32_t channels = 32; channels < 256; channels += 48) {
1671 DWConvMicrokernelTester()
1672 .cr(16)
1673 .kr(25)
1674 .channels(channels)
1675 .input_offset(304)
1676 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1677 }
1678 }
1679
1680 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, zero) {
1681 TEST_REQUIRES_X86_AVX2;
1682 for (uint32_t mz = 0; mz < 25; mz++) {
1683 for (uint32_t channels = 32; channels < 256; channels += 48) {
1684 DWConvMicrokernelTester()
1685 .cr(16)
1686 .kr(25)
1687 .channels(channels)
1688 .input_offset(304)
1689 .zero_index(mz)
1690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1691 }
1692 }
1693 }
1694#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1695
1696
1697#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1698 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_eq_32) {
1699 TEST_REQUIRES_X86_AVX2;
1700 DWConvMicrokernelTester()
1701 .cr(32)
1702 .kr(25)
1703 .channels(32)
1704 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1705 }
1706
1707 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32) {
1708 TEST_REQUIRES_X86_AVX2;
1709 for (uint32_t channels = 64; channels < 512; channels += 96) {
1710 DWConvMicrokernelTester()
1711 .cr(32)
1712 .kr(25)
1713 .channels(channels)
1714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1715 }
1716 }
1717
1718 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32_with_qmin) {
1719 TEST_REQUIRES_X86_AVX2;
1720 for (uint32_t channels = 64; channels < 512; channels += 96) {
1721 DWConvMicrokernelTester()
1722 .cr(32)
1723 .kr(25)
1724 .channels(channels)
1725 .qmin(128)
1726 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1727 }
1728 }
1729
1730 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32_with_qmax) {
1731 TEST_REQUIRES_X86_AVX2;
1732 for (uint32_t channels = 64; channels < 512; channels += 96) {
1733 DWConvMicrokernelTester()
1734 .cr(32)
1735 .kr(25)
1736 .channels(channels)
1737 .qmax(128)
1738 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1739 }
1740 }
1741
1742 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_lt_32) {
1743 TEST_REQUIRES_X86_AVX2;
1744 for (uint32_t channels = 1; channels < 32; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(32)
1747 .kr(25)
1748 .channels(channels)
1749 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1750 }
1751 }
1752
1753 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32) {
1754 TEST_REQUIRES_X86_AVX2;
1755 for (uint32_t channels = 33; channels < 64; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(32)
1758 .kr(25)
1759 .channels(channels)
1760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1761 }
1762 }
1763
1764 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32_with_qmin) {
1765 TEST_REQUIRES_X86_AVX2;
1766 for (uint32_t channels = 33; channels < 64; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(32)
1769 .kr(25)
1770 .channels(channels)
1771 .qmin(128)
1772 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1773 }
1774 }
1775
1776 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32_with_qmax) {
1777 TEST_REQUIRES_X86_AVX2;
1778 for (uint32_t channels = 33; channels < 64; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(32)
1781 .kr(25)
1782 .channels(channels)
1783 .qmax(128)
1784 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1785 }
1786 }
1787
1788 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel) {
1789 TEST_REQUIRES_X86_AVX2;
1790 for (size_t channels = 1; channels <= 160; channels += 31) {
1791 DWConvMicrokernelTester()
1792 .cr(32)
1793 .kr(25)
1794 .channels(channels)
1795 .width(3)
1796 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1797 }
1798 }
1799
1800 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_step) {
1801 TEST_REQUIRES_X86_AVX2;
1802 for (size_t channels = 1; channels <= 160; channels += 31) {
1803 for (size_t step = 2; step <= 25; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(32)
1806 .kr(25)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
1810 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1811 }
1812 }
1813 }
1814
1815 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_output_stride) {
1816 TEST_REQUIRES_X86_AVX2;
1817 for (size_t channels = 1; channels <= 160; channels += 31) {
1818 DWConvMicrokernelTester()
1819 .cr(32)
1820 .kr(25)
1821 .channels(32)
1822 .width(5)
1823 .output_stride(163)
1824 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1825 }
1826 }
1827
1828 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_qmin) {
1829 TEST_REQUIRES_X86_AVX2;
1830 for (size_t channels = 1; channels <= 160; channels += 31) {
1831 DWConvMicrokernelTester()
1832 .cr(32)
1833 .kr(25)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
1837 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1838 }
1839 }
1840
1841 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_qmax) {
1842 TEST_REQUIRES_X86_AVX2;
1843 for (size_t channels = 1; channels <= 160; channels += 31) {
1844 DWConvMicrokernelTester()
1845 .cr(32)
1846 .kr(25)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
1850 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1851 }
1852 }
1853
1854 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, input_offset) {
1855 TEST_REQUIRES_X86_AVX2;
1856 for (uint32_t channels = 64; channels < 512; channels += 96) {
1857 DWConvMicrokernelTester()
1858 .cr(32)
1859 .kr(25)
1860 .channels(channels)
1861 .input_offset(592)
1862 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1863 }
1864 }
1865
1866 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, zero) {
1867 TEST_REQUIRES_X86_AVX2;
1868 for (uint32_t mz = 0; mz < 25; mz++) {
1869 for (uint32_t channels = 64; channels < 512; channels += 96) {
1870 DWConvMicrokernelTester()
1871 .cr(32)
1872 .kr(25)
1873 .channels(channels)
1874 .input_offset(592)
1875 .zero_index(mz)
1876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1877 }
1878 }
1879 }
1880#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1881
1882
1883#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1884 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
1885 TEST_REQUIRES_X86_AVX2;
1886 DWConvMicrokernelTester()
1887 .cr(8)
1888 .kr(25)
1889 .channels(8)
1890 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1891 }
1892
1893 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
1894 TEST_REQUIRES_X86_AVX2;
1895 for (uint32_t channels = 16; channels < 128; channels += 24) {
1896 DWConvMicrokernelTester()
1897 .cr(8)
1898 .kr(25)
1899 .channels(channels)
1900 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1901 }
1902 }
1903
1904 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
1905 TEST_REQUIRES_X86_AVX2;
1906 for (uint32_t channels = 16; channels < 128; channels += 24) {
1907 DWConvMicrokernelTester()
1908 .cr(8)
1909 .kr(25)
1910 .channels(channels)
1911 .qmin(128)
1912 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1913 }
1914 }
1915
1916 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
1917 TEST_REQUIRES_X86_AVX2;
1918 for (uint32_t channels = 16; channels < 128; channels += 24) {
1919 DWConvMicrokernelTester()
1920 .cr(8)
1921 .kr(25)
1922 .channels(channels)
1923 .qmax(128)
1924 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1925 }
1926 }
1927
1928 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
1929 TEST_REQUIRES_X86_AVX2;
1930 for (uint32_t channels = 1; channels < 8; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(8)
1933 .kr(25)
1934 .channels(channels)
1935 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1936 }
1937 }
1938
1939 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
1940 TEST_REQUIRES_X86_AVX2;
1941 for (uint32_t channels = 9; channels < 16; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(8)
1944 .kr(25)
1945 .channels(channels)
1946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1947 }
1948 }
1949
1950 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
1951 TEST_REQUIRES_X86_AVX2;
1952 for (uint32_t channels = 9; channels < 16; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(8)
1955 .kr(25)
1956 .channels(channels)
1957 .qmin(128)
1958 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1959 }
1960 }
1961
1962 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
1963 TEST_REQUIRES_X86_AVX2;
1964 for (uint32_t channels = 9; channels < 16; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(8)
1967 .kr(25)
1968 .channels(channels)
1969 .qmax(128)
1970 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1971 }
1972 }
1973
1974 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
1975 TEST_REQUIRES_X86_AVX2;
1976 for (size_t channels = 1; channels <= 40; channels += 7) {
1977 DWConvMicrokernelTester()
1978 .cr(8)
1979 .kr(25)
1980 .channels(channels)
1981 .width(3)
1982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1983 }
1984 }
1985
1986 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
1987 TEST_REQUIRES_X86_AVX2;
1988 for (size_t channels = 1; channels <= 40; channels += 7) {
1989 for (size_t step = 2; step <= 25; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(8)
1992 .kr(25)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
1996 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1997 }
1998 }
1999 }
2000
2001 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
2002 TEST_REQUIRES_X86_AVX2;
2003 for (size_t channels = 1; channels <= 40; channels += 7) {
2004 DWConvMicrokernelTester()
2005 .cr(8)
2006 .kr(25)
2007 .channels(8)
2008 .width(5)
2009 .output_stride(43)
2010 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2011 }
2012 }
2013
2014 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
2015 TEST_REQUIRES_X86_AVX2;
2016 for (size_t channels = 1; channels <= 40; channels += 7) {
2017 DWConvMicrokernelTester()
2018 .cr(8)
2019 .kr(25)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
2023 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2024 }
2025 }
2026
2027 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
2028 TEST_REQUIRES_X86_AVX2;
2029 for (size_t channels = 1; channels <= 40; channels += 7) {
2030 DWConvMicrokernelTester()
2031 .cr(8)
2032 .kr(25)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
2036 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2037 }
2038 }
2039
2040 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
2041 TEST_REQUIRES_X86_AVX2;
2042 for (uint32_t channels = 16; channels < 128; channels += 24) {
2043 DWConvMicrokernelTester()
2044 .cr(8)
2045 .kr(25)
2046 .channels(channels)
2047 .input_offset(176)
2048 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2049 }
2050 }
2051
2052 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
2053 TEST_REQUIRES_X86_AVX2;
2054 for (uint32_t mz = 0; mz < 25; mz++) {
2055 for (uint32_t channels = 16; channels < 128; channels += 24) {
2056 DWConvMicrokernelTester()
2057 .cr(8)
2058 .kr(25)
2059 .channels(channels)
2060 .input_offset(176)
2061 .zero_index(mz)
2062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2063 }
2064 }
2065 }
2066#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2067
2068
2069#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2070 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
2071 TEST_REQUIRES_X86_AVX2;
2072 DWConvMicrokernelTester()
2073 .cr(16)
2074 .kr(25)
2075 .channels(16)
2076 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2077 }
2078
2079 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
2080 TEST_REQUIRES_X86_AVX2;
2081 for (uint32_t channels = 32; channels < 256; channels += 48) {
2082 DWConvMicrokernelTester()
2083 .cr(16)
2084 .kr(25)
2085 .channels(channels)
2086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2087 }
2088 }
2089
2090 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
2091 TEST_REQUIRES_X86_AVX2;
2092 for (uint32_t channels = 32; channels < 256; channels += 48) {
2093 DWConvMicrokernelTester()
2094 .cr(16)
2095 .kr(25)
2096 .channels(channels)
2097 .qmin(128)
2098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2099 }
2100 }
2101
2102 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
2103 TEST_REQUIRES_X86_AVX2;
2104 for (uint32_t channels = 32; channels < 256; channels += 48) {
2105 DWConvMicrokernelTester()
2106 .cr(16)
2107 .kr(25)
2108 .channels(channels)
2109 .qmax(128)
2110 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2111 }
2112 }
2113
2114 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
2115 TEST_REQUIRES_X86_AVX2;
2116 for (uint32_t channels = 1; channels < 16; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(16)
2119 .kr(25)
2120 .channels(channels)
2121 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2122 }
2123 }
2124
2125 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
2126 TEST_REQUIRES_X86_AVX2;
2127 for (uint32_t channels = 17; channels < 32; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(16)
2130 .kr(25)
2131 .channels(channels)
2132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2133 }
2134 }
2135
2136 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
2137 TEST_REQUIRES_X86_AVX2;
2138 for (uint32_t channels = 17; channels < 32; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(16)
2141 .kr(25)
2142 .channels(channels)
2143 .qmin(128)
2144 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2145 }
2146 }
2147
2148 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
2149 TEST_REQUIRES_X86_AVX2;
2150 for (uint32_t channels = 17; channels < 32; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(16)
2153 .kr(25)
2154 .channels(channels)
2155 .qmax(128)
2156 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2157 }
2158 }
2159
2160 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
2161 TEST_REQUIRES_X86_AVX2;
2162 for (size_t channels = 1; channels <= 80; channels += 15) {
2163 DWConvMicrokernelTester()
2164 .cr(16)
2165 .kr(25)
2166 .channels(channels)
2167 .width(3)
2168 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2169 }
2170 }
2171
2172 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
2173 TEST_REQUIRES_X86_AVX2;
2174 for (size_t channels = 1; channels <= 80; channels += 15) {
2175 for (size_t step = 2; step <= 25; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(16)
2178 .kr(25)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
2182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2183 }
2184 }
2185 }
2186
2187 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
2188 TEST_REQUIRES_X86_AVX2;
2189 for (size_t channels = 1; channels <= 80; channels += 15) {
2190 DWConvMicrokernelTester()
2191 .cr(16)
2192 .kr(25)
2193 .channels(16)
2194 .width(5)
2195 .output_stride(83)
2196 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2197 }
2198 }
2199
2200 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
2201 TEST_REQUIRES_X86_AVX2;
2202 for (size_t channels = 1; channels <= 80; channels += 15) {
2203 DWConvMicrokernelTester()
2204 .cr(16)
2205 .kr(25)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
2209 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2210 }
2211 }
2212
2213 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
2214 TEST_REQUIRES_X86_AVX2;
2215 for (size_t channels = 1; channels <= 80; channels += 15) {
2216 DWConvMicrokernelTester()
2217 .cr(16)
2218 .kr(25)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
2222 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2223 }
2224 }
2225
2226 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
2227 TEST_REQUIRES_X86_AVX2;
2228 for (uint32_t channels = 32; channels < 256; channels += 48) {
2229 DWConvMicrokernelTester()
2230 .cr(16)
2231 .kr(25)
2232 .channels(channels)
2233 .input_offset(304)
2234 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2235 }
2236 }
2237
2238 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
2239 TEST_REQUIRES_X86_AVX2;
2240 for (uint32_t mz = 0; mz < 25; mz++) {
2241 for (uint32_t channels = 32; channels < 256; channels += 48) {
2242 DWConvMicrokernelTester()
2243 .cr(16)
2244 .kr(25)
2245 .channels(channels)
2246 .input_offset(304)
2247 .zero_index(mz)
2248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2249 }
2250 }
2251 }
2252#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2253
2254
2255#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2256 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
2257 TEST_REQUIRES_X86_AVX2;
2258 DWConvMicrokernelTester()
2259 .cr(24)
2260 .kr(25)
2261 .channels(24)
2262 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2263 }
2264
2265 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
2266 TEST_REQUIRES_X86_AVX2;
2267 for (uint32_t channels = 48; channels < 384; channels += 72) {
2268 DWConvMicrokernelTester()
2269 .cr(24)
2270 .kr(25)
2271 .channels(channels)
2272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2273 }
2274 }
2275
2276 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
2277 TEST_REQUIRES_X86_AVX2;
2278 for (uint32_t channels = 48; channels < 384; channels += 72) {
2279 DWConvMicrokernelTester()
2280 .cr(24)
2281 .kr(25)
2282 .channels(channels)
2283 .qmin(128)
2284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2285 }
2286 }
2287
2288 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
2289 TEST_REQUIRES_X86_AVX2;
2290 for (uint32_t channels = 48; channels < 384; channels += 72) {
2291 DWConvMicrokernelTester()
2292 .cr(24)
2293 .kr(25)
2294 .channels(channels)
2295 .qmax(128)
2296 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2297 }
2298 }
2299
2300 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
2301 TEST_REQUIRES_X86_AVX2;
2302 for (uint32_t channels = 1; channels < 24; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(24)
2305 .kr(25)
2306 .channels(channels)
2307 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2308 }
2309 }
2310
2311 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
2312 TEST_REQUIRES_X86_AVX2;
2313 for (uint32_t channels = 25; channels < 48; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(24)
2316 .kr(25)
2317 .channels(channels)
2318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2319 }
2320 }
2321
2322 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
2323 TEST_REQUIRES_X86_AVX2;
2324 for (uint32_t channels = 25; channels < 48; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(24)
2327 .kr(25)
2328 .channels(channels)
2329 .qmin(128)
2330 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2331 }
2332 }
2333
2334 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
2335 TEST_REQUIRES_X86_AVX2;
2336 for (uint32_t channels = 25; channels < 48; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(24)
2339 .kr(25)
2340 .channels(channels)
2341 .qmax(128)
2342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2343 }
2344 }
2345
2346 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
2347 TEST_REQUIRES_X86_AVX2;
2348 for (size_t channels = 1; channels <= 120; channels += 23) {
2349 DWConvMicrokernelTester()
2350 .cr(24)
2351 .kr(25)
2352 .channels(channels)
2353 .width(3)
2354 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2355 }
2356 }
2357
2358 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
2359 TEST_REQUIRES_X86_AVX2;
2360 for (size_t channels = 1; channels <= 120; channels += 23) {
2361 for (size_t step = 2; step <= 25; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(24)
2364 .kr(25)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
2368 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2369 }
2370 }
2371 }
2372
2373 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
2374 TEST_REQUIRES_X86_AVX2;
2375 for (size_t channels = 1; channels <= 120; channels += 23) {
2376 DWConvMicrokernelTester()
2377 .cr(24)
2378 .kr(25)
2379 .channels(24)
2380 .width(5)
2381 .output_stride(127)
2382 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2383 }
2384 }
2385
2386 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
2387 TEST_REQUIRES_X86_AVX2;
2388 for (size_t channels = 1; channels <= 120; channels += 23) {
2389 DWConvMicrokernelTester()
2390 .cr(24)
2391 .kr(25)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
2395 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2396 }
2397 }
2398
2399 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
2400 TEST_REQUIRES_X86_AVX2;
2401 for (size_t channels = 1; channels <= 120; channels += 23) {
2402 DWConvMicrokernelTester()
2403 .cr(24)
2404 .kr(25)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
2408 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2409 }
2410 }
2411
2412 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
2413 TEST_REQUIRES_X86_AVX2;
2414 for (uint32_t channels = 48; channels < 384; channels += 72) {
2415 DWConvMicrokernelTester()
2416 .cr(24)
2417 .kr(25)
2418 .channels(channels)
2419 .input_offset(464)
2420 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2421 }
2422 }
2423
2424 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
2425 TEST_REQUIRES_X86_AVX2;
2426 for (uint32_t mz = 0; mz < 25; mz++) {
2427 for (uint32_t channels = 48; channels < 384; channels += 72) {
2428 DWConvMicrokernelTester()
2429 .cr(24)
2430 .kr(25)
2431 .channels(channels)
2432 .input_offset(464)
2433 .zero_index(mz)
2434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2435 }
2436 }
2437 }
2438#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2439
2440
2441#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2442 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
2443 TEST_REQUIRES_X86_AVX2;
2444 DWConvMicrokernelTester()
2445 .cr(32)
2446 .kr(25)
2447 .channels(32)
2448 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2449 }
2450
2451 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
2452 TEST_REQUIRES_X86_AVX2;
2453 for (uint32_t channels = 64; channels < 512; channels += 96) {
2454 DWConvMicrokernelTester()
2455 .cr(32)
2456 .kr(25)
2457 .channels(channels)
2458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2459 }
2460 }
2461
2462 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
2463 TEST_REQUIRES_X86_AVX2;
2464 for (uint32_t channels = 64; channels < 512; channels += 96) {
2465 DWConvMicrokernelTester()
2466 .cr(32)
2467 .kr(25)
2468 .channels(channels)
2469 .qmin(128)
2470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2471 }
2472 }
2473
2474 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
2475 TEST_REQUIRES_X86_AVX2;
2476 for (uint32_t channels = 64; channels < 512; channels += 96) {
2477 DWConvMicrokernelTester()
2478 .cr(32)
2479 .kr(25)
2480 .channels(channels)
2481 .qmax(128)
2482 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2483 }
2484 }
2485
2486 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
2487 TEST_REQUIRES_X86_AVX2;
2488 for (uint32_t channels = 1; channels < 32; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(32)
2491 .kr(25)
2492 .channels(channels)
2493 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2494 }
2495 }
2496
2497 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
2498 TEST_REQUIRES_X86_AVX2;
2499 for (uint32_t channels = 33; channels < 64; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(32)
2502 .kr(25)
2503 .channels(channels)
2504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2505 }
2506 }
2507
2508 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
2509 TEST_REQUIRES_X86_AVX2;
2510 for (uint32_t channels = 33; channels < 64; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(32)
2513 .kr(25)
2514 .channels(channels)
2515 .qmin(128)
2516 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2517 }
2518 }
2519
2520 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
2521 TEST_REQUIRES_X86_AVX2;
2522 for (uint32_t channels = 33; channels < 64; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(32)
2525 .kr(25)
2526 .channels(channels)
2527 .qmax(128)
2528 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2529 }
2530 }
2531
2532 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
2533 TEST_REQUIRES_X86_AVX2;
2534 for (size_t channels = 1; channels <= 160; channels += 31) {
2535 DWConvMicrokernelTester()
2536 .cr(32)
2537 .kr(25)
2538 .channels(channels)
2539 .width(3)
2540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2541 }
2542 }
2543
2544 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
2545 TEST_REQUIRES_X86_AVX2;
2546 for (size_t channels = 1; channels <= 160; channels += 31) {
2547 for (size_t step = 2; step <= 25; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(32)
2550 .kr(25)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
2554 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2555 }
2556 }
2557 }
2558
2559 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
2560 TEST_REQUIRES_X86_AVX2;
2561 for (size_t channels = 1; channels <= 160; channels += 31) {
2562 DWConvMicrokernelTester()
2563 .cr(32)
2564 .kr(25)
2565 .channels(32)
2566 .width(5)
2567 .output_stride(163)
2568 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2569 }
2570 }
2571
2572 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
2573 TEST_REQUIRES_X86_AVX2;
2574 for (size_t channels = 1; channels <= 160; channels += 31) {
2575 DWConvMicrokernelTester()
2576 .cr(32)
2577 .kr(25)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
2581 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2582 }
2583 }
2584
2585 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
2586 TEST_REQUIRES_X86_AVX2;
2587 for (size_t channels = 1; channels <= 160; channels += 31) {
2588 DWConvMicrokernelTester()
2589 .cr(32)
2590 .kr(25)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
2594 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2595 }
2596 }
2597
2598 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
2599 TEST_REQUIRES_X86_AVX2;
2600 for (uint32_t channels = 64; channels < 512; channels += 96) {
2601 DWConvMicrokernelTester()
2602 .cr(32)
2603 .kr(25)
2604 .channels(channels)
2605 .input_offset(592)
2606 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2607 }
2608 }
2609
2610 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
2611 TEST_REQUIRES_X86_AVX2;
2612 for (uint32_t mz = 0; mz < 25; mz++) {
2613 for (uint32_t channels = 64; channels < 512; channels += 96) {
2614 DWConvMicrokernelTester()
2615 .cr(32)
2616 .kr(25)
2617 .channels(channels)
2618 .input_offset(592)
2619 .zero_index(mz)
2620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2621 }
2622 }
2623 }
2624#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -07002625
2626
2627#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2628 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
2629 TEST_REQUIRES_X86_AVX512SKX;
2630 DWConvMicrokernelTester()
2631 .cr(16)
2632 .kr(25)
2633 .channels(16)
2634 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2635 }
2636
2637 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
2638 TEST_REQUIRES_X86_AVX512SKX;
2639 for (uint32_t channels = 32; channels < 256; channels += 48) {
2640 DWConvMicrokernelTester()
2641 .cr(16)
2642 .kr(25)
2643 .channels(channels)
2644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2645 }
2646 }
2647
2648 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
2649 TEST_REQUIRES_X86_AVX512SKX;
2650 for (uint32_t channels = 32; channels < 256; channels += 48) {
2651 DWConvMicrokernelTester()
2652 .cr(16)
2653 .kr(25)
2654 .channels(channels)
2655 .qmin(128)
2656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2657 }
2658 }
2659
2660 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
2661 TEST_REQUIRES_X86_AVX512SKX;
2662 for (uint32_t channels = 32; channels < 256; channels += 48) {
2663 DWConvMicrokernelTester()
2664 .cr(16)
2665 .kr(25)
2666 .channels(channels)
2667 .qmax(128)
2668 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2669 }
2670 }
2671
2672 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
2673 TEST_REQUIRES_X86_AVX512SKX;
2674 for (uint32_t channels = 1; channels < 16; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(16)
2677 .kr(25)
2678 .channels(channels)
2679 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2680 }
2681 }
2682
2683 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
2684 TEST_REQUIRES_X86_AVX512SKX;
2685 for (uint32_t channels = 17; channels < 32; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(16)
2688 .kr(25)
2689 .channels(channels)
2690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2691 }
2692 }
2693
2694 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
2695 TEST_REQUIRES_X86_AVX512SKX;
2696 for (uint32_t channels = 17; channels < 32; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(16)
2699 .kr(25)
2700 .channels(channels)
2701 .qmin(128)
2702 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2703 }
2704 }
2705
2706 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
2707 TEST_REQUIRES_X86_AVX512SKX;
2708 for (uint32_t channels = 17; channels < 32; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(16)
2711 .kr(25)
2712 .channels(channels)
2713 .qmax(128)
2714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2715 }
2716 }
2717
2718 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
2719 TEST_REQUIRES_X86_AVX512SKX;
2720 for (size_t channels = 1; channels <= 80; channels += 15) {
2721 DWConvMicrokernelTester()
2722 .cr(16)
2723 .kr(25)
2724 .channels(channels)
2725 .width(3)
2726 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2727 }
2728 }
2729
2730 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
2731 TEST_REQUIRES_X86_AVX512SKX;
2732 for (size_t channels = 1; channels <= 80; channels += 15) {
2733 for (size_t step = 2; step <= 25; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(16)
2736 .kr(25)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
2740 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2741 }
2742 }
2743 }
2744
2745 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
2746 TEST_REQUIRES_X86_AVX512SKX;
2747 for (size_t channels = 1; channels <= 80; channels += 15) {
2748 DWConvMicrokernelTester()
2749 .cr(16)
2750 .kr(25)
2751 .channels(16)
2752 .width(5)
2753 .output_stride(83)
2754 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2755 }
2756 }
2757
2758 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
2759 TEST_REQUIRES_X86_AVX512SKX;
2760 for (size_t channels = 1; channels <= 80; channels += 15) {
2761 DWConvMicrokernelTester()
2762 .cr(16)
2763 .kr(25)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
2767 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2768 }
2769 }
2770
2771 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
2772 TEST_REQUIRES_X86_AVX512SKX;
2773 for (size_t channels = 1; channels <= 80; channels += 15) {
2774 DWConvMicrokernelTester()
2775 .cr(16)
2776 .kr(25)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
2780 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2781 }
2782 }
2783
2784 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
2785 TEST_REQUIRES_X86_AVX512SKX;
2786 for (uint32_t channels = 32; channels < 256; channels += 48) {
2787 DWConvMicrokernelTester()
2788 .cr(16)
2789 .kr(25)
2790 .channels(channels)
2791 .input_offset(304)
2792 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2793 }
2794 }
2795
2796 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
2797 TEST_REQUIRES_X86_AVX512SKX;
2798 for (uint32_t mz = 0; mz < 25; mz++) {
2799 for (uint32_t channels = 32; channels < 256; channels += 48) {
2800 DWConvMicrokernelTester()
2801 .cr(16)
2802 .kr(25)
2803 .channels(channels)
2804 .input_offset(304)
2805 .zero_index(mz)
2806 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2807 }
2808 }
2809 }
2810#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2811
2812
2813#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2814 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
2815 TEST_REQUIRES_X86_AVX512SKX;
2816 DWConvMicrokernelTester()
2817 .cr(32)
2818 .kr(25)
2819 .channels(32)
2820 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2821 }
2822
2823 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
2824 TEST_REQUIRES_X86_AVX512SKX;
2825 for (uint32_t channels = 64; channels < 512; channels += 96) {
2826 DWConvMicrokernelTester()
2827 .cr(32)
2828 .kr(25)
2829 .channels(channels)
2830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2831 }
2832 }
2833
2834 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
2835 TEST_REQUIRES_X86_AVX512SKX;
2836 for (uint32_t channels = 64; channels < 512; channels += 96) {
2837 DWConvMicrokernelTester()
2838 .cr(32)
2839 .kr(25)
2840 .channels(channels)
2841 .qmin(128)
2842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2843 }
2844 }
2845
2846 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
2847 TEST_REQUIRES_X86_AVX512SKX;
2848 for (uint32_t channels = 64; channels < 512; channels += 96) {
2849 DWConvMicrokernelTester()
2850 .cr(32)
2851 .kr(25)
2852 .channels(channels)
2853 .qmax(128)
2854 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2855 }
2856 }
2857
2858 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
2859 TEST_REQUIRES_X86_AVX512SKX;
2860 for (uint32_t channels = 1; channels < 32; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(32)
2863 .kr(25)
2864 .channels(channels)
2865 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2866 }
2867 }
2868
2869 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
2870 TEST_REQUIRES_X86_AVX512SKX;
2871 for (uint32_t channels = 33; channels < 64; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(32)
2874 .kr(25)
2875 .channels(channels)
2876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2877 }
2878 }
2879
2880 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
2881 TEST_REQUIRES_X86_AVX512SKX;
2882 for (uint32_t channels = 33; channels < 64; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(32)
2885 .kr(25)
2886 .channels(channels)
2887 .qmin(128)
2888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2889 }
2890 }
2891
2892 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
2893 TEST_REQUIRES_X86_AVX512SKX;
2894 for (uint32_t channels = 33; channels < 64; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(32)
2897 .kr(25)
2898 .channels(channels)
2899 .qmax(128)
2900 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2901 }
2902 }
2903
2904 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
2905 TEST_REQUIRES_X86_AVX512SKX;
2906 for (size_t channels = 1; channels <= 160; channels += 31) {
2907 DWConvMicrokernelTester()
2908 .cr(32)
2909 .kr(25)
2910 .channels(channels)
2911 .width(3)
2912 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2913 }
2914 }
2915
2916 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
2917 TEST_REQUIRES_X86_AVX512SKX;
2918 for (size_t channels = 1; channels <= 160; channels += 31) {
2919 for (size_t step = 2; step <= 25; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(32)
2922 .kr(25)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
2926 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2927 }
2928 }
2929 }
2930
2931 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
2932 TEST_REQUIRES_X86_AVX512SKX;
2933 for (size_t channels = 1; channels <= 160; channels += 31) {
2934 DWConvMicrokernelTester()
2935 .cr(32)
2936 .kr(25)
2937 .channels(32)
2938 .width(5)
2939 .output_stride(163)
2940 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2941 }
2942 }
2943
2944 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
2945 TEST_REQUIRES_X86_AVX512SKX;
2946 for (size_t channels = 1; channels <= 160; channels += 31) {
2947 DWConvMicrokernelTester()
2948 .cr(32)
2949 .kr(25)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
2953 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2954 }
2955 }
2956
2957 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
2958 TEST_REQUIRES_X86_AVX512SKX;
2959 for (size_t channels = 1; channels <= 160; channels += 31) {
2960 DWConvMicrokernelTester()
2961 .cr(32)
2962 .kr(25)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
2966 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2967 }
2968 }
2969
2970 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
2971 TEST_REQUIRES_X86_AVX512SKX;
2972 for (uint32_t channels = 64; channels < 512; channels += 96) {
2973 DWConvMicrokernelTester()
2974 .cr(32)
2975 .kr(25)
2976 .channels(channels)
2977 .input_offset(592)
2978 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2979 }
2980 }
2981
2982 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
2983 TEST_REQUIRES_X86_AVX512SKX;
2984 for (uint32_t mz = 0; mz < 25; mz++) {
2985 for (uint32_t channels = 64; channels < 512; channels += 96) {
2986 DWConvMicrokernelTester()
2987 .cr(32)
2988 .kr(25)
2989 .channels(channels)
2990 .input_offset(592)
2991 .zero_index(mz)
2992 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2993 }
2994 }
2995 }
2996#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64