blob: ae51060d13346f5aeeee4d301c145e32628c256b [file] [log] [blame]
Marat Dukhan9b474cf2021-05-25 16:37:48 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8//
9// Auto-generated file. Do not edit!
10// Specification: test/qs8-dwconv-minmax-fp32.yaml
11// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
Marat Dukhan6f905292021-06-25 11:12:05 -070023#if XNN_ARCH_ARM || XNN_ARCH_ARM64
24 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
25 TEST_REQUIRES_ARM_NEON;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(9)
29 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070031 }
32
33 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
34 TEST_REQUIRES_ARM_NEON;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(9)
39 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080040 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070041 }
42 }
43
44 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
45 TEST_REQUIRES_ARM_NEON;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080052 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070053 }
54 }
55
56 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
57 TEST_REQUIRES_ARM_NEON;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080064 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070065 }
66 }
67
68 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(9)
74 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080075 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070076 }
77 }
78
79 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
80 TEST_REQUIRES_ARM_NEON;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(9)
85 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070087 }
88 }
89
90 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
91 TEST_REQUIRES_ARM_NEON;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070099 }
100 }
101
102 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800110 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700111 }
112 }
113
114 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(9)
120 .channels(channels)
121 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800122 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700123 }
124 }
125
126 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800136 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700137 }
138 }
139 }
140
141 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(9)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800150 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700151 }
152 }
153
154 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700164 }
165 }
166
167 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800176 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700177 }
178 }
179
180 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
181 TEST_REQUIRES_ARM_NEON;
182 for (uint32_t channels = 16; channels < 128; channels += 24) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(9)
186 .channels(channels)
187 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -0800188 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700189 }
190 }
191
192 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
193 TEST_REQUIRES_ARM_NEON;
194 for (uint32_t mz = 0; mz < 9; mz++) {
195 for (uint32_t channels = 16; channels < 128; channels += 24) {
196 DWConvMicrokernelTester()
197 .cr(8)
198 .kr(9)
199 .channels(channels)
200 .input_offset(176)
201 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700203 }
204 }
205 }
206#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
207
208
209#if XNN_ARCH_ARM || XNN_ARCH_ARM64
210 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
211 TEST_REQUIRES_ARM_NEON;
212 DWConvMicrokernelTester()
213 .cr(16)
214 .kr(9)
215 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800216 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700217 }
218
219 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
220 TEST_REQUIRES_ARM_NEON;
221 for (uint32_t channels = 32; channels < 256; channels += 48) {
222 DWConvMicrokernelTester()
223 .cr(16)
224 .kr(9)
225 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700227 }
228 }
229
230 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
231 TEST_REQUIRES_ARM_NEON;
232 for (uint32_t channels = 32; channels < 256; channels += 48) {
233 DWConvMicrokernelTester()
234 .cr(16)
235 .kr(9)
236 .channels(channels)
237 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700239 }
240 }
241
242 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t channels = 32; channels < 256; channels += 48) {
245 DWConvMicrokernelTester()
246 .cr(16)
247 .kr(9)
248 .channels(channels)
249 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800250 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700251 }
252 }
253
254 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
255 TEST_REQUIRES_ARM_NEON;
256 for (uint32_t channels = 1; channels < 16; channels++) {
257 DWConvMicrokernelTester()
258 .cr(16)
259 .kr(9)
260 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800261 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700262 }
263 }
264
265 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
266 TEST_REQUIRES_ARM_NEON;
267 for (uint32_t channels = 17; channels < 32; channels++) {
268 DWConvMicrokernelTester()
269 .cr(16)
270 .kr(9)
271 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700273 }
274 }
275
276 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
277 TEST_REQUIRES_ARM_NEON;
278 for (uint32_t channels = 17; channels < 32; channels++) {
279 DWConvMicrokernelTester()
280 .cr(16)
281 .kr(9)
282 .channels(channels)
283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700285 }
286 }
287
288 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t channels = 17; channels < 32; channels++) {
291 DWConvMicrokernelTester()
292 .cr(16)
293 .kr(9)
294 .channels(channels)
295 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800296 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700297 }
298 }
299
300 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
301 TEST_REQUIRES_ARM_NEON;
302 for (size_t channels = 1; channels <= 80; channels += 15) {
303 DWConvMicrokernelTester()
304 .cr(16)
305 .kr(9)
306 .channels(channels)
307 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800308 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700309 }
310 }
311
312 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
313 TEST_REQUIRES_ARM_NEON;
314 for (size_t channels = 1; channels <= 80; channels += 15) {
315 for (size_t step = 2; step <= 9; step++) {
316 DWConvMicrokernelTester()
317 .cr(16)
318 .kr(9)
319 .channels(channels)
320 .width(3)
321 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800322 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700323 }
324 }
325 }
326
327 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
328 TEST_REQUIRES_ARM_NEON;
329 for (size_t channels = 1; channels <= 80; channels += 15) {
330 DWConvMicrokernelTester()
331 .cr(16)
332 .kr(9)
333 .channels(16)
334 .width(5)
335 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800336 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700337 }
338 }
339
340 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
341 TEST_REQUIRES_ARM_NEON;
342 for (size_t channels = 1; channels <= 80; channels += 15) {
343 DWConvMicrokernelTester()
344 .cr(16)
345 .kr(9)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800349 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700350 }
351 }
352
353 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
354 TEST_REQUIRES_ARM_NEON;
355 for (size_t channels = 1; channels <= 80; channels += 15) {
356 DWConvMicrokernelTester()
357 .cr(16)
358 .kr(9)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800362 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700363 }
364 }
365
366 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
367 TEST_REQUIRES_ARM_NEON;
368 for (uint32_t channels = 32; channels < 256; channels += 48) {
369 DWConvMicrokernelTester()
370 .cr(16)
371 .kr(9)
372 .channels(channels)
373 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -0800374 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700375 }
376 }
377
378 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
379 TEST_REQUIRES_ARM_NEON;
380 for (uint32_t mz = 0; mz < 9; mz++) {
381 for (uint32_t channels = 32; channels < 256; channels += 48) {
382 DWConvMicrokernelTester()
383 .cr(16)
384 .kr(9)
385 .channels(channels)
386 .input_offset(304)
387 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700389 }
390 }
391 }
392#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
393
394
395#if XNN_ARCH_ARM || XNN_ARCH_ARM64
396 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
397 TEST_REQUIRES_ARM_NEON;
398 DWConvMicrokernelTester()
399 .cr(24)
400 .kr(9)
401 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -0800402 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700403 }
404
405 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
406 TEST_REQUIRES_ARM_NEON;
407 for (uint32_t channels = 48; channels < 384; channels += 72) {
408 DWConvMicrokernelTester()
409 .cr(24)
410 .kr(9)
411 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700413 }
414 }
415
416 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
417 TEST_REQUIRES_ARM_NEON;
418 for (uint32_t channels = 48; channels < 384; channels += 72) {
419 DWConvMicrokernelTester()
420 .cr(24)
421 .kr(9)
422 .channels(channels)
423 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800424 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700425 }
426 }
427
428 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
429 TEST_REQUIRES_ARM_NEON;
430 for (uint32_t channels = 48; channels < 384; channels += 72) {
431 DWConvMicrokernelTester()
432 .cr(24)
433 .kr(9)
434 .channels(channels)
435 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800436 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700437 }
438 }
439
440 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
441 TEST_REQUIRES_ARM_NEON;
442 for (uint32_t channels = 1; channels < 24; channels++) {
443 DWConvMicrokernelTester()
444 .cr(24)
445 .kr(9)
446 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800447 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700448 }
449 }
450
451 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
452 TEST_REQUIRES_ARM_NEON;
453 for (uint32_t channels = 25; channels < 48; channels++) {
454 DWConvMicrokernelTester()
455 .cr(24)
456 .kr(9)
457 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700459 }
460 }
461
462 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
463 TEST_REQUIRES_ARM_NEON;
464 for (uint32_t channels = 25; channels < 48; channels++) {
465 DWConvMicrokernelTester()
466 .cr(24)
467 .kr(9)
468 .channels(channels)
469 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700471 }
472 }
473
474 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
475 TEST_REQUIRES_ARM_NEON;
476 for (uint32_t channels = 25; channels < 48; channels++) {
477 DWConvMicrokernelTester()
478 .cr(24)
479 .kr(9)
480 .channels(channels)
481 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800482 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700483 }
484 }
485
486 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
487 TEST_REQUIRES_ARM_NEON;
488 for (size_t channels = 1; channels <= 120; channels += 23) {
489 DWConvMicrokernelTester()
490 .cr(24)
491 .kr(9)
492 .channels(channels)
493 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800494 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700495 }
496 }
497
498 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
499 TEST_REQUIRES_ARM_NEON;
500 for (size_t channels = 1; channels <= 120; channels += 23) {
501 for (size_t step = 2; step <= 9; step++) {
502 DWConvMicrokernelTester()
503 .cr(24)
504 .kr(9)
505 .channels(channels)
506 .width(3)
507 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800508 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700509 }
510 }
511 }
512
513 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
514 TEST_REQUIRES_ARM_NEON;
515 for (size_t channels = 1; channels <= 120; channels += 23) {
516 DWConvMicrokernelTester()
517 .cr(24)
518 .kr(9)
519 .channels(24)
520 .width(5)
521 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -0800522 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700523 }
524 }
525
526 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
527 TEST_REQUIRES_ARM_NEON;
528 for (size_t channels = 1; channels <= 120; channels += 23) {
529 DWConvMicrokernelTester()
530 .cr(24)
531 .kr(9)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800535 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700536 }
537 }
538
539 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
540 TEST_REQUIRES_ARM_NEON;
541 for (size_t channels = 1; channels <= 120; channels += 23) {
542 DWConvMicrokernelTester()
543 .cr(24)
544 .kr(9)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800548 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700549 }
550 }
551
552 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
553 TEST_REQUIRES_ARM_NEON;
554 for (uint32_t channels = 48; channels < 384; channels += 72) {
555 DWConvMicrokernelTester()
556 .cr(24)
557 .kr(9)
558 .channels(channels)
559 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -0800560 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700561 }
562 }
563
564 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
565 TEST_REQUIRES_ARM_NEON;
566 for (uint32_t mz = 0; mz < 9; mz++) {
567 for (uint32_t channels = 48; channels < 384; channels += 72) {
568 DWConvMicrokernelTester()
569 .cr(24)
570 .kr(9)
571 .channels(channels)
572 .input_offset(464)
573 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700575 }
576 }
577 }
578#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
579
580
581#if XNN_ARCH_ARM || XNN_ARCH_ARM64
582 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
583 TEST_REQUIRES_ARM_NEON;
584 DWConvMicrokernelTester()
585 .cr(32)
586 .kr(9)
587 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -0800588 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700589 }
590
591 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
592 TEST_REQUIRES_ARM_NEON;
593 for (uint32_t channels = 64; channels < 512; channels += 96) {
594 DWConvMicrokernelTester()
595 .cr(32)
596 .kr(9)
597 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700599 }
600 }
601
602 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
603 TEST_REQUIRES_ARM_NEON;
604 for (uint32_t channels = 64; channels < 512; channels += 96) {
605 DWConvMicrokernelTester()
606 .cr(32)
607 .kr(9)
608 .channels(channels)
609 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700611 }
612 }
613
614 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
615 TEST_REQUIRES_ARM_NEON;
616 for (uint32_t channels = 64; channels < 512; channels += 96) {
617 DWConvMicrokernelTester()
618 .cr(32)
619 .kr(9)
620 .channels(channels)
621 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800622 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700623 }
624 }
625
626 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
627 TEST_REQUIRES_ARM_NEON;
628 for (uint32_t channels = 1; channels < 32; channels++) {
629 DWConvMicrokernelTester()
630 .cr(32)
631 .kr(9)
632 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800633 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700634 }
635 }
636
637 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
638 TEST_REQUIRES_ARM_NEON;
639 for (uint32_t channels = 33; channels < 64; channels++) {
640 DWConvMicrokernelTester()
641 .cr(32)
642 .kr(9)
643 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700645 }
646 }
647
648 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
649 TEST_REQUIRES_ARM_NEON;
650 for (uint32_t channels = 33; channels < 64; channels++) {
651 DWConvMicrokernelTester()
652 .cr(32)
653 .kr(9)
654 .channels(channels)
655 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700657 }
658 }
659
660 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
661 TEST_REQUIRES_ARM_NEON;
662 for (uint32_t channels = 33; channels < 64; channels++) {
663 DWConvMicrokernelTester()
664 .cr(32)
665 .kr(9)
666 .channels(channels)
667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800668 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700669 }
670 }
671
672 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
673 TEST_REQUIRES_ARM_NEON;
674 for (size_t channels = 1; channels <= 160; channels += 31) {
675 DWConvMicrokernelTester()
676 .cr(32)
677 .kr(9)
678 .channels(channels)
679 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800680 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700681 }
682 }
683
684 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
685 TEST_REQUIRES_ARM_NEON;
686 for (size_t channels = 1; channels <= 160; channels += 31) {
687 for (size_t step = 2; step <= 9; step++) {
688 DWConvMicrokernelTester()
689 .cr(32)
690 .kr(9)
691 .channels(channels)
692 .width(3)
693 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800694 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700695 }
696 }
697 }
698
699 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
700 TEST_REQUIRES_ARM_NEON;
701 for (size_t channels = 1; channels <= 160; channels += 31) {
702 DWConvMicrokernelTester()
703 .cr(32)
704 .kr(9)
705 .channels(32)
706 .width(5)
707 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800708 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700709 }
710 }
711
712 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
713 TEST_REQUIRES_ARM_NEON;
714 for (size_t channels = 1; channels <= 160; channels += 31) {
715 DWConvMicrokernelTester()
716 .cr(32)
717 .kr(9)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800721 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700722 }
723 }
724
725 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
726 TEST_REQUIRES_ARM_NEON;
727 for (size_t channels = 1; channels <= 160; channels += 31) {
728 DWConvMicrokernelTester()
729 .cr(32)
730 .kr(9)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800734 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700735 }
736 }
737
738 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
739 TEST_REQUIRES_ARM_NEON;
740 for (uint32_t channels = 64; channels < 512; channels += 96) {
741 DWConvMicrokernelTester()
742 .cr(32)
743 .kr(9)
744 .channels(channels)
745 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -0800746 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700747 }
748 }
749
750 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
751 TEST_REQUIRES_ARM_NEON;
752 for (uint32_t mz = 0; mz < 9; mz++) {
753 for (uint32_t channels = 64; channels < 512; channels += 96) {
754 DWConvMicrokernelTester()
755 .cr(32)
756 .kr(9)
757 .channels(channels)
758 .input_offset(592)
759 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700761 }
762 }
763 }
764#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
767#if XNN_ARCH_ARM || XNN_ARCH_ARM64
768 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
769 TEST_REQUIRES_ARM_NEON_V8;
770 DWConvMicrokernelTester()
771 .cr(8)
772 .kr(9)
773 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800774 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700775 }
776
777 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
778 TEST_REQUIRES_ARM_NEON_V8;
779 for (uint32_t channels = 16; channels < 128; channels += 24) {
780 DWConvMicrokernelTester()
781 .cr(8)
782 .kr(9)
783 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800784 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700785 }
786 }
787
788 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
789 TEST_REQUIRES_ARM_NEON_V8;
790 for (uint32_t channels = 16; channels < 128; channels += 24) {
791 DWConvMicrokernelTester()
792 .cr(8)
793 .kr(9)
794 .channels(channels)
795 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800796 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700797 }
798 }
799
800 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
801 TEST_REQUIRES_ARM_NEON_V8;
802 for (uint32_t channels = 16; channels < 128; channels += 24) {
803 DWConvMicrokernelTester()
804 .cr(8)
805 .kr(9)
806 .channels(channels)
807 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800808 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700809 }
810 }
811
812 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
813 TEST_REQUIRES_ARM_NEON_V8;
814 for (uint32_t channels = 1; channels < 8; channels++) {
815 DWConvMicrokernelTester()
816 .cr(8)
817 .kr(9)
818 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800819 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700820 }
821 }
822
823 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
824 TEST_REQUIRES_ARM_NEON_V8;
825 for (uint32_t channels = 9; channels < 16; channels++) {
826 DWConvMicrokernelTester()
827 .cr(8)
828 .kr(9)
829 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700831 }
832 }
833
834 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
835 TEST_REQUIRES_ARM_NEON_V8;
836 for (uint32_t channels = 9; channels < 16; channels++) {
837 DWConvMicrokernelTester()
838 .cr(8)
839 .kr(9)
840 .channels(channels)
841 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700843 }
844 }
845
846 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
847 TEST_REQUIRES_ARM_NEON_V8;
848 for (uint32_t channels = 9; channels < 16; channels++) {
849 DWConvMicrokernelTester()
850 .cr(8)
851 .kr(9)
852 .channels(channels)
853 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800854 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700855 }
856 }
857
858 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
859 TEST_REQUIRES_ARM_NEON_V8;
860 for (size_t channels = 1; channels <= 40; channels += 7) {
861 DWConvMicrokernelTester()
862 .cr(8)
863 .kr(9)
864 .channels(channels)
865 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800866 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700867 }
868 }
869
870 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
871 TEST_REQUIRES_ARM_NEON_V8;
872 for (size_t channels = 1; channels <= 40; channels += 7) {
873 for (size_t step = 2; step <= 9; step++) {
874 DWConvMicrokernelTester()
875 .cr(8)
876 .kr(9)
877 .channels(channels)
878 .width(3)
879 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800880 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700881 }
882 }
883 }
884
885 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
886 TEST_REQUIRES_ARM_NEON_V8;
887 for (size_t channels = 1; channels <= 40; channels += 7) {
888 DWConvMicrokernelTester()
889 .cr(8)
890 .kr(9)
891 .channels(8)
892 .width(5)
893 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800894 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700895 }
896 }
897
898 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
899 TEST_REQUIRES_ARM_NEON_V8;
900 for (size_t channels = 1; channels <= 40; channels += 7) {
901 DWConvMicrokernelTester()
902 .cr(8)
903 .kr(9)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800907 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700908 }
909 }
910
911 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
912 TEST_REQUIRES_ARM_NEON_V8;
913 for (size_t channels = 1; channels <= 40; channels += 7) {
914 DWConvMicrokernelTester()
915 .cr(8)
916 .kr(9)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800920 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700921 }
922 }
923
924 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
925 TEST_REQUIRES_ARM_NEON_V8;
926 for (uint32_t channels = 16; channels < 128; channels += 24) {
927 DWConvMicrokernelTester()
928 .cr(8)
929 .kr(9)
930 .channels(channels)
931 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -0800932 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700933 }
934 }
935
936 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
937 TEST_REQUIRES_ARM_NEON_V8;
938 for (uint32_t mz = 0; mz < 9; mz++) {
939 for (uint32_t channels = 16; channels < 128; channels += 24) {
940 DWConvMicrokernelTester()
941 .cr(8)
942 .kr(9)
943 .channels(channels)
944 .input_offset(176)
945 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700947 }
948 }
949 }
950#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
951
952
953#if XNN_ARCH_ARM || XNN_ARCH_ARM64
954 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
955 TEST_REQUIRES_ARM_NEON_V8;
956 DWConvMicrokernelTester()
957 .cr(16)
958 .kr(9)
959 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800960 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700961 }
962
963 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
964 TEST_REQUIRES_ARM_NEON_V8;
965 for (uint32_t channels = 32; channels < 256; channels += 48) {
966 DWConvMicrokernelTester()
967 .cr(16)
968 .kr(9)
969 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800970 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700971 }
972 }
973
974 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
975 TEST_REQUIRES_ARM_NEON_V8;
976 for (uint32_t channels = 32; channels < 256; channels += 48) {
977 DWConvMicrokernelTester()
978 .cr(16)
979 .kr(9)
980 .channels(channels)
981 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700983 }
984 }
985
986 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
987 TEST_REQUIRES_ARM_NEON_V8;
988 for (uint32_t channels = 32; channels < 256; channels += 48) {
989 DWConvMicrokernelTester()
990 .cr(16)
991 .kr(9)
992 .channels(channels)
993 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800994 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -0700995 }
996 }
997
998 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
999 TEST_REQUIRES_ARM_NEON_V8;
1000 for (uint32_t channels = 1; channels < 16; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(16)
1003 .kr(9)
1004 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001005 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001006 }
1007 }
1008
1009 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
1010 TEST_REQUIRES_ARM_NEON_V8;
1011 for (uint32_t channels = 17; channels < 32; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(16)
1014 .kr(9)
1015 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001016 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001017 }
1018 }
1019
1020 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
1021 TEST_REQUIRES_ARM_NEON_V8;
1022 for (uint32_t channels = 17; channels < 32; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(16)
1025 .kr(9)
1026 .channels(channels)
1027 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001028 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001029 }
1030 }
1031
1032 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
1033 TEST_REQUIRES_ARM_NEON_V8;
1034 for (uint32_t channels = 17; channels < 32; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(16)
1037 .kr(9)
1038 .channels(channels)
1039 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001040 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001041 }
1042 }
1043
1044 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
1045 TEST_REQUIRES_ARM_NEON_V8;
1046 for (size_t channels = 1; channels <= 80; channels += 15) {
1047 DWConvMicrokernelTester()
1048 .cr(16)
1049 .kr(9)
1050 .channels(channels)
1051 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001052 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001053 }
1054 }
1055
1056 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
1057 TEST_REQUIRES_ARM_NEON_V8;
1058 for (size_t channels = 1; channels <= 80; channels += 15) {
1059 for (size_t step = 2; step <= 9; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(16)
1062 .kr(9)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001066 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001067 }
1068 }
1069 }
1070
1071 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
1072 TEST_REQUIRES_ARM_NEON_V8;
1073 for (size_t channels = 1; channels <= 80; channels += 15) {
1074 DWConvMicrokernelTester()
1075 .cr(16)
1076 .kr(9)
1077 .channels(16)
1078 .width(5)
1079 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001080 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001081 }
1082 }
1083
1084 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
1085 TEST_REQUIRES_ARM_NEON_V8;
1086 for (size_t channels = 1; channels <= 80; channels += 15) {
1087 DWConvMicrokernelTester()
1088 .cr(16)
1089 .kr(9)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001093 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001094 }
1095 }
1096
1097 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
1098 TEST_REQUIRES_ARM_NEON_V8;
1099 for (size_t channels = 1; channels <= 80; channels += 15) {
1100 DWConvMicrokernelTester()
1101 .cr(16)
1102 .kr(9)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001106 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001107 }
1108 }
1109
1110 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
1111 TEST_REQUIRES_ARM_NEON_V8;
1112 for (uint32_t channels = 32; channels < 256; channels += 48) {
1113 DWConvMicrokernelTester()
1114 .cr(16)
1115 .kr(9)
1116 .channels(channels)
1117 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001118 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001119 }
1120 }
1121
1122 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
1123 TEST_REQUIRES_ARM_NEON_V8;
1124 for (uint32_t mz = 0; mz < 9; mz++) {
1125 for (uint32_t channels = 32; channels < 256; channels += 48) {
1126 DWConvMicrokernelTester()
1127 .cr(16)
1128 .kr(9)
1129 .channels(channels)
1130 .input_offset(304)
1131 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001133 }
1134 }
1135 }
1136#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137
1138
1139#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1140 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
1141 TEST_REQUIRES_ARM_NEON_V8;
1142 DWConvMicrokernelTester()
1143 .cr(24)
1144 .kr(9)
1145 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08001146 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001147 }
1148
1149 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
1150 TEST_REQUIRES_ARM_NEON_V8;
1151 for (uint32_t channels = 48; channels < 384; channels += 72) {
1152 DWConvMicrokernelTester()
1153 .cr(24)
1154 .kr(9)
1155 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001156 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001157 }
1158 }
1159
1160 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
1161 TEST_REQUIRES_ARM_NEON_V8;
1162 for (uint32_t channels = 48; channels < 384; channels += 72) {
1163 DWConvMicrokernelTester()
1164 .cr(24)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001168 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001169 }
1170 }
1171
1172 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
1173 TEST_REQUIRES_ARM_NEON_V8;
1174 for (uint32_t channels = 48; channels < 384; channels += 72) {
1175 DWConvMicrokernelTester()
1176 .cr(24)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001180 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001181 }
1182 }
1183
1184 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
1185 TEST_REQUIRES_ARM_NEON_V8;
1186 for (uint32_t channels = 1; channels < 24; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(24)
1189 .kr(9)
1190 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001191 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001192 }
1193 }
1194
1195 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
1196 TEST_REQUIRES_ARM_NEON_V8;
1197 for (uint32_t channels = 25; channels < 48; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(24)
1200 .kr(9)
1201 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001203 }
1204 }
1205
1206 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
1207 TEST_REQUIRES_ARM_NEON_V8;
1208 for (uint32_t channels = 25; channels < 48; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(24)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001214 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001215 }
1216 }
1217
1218 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
1219 TEST_REQUIRES_ARM_NEON_V8;
1220 for (uint32_t channels = 25; channels < 48; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(24)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001227 }
1228 }
1229
1230 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
1231 TEST_REQUIRES_ARM_NEON_V8;
1232 for (size_t channels = 1; channels <= 120; channels += 23) {
1233 DWConvMicrokernelTester()
1234 .cr(24)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001239 }
1240 }
1241
1242 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
1243 TEST_REQUIRES_ARM_NEON_V8;
1244 for (size_t channels = 1; channels <= 120; channels += 23) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(24)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001252 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001253 }
1254 }
1255 }
1256
1257 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
1258 TEST_REQUIRES_ARM_NEON_V8;
1259 for (size_t channels = 1; channels <= 120; channels += 23) {
1260 DWConvMicrokernelTester()
1261 .cr(24)
1262 .kr(9)
1263 .channels(24)
1264 .width(5)
1265 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08001266 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001267 }
1268 }
1269
1270 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
1271 TEST_REQUIRES_ARM_NEON_V8;
1272 for (size_t channels = 1; channels <= 120; channels += 23) {
1273 DWConvMicrokernelTester()
1274 .cr(24)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001279 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001280 }
1281 }
1282
1283 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
1284 TEST_REQUIRES_ARM_NEON_V8;
1285 for (size_t channels = 1; channels <= 120; channels += 23) {
1286 DWConvMicrokernelTester()
1287 .cr(24)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001292 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001293 }
1294 }
1295
1296 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
1297 TEST_REQUIRES_ARM_NEON_V8;
1298 for (uint32_t channels = 48; channels < 384; channels += 72) {
1299 DWConvMicrokernelTester()
1300 .cr(24)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08001304 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001305 }
1306 }
1307
1308 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
1309 TEST_REQUIRES_ARM_NEON_V8;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 48; channels < 384; channels += 72) {
1312 DWConvMicrokernelTester()
1313 .cr(24)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(464)
1317 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001319 }
1320 }
1321 }
1322#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323
1324
1325#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1326 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
1327 TEST_REQUIRES_ARM_NEON_V8;
1328 DWConvMicrokernelTester()
1329 .cr(32)
1330 .kr(9)
1331 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08001332 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001333 }
1334
1335 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
1336 TEST_REQUIRES_ARM_NEON_V8;
1337 for (uint32_t channels = 64; channels < 512; channels += 96) {
1338 DWConvMicrokernelTester()
1339 .cr(32)
1340 .kr(9)
1341 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001343 }
1344 }
1345
1346 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
1347 TEST_REQUIRES_ARM_NEON_V8;
1348 for (uint32_t channels = 64; channels < 512; channels += 96) {
1349 DWConvMicrokernelTester()
1350 .cr(32)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001354 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001355 }
1356 }
1357
1358 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
1359 TEST_REQUIRES_ARM_NEON_V8;
1360 for (uint32_t channels = 64; channels < 512; channels += 96) {
1361 DWConvMicrokernelTester()
1362 .cr(32)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001366 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001367 }
1368 }
1369
1370 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
1371 TEST_REQUIRES_ARM_NEON_V8;
1372 for (uint32_t channels = 1; channels < 32; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(32)
1375 .kr(9)
1376 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001377 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001378 }
1379 }
1380
1381 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
1382 TEST_REQUIRES_ARM_NEON_V8;
1383 for (uint32_t channels = 33; channels < 64; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(32)
1386 .kr(9)
1387 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001389 }
1390 }
1391
1392 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
1393 TEST_REQUIRES_ARM_NEON_V8;
1394 for (uint32_t channels = 33; channels < 64; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(32)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001400 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001401 }
1402 }
1403
1404 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
1405 TEST_REQUIRES_ARM_NEON_V8;
1406 for (uint32_t channels = 33; channels < 64; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(32)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001413 }
1414 }
1415
1416 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
1417 TEST_REQUIRES_ARM_NEON_V8;
1418 for (size_t channels = 1; channels <= 160; channels += 31) {
1419 DWConvMicrokernelTester()
1420 .cr(32)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001424 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001425 }
1426 }
1427
1428 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
1429 TEST_REQUIRES_ARM_NEON_V8;
1430 for (size_t channels = 1; channels <= 160; channels += 31) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(32)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001438 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001439 }
1440 }
1441 }
1442
1443 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
1444 TEST_REQUIRES_ARM_NEON_V8;
1445 for (size_t channels = 1; channels <= 160; channels += 31) {
1446 DWConvMicrokernelTester()
1447 .cr(32)
1448 .kr(9)
1449 .channels(32)
1450 .width(5)
1451 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08001452 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001453 }
1454 }
1455
1456 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
1457 TEST_REQUIRES_ARM_NEON_V8;
1458 for (size_t channels = 1; channels <= 160; channels += 31) {
1459 DWConvMicrokernelTester()
1460 .cr(32)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001465 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001466 }
1467 }
1468
1469 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
1470 TEST_REQUIRES_ARM_NEON_V8;
1471 for (size_t channels = 1; channels <= 160; channels += 31) {
1472 DWConvMicrokernelTester()
1473 .cr(32)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001478 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001479 }
1480 }
1481
1482 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
1483 TEST_REQUIRES_ARM_NEON_V8;
1484 for (uint32_t channels = 64; channels < 512; channels += 96) {
1485 DWConvMicrokernelTester()
1486 .cr(32)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08001490 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001491 }
1492 }
1493
1494 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
1495 TEST_REQUIRES_ARM_NEON_V8;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 64; channels < 512; channels += 96) {
1498 DWConvMicrokernelTester()
1499 .cr(32)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(592)
1503 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -07001505 }
1506 }
1507 }
1508#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509
1510
Marat Dukhan9b474cf2021-05-25 16:37:48 -07001511#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -07001512 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
1513 TEST_REQUIRES_X86_SSE2;
1514 DWConvMicrokernelTester()
1515 .cr(8)
1516 .kr(9)
1517 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001518 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001519 }
1520
1521 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
1522 TEST_REQUIRES_X86_SSE2;
1523 for (uint32_t channels = 16; channels < 128; channels += 24) {
1524 DWConvMicrokernelTester()
1525 .cr(8)
1526 .kr(9)
1527 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001528 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001529 }
1530 }
1531
1532 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
1533 TEST_REQUIRES_X86_SSE2;
1534 for (uint32_t channels = 16; channels < 128; channels += 24) {
1535 DWConvMicrokernelTester()
1536 .cr(8)
1537 .kr(9)
1538 .channels(channels)
1539 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001541 }
1542 }
1543
1544 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
1545 TEST_REQUIRES_X86_SSE2;
1546 for (uint32_t channels = 16; channels < 128; channels += 24) {
1547 DWConvMicrokernelTester()
1548 .cr(8)
1549 .kr(9)
1550 .channels(channels)
1551 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001552 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001553 }
1554 }
1555
1556 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
1557 TEST_REQUIRES_X86_SSE2;
1558 for (uint32_t channels = 1; channels < 8; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(8)
1561 .kr(9)
1562 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001563 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001564 }
1565 }
1566
1567 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
1568 TEST_REQUIRES_X86_SSE2;
1569 for (uint32_t channels = 9; channels < 16; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(8)
1572 .kr(9)
1573 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001575 }
1576 }
1577
1578 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
1579 TEST_REQUIRES_X86_SSE2;
1580 for (uint32_t channels = 9; channels < 16; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(8)
1583 .kr(9)
1584 .channels(channels)
1585 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001586 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001587 }
1588 }
1589
1590 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
1591 TEST_REQUIRES_X86_SSE2;
1592 for (uint32_t channels = 9; channels < 16; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(8)
1595 .kr(9)
1596 .channels(channels)
1597 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001599 }
1600 }
1601
1602 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
1603 TEST_REQUIRES_X86_SSE2;
1604 for (size_t channels = 1; channels <= 40; channels += 7) {
1605 DWConvMicrokernelTester()
1606 .cr(8)
1607 .kr(9)
1608 .channels(channels)
1609 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001611 }
1612 }
1613
1614 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
1615 TEST_REQUIRES_X86_SSE2;
1616 for (size_t channels = 1; channels <= 40; channels += 7) {
1617 for (size_t step = 2; step <= 9; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(8)
1620 .kr(9)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001624 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001625 }
1626 }
1627 }
1628
1629 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
1630 TEST_REQUIRES_X86_SSE2;
1631 for (size_t channels = 1; channels <= 40; channels += 7) {
1632 DWConvMicrokernelTester()
1633 .cr(8)
1634 .kr(9)
1635 .channels(8)
1636 .width(5)
1637 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001638 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001639 }
1640 }
1641
1642 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
1643 TEST_REQUIRES_X86_SSE2;
1644 for (size_t channels = 1; channels <= 40; channels += 7) {
1645 DWConvMicrokernelTester()
1646 .cr(8)
1647 .kr(9)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001652 }
1653 }
1654
1655 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
1656 TEST_REQUIRES_X86_SSE2;
1657 for (size_t channels = 1; channels <= 40; channels += 7) {
1658 DWConvMicrokernelTester()
1659 .cr(8)
1660 .kr(9)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001664 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001665 }
1666 }
1667
1668 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
1669 TEST_REQUIRES_X86_SSE2;
1670 for (uint32_t channels = 16; channels < 128; channels += 24) {
1671 DWConvMicrokernelTester()
1672 .cr(8)
1673 .kr(9)
1674 .channels(channels)
1675 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08001676 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001677 }
1678 }
1679
1680 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
1681 TEST_REQUIRES_X86_SSE2;
1682 for (uint32_t mz = 0; mz < 9; mz++) {
1683 for (uint32_t channels = 16; channels < 128; channels += 24) {
1684 DWConvMicrokernelTester()
1685 .cr(8)
1686 .kr(9)
1687 .channels(channels)
1688 .input_offset(176)
1689 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001691 }
1692 }
1693 }
1694#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1695
1696
1697#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1698 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
1699 TEST_REQUIRES_X86_SSE2;
1700 DWConvMicrokernelTester()
1701 .cr(16)
1702 .kr(9)
1703 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001704 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001705 }
1706
1707 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
1708 TEST_REQUIRES_X86_SSE2;
1709 for (uint32_t channels = 32; channels < 256; channels += 48) {
1710 DWConvMicrokernelTester()
1711 .cr(16)
1712 .kr(9)
1713 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001715 }
1716 }
1717
1718 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
1719 TEST_REQUIRES_X86_SSE2;
1720 for (uint32_t channels = 32; channels < 256; channels += 48) {
1721 DWConvMicrokernelTester()
1722 .cr(16)
1723 .kr(9)
1724 .channels(channels)
1725 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001726 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001727 }
1728 }
1729
1730 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
1731 TEST_REQUIRES_X86_SSE2;
1732 for (uint32_t channels = 32; channels < 256; channels += 48) {
1733 DWConvMicrokernelTester()
1734 .cr(16)
1735 .kr(9)
1736 .channels(channels)
1737 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001738 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001739 }
1740 }
1741
1742 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
1743 TEST_REQUIRES_X86_SSE2;
1744 for (uint32_t channels = 1; channels < 16; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(16)
1747 .kr(9)
1748 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001749 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001750 }
1751 }
1752
1753 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
1754 TEST_REQUIRES_X86_SSE2;
1755 for (uint32_t channels = 17; channels < 32; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(16)
1758 .kr(9)
1759 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001761 }
1762 }
1763
1764 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
1765 TEST_REQUIRES_X86_SSE2;
1766 for (uint32_t channels = 17; channels < 32; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(16)
1769 .kr(9)
1770 .channels(channels)
1771 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001772 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001773 }
1774 }
1775
1776 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
1777 TEST_REQUIRES_X86_SSE2;
1778 for (uint32_t channels = 17; channels < 32; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(16)
1781 .kr(9)
1782 .channels(channels)
1783 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001784 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001785 }
1786 }
1787
1788 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
1789 TEST_REQUIRES_X86_SSE2;
1790 for (size_t channels = 1; channels <= 80; channels += 15) {
1791 DWConvMicrokernelTester()
1792 .cr(16)
1793 .kr(9)
1794 .channels(channels)
1795 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001796 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001797 }
1798 }
1799
1800 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
1801 TEST_REQUIRES_X86_SSE2;
1802 for (size_t channels = 1; channels <= 80; channels += 15) {
1803 for (size_t step = 2; step <= 9; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(16)
1806 .kr(9)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001810 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001811 }
1812 }
1813 }
1814
1815 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
1816 TEST_REQUIRES_X86_SSE2;
1817 for (size_t channels = 1; channels <= 80; channels += 15) {
1818 DWConvMicrokernelTester()
1819 .cr(16)
1820 .kr(9)
1821 .channels(16)
1822 .width(5)
1823 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001824 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001825 }
1826 }
1827
1828 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
1829 TEST_REQUIRES_X86_SSE2;
1830 for (size_t channels = 1; channels <= 80; channels += 15) {
1831 DWConvMicrokernelTester()
1832 .cr(16)
1833 .kr(9)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001837 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001838 }
1839 }
1840
1841 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
1842 TEST_REQUIRES_X86_SSE2;
1843 for (size_t channels = 1; channels <= 80; channels += 15) {
1844 DWConvMicrokernelTester()
1845 .cr(16)
1846 .kr(9)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001850 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001851 }
1852 }
1853
1854 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
1855 TEST_REQUIRES_X86_SSE2;
1856 for (uint32_t channels = 32; channels < 256; channels += 48) {
1857 DWConvMicrokernelTester()
1858 .cr(16)
1859 .kr(9)
1860 .channels(channels)
1861 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001862 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001863 }
1864 }
1865
1866 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
1867 TEST_REQUIRES_X86_SSE2;
1868 for (uint32_t mz = 0; mz < 9; mz++) {
1869 for (uint32_t channels = 32; channels < 256; channels += 48) {
1870 DWConvMicrokernelTester()
1871 .cr(16)
1872 .kr(9)
1873 .channels(channels)
1874 .input_offset(304)
1875 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001877 }
1878 }
1879 }
1880#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1881
1882
1883#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1884 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_eq_24) {
1885 TEST_REQUIRES_X86_SSE2;
1886 DWConvMicrokernelTester()
1887 .cr(24)
1888 .kr(9)
1889 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08001890 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001891 }
1892
1893 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24) {
1894 TEST_REQUIRES_X86_SSE2;
1895 for (uint32_t channels = 48; channels < 384; channels += 72) {
1896 DWConvMicrokernelTester()
1897 .cr(24)
1898 .kr(9)
1899 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001900 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001901 }
1902 }
1903
1904 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
1905 TEST_REQUIRES_X86_SSE2;
1906 for (uint32_t channels = 48; channels < 384; channels += 72) {
1907 DWConvMicrokernelTester()
1908 .cr(24)
1909 .kr(9)
1910 .channels(channels)
1911 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001912 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001913 }
1914 }
1915
1916 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
1917 TEST_REQUIRES_X86_SSE2;
1918 for (uint32_t channels = 48; channels < 384; channels += 72) {
1919 DWConvMicrokernelTester()
1920 .cr(24)
1921 .kr(9)
1922 .channels(channels)
1923 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001924 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001925 }
1926 }
1927
1928 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_lt_24) {
1929 TEST_REQUIRES_X86_SSE2;
1930 for (uint32_t channels = 1; channels < 24; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(24)
1933 .kr(9)
1934 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001935 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001936 }
1937 }
1938
1939 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24) {
1940 TEST_REQUIRES_X86_SSE2;
1941 for (uint32_t channels = 25; channels < 48; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(24)
1944 .kr(9)
1945 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001947 }
1948 }
1949
1950 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
1951 TEST_REQUIRES_X86_SSE2;
1952 for (uint32_t channels = 25; channels < 48; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(24)
1955 .kr(9)
1956 .channels(channels)
1957 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001958 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001959 }
1960 }
1961
1962 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
1963 TEST_REQUIRES_X86_SSE2;
1964 for (uint32_t channels = 25; channels < 48; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(24)
1967 .kr(9)
1968 .channels(channels)
1969 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001970 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001971 }
1972 }
1973
1974 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel) {
1975 TEST_REQUIRES_X86_SSE2;
1976 for (size_t channels = 1; channels <= 120; channels += 23) {
1977 DWConvMicrokernelTester()
1978 .cr(24)
1979 .kr(9)
1980 .channels(channels)
1981 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001983 }
1984 }
1985
1986 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_step) {
1987 TEST_REQUIRES_X86_SSE2;
1988 for (size_t channels = 1; channels <= 120; channels += 23) {
1989 for (size_t step = 2; step <= 9; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(24)
1992 .kr(9)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001996 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07001997 }
1998 }
1999 }
2000
2001 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
2002 TEST_REQUIRES_X86_SSE2;
2003 for (size_t channels = 1; channels <= 120; channels += 23) {
2004 DWConvMicrokernelTester()
2005 .cr(24)
2006 .kr(9)
2007 .channels(24)
2008 .width(5)
2009 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08002010 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002011 }
2012 }
2013
2014 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
2015 TEST_REQUIRES_X86_SSE2;
2016 for (size_t channels = 1; channels <= 120; channels += 23) {
2017 DWConvMicrokernelTester()
2018 .cr(24)
2019 .kr(9)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002023 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002024 }
2025 }
2026
2027 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
2028 TEST_REQUIRES_X86_SSE2;
2029 for (size_t channels = 1; channels <= 120; channels += 23) {
2030 DWConvMicrokernelTester()
2031 .cr(24)
2032 .kr(9)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002036 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002037 }
2038 }
2039
2040 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, input_offset) {
2041 TEST_REQUIRES_X86_SSE2;
2042 for (uint32_t channels = 48; channels < 384; channels += 72) {
2043 DWConvMicrokernelTester()
2044 .cr(24)
2045 .kr(9)
2046 .channels(channels)
2047 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08002048 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002049 }
2050 }
2051
2052 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, zero) {
2053 TEST_REQUIRES_X86_SSE2;
2054 for (uint32_t mz = 0; mz < 9; mz++) {
2055 for (uint32_t channels = 48; channels < 384; channels += 72) {
2056 DWConvMicrokernelTester()
2057 .cr(24)
2058 .kr(9)
2059 .channels(channels)
2060 .input_offset(464)
2061 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002063 }
2064 }
2065 }
2066#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2067
2068
2069#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07002070 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_eq_8) {
2071 TEST_REQUIRES_X86_SSE2;
2072 DWConvMicrokernelTester()
2073 .cr(8)
2074 .kr(9)
2075 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002076 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002077 }
2078
2079 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8) {
2080 TEST_REQUIRES_X86_SSE2;
2081 for (uint32_t channels = 16; channels < 128; channels += 24) {
2082 DWConvMicrokernelTester()
2083 .cr(8)
2084 .kr(9)
2085 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002087 }
2088 }
2089
2090 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
2091 TEST_REQUIRES_X86_SSE2;
2092 for (uint32_t channels = 16; channels < 128; channels += 24) {
2093 DWConvMicrokernelTester()
2094 .cr(8)
2095 .kr(9)
2096 .channels(channels)
2097 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002099 }
2100 }
2101
2102 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
2103 TEST_REQUIRES_X86_SSE2;
2104 for (uint32_t channels = 16; channels < 128; channels += 24) {
2105 DWConvMicrokernelTester()
2106 .cr(8)
2107 .kr(9)
2108 .channels(channels)
2109 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002110 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002111 }
2112 }
2113
2114 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_lt_8) {
2115 TEST_REQUIRES_X86_SSE2;
2116 for (uint32_t channels = 1; channels < 8; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(8)
2119 .kr(9)
2120 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002121 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002122 }
2123 }
2124
2125 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8) {
2126 TEST_REQUIRES_X86_SSE2;
2127 for (uint32_t channels = 9; channels < 16; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(8)
2130 .kr(9)
2131 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002133 }
2134 }
2135
2136 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
2137 TEST_REQUIRES_X86_SSE2;
2138 for (uint32_t channels = 9; channels < 16; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(8)
2141 .kr(9)
2142 .channels(channels)
2143 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002144 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002145 }
2146 }
2147
2148 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
2149 TEST_REQUIRES_X86_SSE2;
2150 for (uint32_t channels = 9; channels < 16; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(8)
2153 .kr(9)
2154 .channels(channels)
2155 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002156 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002157 }
2158 }
2159
2160 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel) {
2161 TEST_REQUIRES_X86_SSE2;
2162 for (size_t channels = 1; channels <= 40; channels += 7) {
2163 DWConvMicrokernelTester()
2164 .cr(8)
2165 .kr(9)
2166 .channels(channels)
2167 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002168 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002169 }
2170 }
2171
2172 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_step) {
2173 TEST_REQUIRES_X86_SSE2;
2174 for (size_t channels = 1; channels <= 40; channels += 7) {
2175 for (size_t step = 2; step <= 9; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(8)
2178 .kr(9)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002183 }
2184 }
2185 }
2186
2187 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
2188 TEST_REQUIRES_X86_SSE2;
2189 for (size_t channels = 1; channels <= 40; channels += 7) {
2190 DWConvMicrokernelTester()
2191 .cr(8)
2192 .kr(9)
2193 .channels(8)
2194 .width(5)
2195 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002196 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002197 }
2198 }
2199
2200 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
2201 TEST_REQUIRES_X86_SSE2;
2202 for (size_t channels = 1; channels <= 40; channels += 7) {
2203 DWConvMicrokernelTester()
2204 .cr(8)
2205 .kr(9)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002209 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002210 }
2211 }
2212
2213 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
2214 TEST_REQUIRES_X86_SSE2;
2215 for (size_t channels = 1; channels <= 40; channels += 7) {
2216 DWConvMicrokernelTester()
2217 .cr(8)
2218 .kr(9)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002222 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002223 }
2224 }
2225
2226 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, input_offset) {
2227 TEST_REQUIRES_X86_SSE2;
2228 for (uint32_t channels = 16; channels < 128; channels += 24) {
2229 DWConvMicrokernelTester()
2230 .cr(8)
2231 .kr(9)
2232 .channels(channels)
2233 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08002234 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002235 }
2236 }
2237
2238 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, zero) {
2239 TEST_REQUIRES_X86_SSE2;
2240 for (uint32_t mz = 0; mz < 9; mz++) {
2241 for (uint32_t channels = 16; channels < 128; channels += 24) {
2242 DWConvMicrokernelTester()
2243 .cr(8)
2244 .kr(9)
2245 .channels(channels)
2246 .input_offset(176)
2247 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002249 }
2250 }
2251 }
2252#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2253
2254
2255#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2256 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_eq_16) {
2257 TEST_REQUIRES_X86_SSE2;
2258 DWConvMicrokernelTester()
2259 .cr(16)
2260 .kr(9)
2261 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002262 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002263 }
2264
2265 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16) {
2266 TEST_REQUIRES_X86_SSE2;
2267 for (uint32_t channels = 32; channels < 256; channels += 48) {
2268 DWConvMicrokernelTester()
2269 .cr(16)
2270 .kr(9)
2271 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002273 }
2274 }
2275
2276 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
2277 TEST_REQUIRES_X86_SSE2;
2278 for (uint32_t channels = 32; channels < 256; channels += 48) {
2279 DWConvMicrokernelTester()
2280 .cr(16)
2281 .kr(9)
2282 .channels(channels)
2283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002285 }
2286 }
2287
2288 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
2289 TEST_REQUIRES_X86_SSE2;
2290 for (uint32_t channels = 32; channels < 256; channels += 48) {
2291 DWConvMicrokernelTester()
2292 .cr(16)
2293 .kr(9)
2294 .channels(channels)
2295 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002296 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002297 }
2298 }
2299
2300 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_lt_16) {
2301 TEST_REQUIRES_X86_SSE2;
2302 for (uint32_t channels = 1; channels < 16; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(16)
2305 .kr(9)
2306 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002307 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002308 }
2309 }
2310
2311 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16) {
2312 TEST_REQUIRES_X86_SSE2;
2313 for (uint32_t channels = 17; channels < 32; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(16)
2316 .kr(9)
2317 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002319 }
2320 }
2321
2322 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
2323 TEST_REQUIRES_X86_SSE2;
2324 for (uint32_t channels = 17; channels < 32; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(16)
2327 .kr(9)
2328 .channels(channels)
2329 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002330 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002331 }
2332 }
2333
2334 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
2335 TEST_REQUIRES_X86_SSE2;
2336 for (uint32_t channels = 17; channels < 32; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(16)
2339 .kr(9)
2340 .channels(channels)
2341 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002343 }
2344 }
2345
2346 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel) {
2347 TEST_REQUIRES_X86_SSE2;
2348 for (size_t channels = 1; channels <= 80; channels += 15) {
2349 DWConvMicrokernelTester()
2350 .cr(16)
2351 .kr(9)
2352 .channels(channels)
2353 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002354 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002355 }
2356 }
2357
2358 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_step) {
2359 TEST_REQUIRES_X86_SSE2;
2360 for (size_t channels = 1; channels <= 80; channels += 15) {
2361 for (size_t step = 2; step <= 9; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(16)
2364 .kr(9)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002368 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002369 }
2370 }
2371 }
2372
2373 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
2374 TEST_REQUIRES_X86_SSE2;
2375 for (size_t channels = 1; channels <= 80; channels += 15) {
2376 DWConvMicrokernelTester()
2377 .cr(16)
2378 .kr(9)
2379 .channels(16)
2380 .width(5)
2381 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002382 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002383 }
2384 }
2385
2386 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
2387 TEST_REQUIRES_X86_SSE2;
2388 for (size_t channels = 1; channels <= 80; channels += 15) {
2389 DWConvMicrokernelTester()
2390 .cr(16)
2391 .kr(9)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002395 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002396 }
2397 }
2398
2399 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
2400 TEST_REQUIRES_X86_SSE2;
2401 for (size_t channels = 1; channels <= 80; channels += 15) {
2402 DWConvMicrokernelTester()
2403 .cr(16)
2404 .kr(9)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002408 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002409 }
2410 }
2411
2412 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, input_offset) {
2413 TEST_REQUIRES_X86_SSE2;
2414 for (uint32_t channels = 32; channels < 256; channels += 48) {
2415 DWConvMicrokernelTester()
2416 .cr(16)
2417 .kr(9)
2418 .channels(channels)
2419 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002420 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002421 }
2422 }
2423
2424 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, zero) {
2425 TEST_REQUIRES_X86_SSE2;
2426 for (uint32_t mz = 0; mz < 9; mz++) {
2427 for (uint32_t channels = 32; channels < 256; channels += 48) {
2428 DWConvMicrokernelTester()
2429 .cr(16)
2430 .kr(9)
2431 .channels(channels)
2432 .input_offset(304)
2433 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07002435 }
2436 }
2437 }
2438#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2439
2440
2441#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -07002442 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
2443 TEST_REQUIRES_X86_SSE41;
2444 DWConvMicrokernelTester()
2445 .cr(8)
2446 .kr(9)
2447 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002448 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002449 }
2450
2451 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
2452 TEST_REQUIRES_X86_SSE41;
2453 for (uint32_t channels = 16; channels < 128; channels += 24) {
2454 DWConvMicrokernelTester()
2455 .cr(8)
2456 .kr(9)
2457 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002459 }
2460 }
2461
2462 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
2463 TEST_REQUIRES_X86_SSE41;
2464 for (uint32_t channels = 16; channels < 128; channels += 24) {
2465 DWConvMicrokernelTester()
2466 .cr(8)
2467 .kr(9)
2468 .channels(channels)
2469 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002471 }
2472 }
2473
2474 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
2475 TEST_REQUIRES_X86_SSE41;
2476 for (uint32_t channels = 16; channels < 128; channels += 24) {
2477 DWConvMicrokernelTester()
2478 .cr(8)
2479 .kr(9)
2480 .channels(channels)
2481 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002482 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002483 }
2484 }
2485
2486 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
2487 TEST_REQUIRES_X86_SSE41;
2488 for (uint32_t channels = 1; channels < 8; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(8)
2491 .kr(9)
2492 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002493 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002494 }
2495 }
2496
2497 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
2498 TEST_REQUIRES_X86_SSE41;
2499 for (uint32_t channels = 9; channels < 16; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(8)
2502 .kr(9)
2503 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002505 }
2506 }
2507
2508 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
2509 TEST_REQUIRES_X86_SSE41;
2510 for (uint32_t channels = 9; channels < 16; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(8)
2513 .kr(9)
2514 .channels(channels)
2515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002516 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002517 }
2518 }
2519
2520 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
2521 TEST_REQUIRES_X86_SSE41;
2522 for (uint32_t channels = 9; channels < 16; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(8)
2525 .kr(9)
2526 .channels(channels)
2527 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002528 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002529 }
2530 }
2531
2532 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
2533 TEST_REQUIRES_X86_SSE41;
2534 for (size_t channels = 1; channels <= 40; channels += 7) {
2535 DWConvMicrokernelTester()
2536 .cr(8)
2537 .kr(9)
2538 .channels(channels)
2539 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002541 }
2542 }
2543
2544 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
2545 TEST_REQUIRES_X86_SSE41;
2546 for (size_t channels = 1; channels <= 40; channels += 7) {
2547 for (size_t step = 2; step <= 9; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(8)
2550 .kr(9)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002554 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002555 }
2556 }
2557 }
2558
2559 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
2560 TEST_REQUIRES_X86_SSE41;
2561 for (size_t channels = 1; channels <= 40; channels += 7) {
2562 DWConvMicrokernelTester()
2563 .cr(8)
2564 .kr(9)
2565 .channels(8)
2566 .width(5)
2567 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002568 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002569 }
2570 }
2571
2572 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
2573 TEST_REQUIRES_X86_SSE41;
2574 for (size_t channels = 1; channels <= 40; channels += 7) {
2575 DWConvMicrokernelTester()
2576 .cr(8)
2577 .kr(9)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002581 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002582 }
2583 }
2584
2585 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
2586 TEST_REQUIRES_X86_SSE41;
2587 for (size_t channels = 1; channels <= 40; channels += 7) {
2588 DWConvMicrokernelTester()
2589 .cr(8)
2590 .kr(9)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002594 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002595 }
2596 }
2597
2598 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
2599 TEST_REQUIRES_X86_SSE41;
2600 for (uint32_t channels = 16; channels < 128; channels += 24) {
2601 DWConvMicrokernelTester()
2602 .cr(8)
2603 .kr(9)
2604 .channels(channels)
2605 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08002606 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002607 }
2608 }
2609
2610 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
2611 TEST_REQUIRES_X86_SSE41;
2612 for (uint32_t mz = 0; mz < 9; mz++) {
2613 for (uint32_t channels = 16; channels < 128; channels += 24) {
2614 DWConvMicrokernelTester()
2615 .cr(8)
2616 .kr(9)
2617 .channels(channels)
2618 .input_offset(176)
2619 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002621 }
2622 }
2623 }
2624#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2625
2626
2627#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2628 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
2629 TEST_REQUIRES_X86_SSE41;
2630 DWConvMicrokernelTester()
2631 .cr(16)
2632 .kr(9)
2633 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002634 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002635 }
2636
2637 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
2638 TEST_REQUIRES_X86_SSE41;
2639 for (uint32_t channels = 32; channels < 256; channels += 48) {
2640 DWConvMicrokernelTester()
2641 .cr(16)
2642 .kr(9)
2643 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002645 }
2646 }
2647
2648 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
2649 TEST_REQUIRES_X86_SSE41;
2650 for (uint32_t channels = 32; channels < 256; channels += 48) {
2651 DWConvMicrokernelTester()
2652 .cr(16)
2653 .kr(9)
2654 .channels(channels)
2655 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002657 }
2658 }
2659
2660 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
2661 TEST_REQUIRES_X86_SSE41;
2662 for (uint32_t channels = 32; channels < 256; channels += 48) {
2663 DWConvMicrokernelTester()
2664 .cr(16)
2665 .kr(9)
2666 .channels(channels)
2667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002668 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002669 }
2670 }
2671
2672 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
2673 TEST_REQUIRES_X86_SSE41;
2674 for (uint32_t channels = 1; channels < 16; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(16)
2677 .kr(9)
2678 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002679 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002680 }
2681 }
2682
2683 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
2684 TEST_REQUIRES_X86_SSE41;
2685 for (uint32_t channels = 17; channels < 32; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(16)
2688 .kr(9)
2689 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002691 }
2692 }
2693
2694 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
2695 TEST_REQUIRES_X86_SSE41;
2696 for (uint32_t channels = 17; channels < 32; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(16)
2699 .kr(9)
2700 .channels(channels)
2701 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002702 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002703 }
2704 }
2705
2706 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
2707 TEST_REQUIRES_X86_SSE41;
2708 for (uint32_t channels = 17; channels < 32; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(16)
2711 .kr(9)
2712 .channels(channels)
2713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002715 }
2716 }
2717
2718 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
2719 TEST_REQUIRES_X86_SSE41;
2720 for (size_t channels = 1; channels <= 80; channels += 15) {
2721 DWConvMicrokernelTester()
2722 .cr(16)
2723 .kr(9)
2724 .channels(channels)
2725 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002726 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002727 }
2728 }
2729
2730 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
2731 TEST_REQUIRES_X86_SSE41;
2732 for (size_t channels = 1; channels <= 80; channels += 15) {
2733 for (size_t step = 2; step <= 9; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(16)
2736 .kr(9)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002740 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002741 }
2742 }
2743 }
2744
2745 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
2746 TEST_REQUIRES_X86_SSE41;
2747 for (size_t channels = 1; channels <= 80; channels += 15) {
2748 DWConvMicrokernelTester()
2749 .cr(16)
2750 .kr(9)
2751 .channels(16)
2752 .width(5)
2753 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002754 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002755 }
2756 }
2757
2758 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
2759 TEST_REQUIRES_X86_SSE41;
2760 for (size_t channels = 1; channels <= 80; channels += 15) {
2761 DWConvMicrokernelTester()
2762 .cr(16)
2763 .kr(9)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002767 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002768 }
2769 }
2770
2771 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
2772 TEST_REQUIRES_X86_SSE41;
2773 for (size_t channels = 1; channels <= 80; channels += 15) {
2774 DWConvMicrokernelTester()
2775 .cr(16)
2776 .kr(9)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002780 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002781 }
2782 }
2783
2784 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
2785 TEST_REQUIRES_X86_SSE41;
2786 for (uint32_t channels = 32; channels < 256; channels += 48) {
2787 DWConvMicrokernelTester()
2788 .cr(16)
2789 .kr(9)
2790 .channels(channels)
2791 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002792 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002793 }
2794 }
2795
2796 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
2797 TEST_REQUIRES_X86_SSE41;
2798 for (uint32_t mz = 0; mz < 9; mz++) {
2799 for (uint32_t channels = 32; channels < 256; channels += 48) {
2800 DWConvMicrokernelTester()
2801 .cr(16)
2802 .kr(9)
2803 .channels(channels)
2804 .input_offset(304)
2805 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002806 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002807 }
2808 }
2809 }
2810#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2811
2812
2813#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2814 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_eq_24) {
2815 TEST_REQUIRES_X86_SSE41;
2816 DWConvMicrokernelTester()
2817 .cr(24)
2818 .kr(9)
2819 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08002820 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002821 }
2822
2823 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24) {
2824 TEST_REQUIRES_X86_SSE41;
2825 for (uint32_t channels = 48; channels < 384; channels += 72) {
2826 DWConvMicrokernelTester()
2827 .cr(24)
2828 .kr(9)
2829 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002831 }
2832 }
2833
2834 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
2835 TEST_REQUIRES_X86_SSE41;
2836 for (uint32_t channels = 48; channels < 384; channels += 72) {
2837 DWConvMicrokernelTester()
2838 .cr(24)
2839 .kr(9)
2840 .channels(channels)
2841 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002843 }
2844 }
2845
2846 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
2847 TEST_REQUIRES_X86_SSE41;
2848 for (uint32_t channels = 48; channels < 384; channels += 72) {
2849 DWConvMicrokernelTester()
2850 .cr(24)
2851 .kr(9)
2852 .channels(channels)
2853 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002854 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002855 }
2856 }
2857
2858 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_lt_24) {
2859 TEST_REQUIRES_X86_SSE41;
2860 for (uint32_t channels = 1; channels < 24; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(24)
2863 .kr(9)
2864 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002865 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002866 }
2867 }
2868
2869 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24) {
2870 TEST_REQUIRES_X86_SSE41;
2871 for (uint32_t channels = 25; channels < 48; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(24)
2874 .kr(9)
2875 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002877 }
2878 }
2879
2880 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
2881 TEST_REQUIRES_X86_SSE41;
2882 for (uint32_t channels = 25; channels < 48; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(24)
2885 .kr(9)
2886 .channels(channels)
2887 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002889 }
2890 }
2891
2892 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
2893 TEST_REQUIRES_X86_SSE41;
2894 for (uint32_t channels = 25; channels < 48; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(24)
2897 .kr(9)
2898 .channels(channels)
2899 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002900 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002901 }
2902 }
2903
2904 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel) {
2905 TEST_REQUIRES_X86_SSE41;
2906 for (size_t channels = 1; channels <= 120; channels += 23) {
2907 DWConvMicrokernelTester()
2908 .cr(24)
2909 .kr(9)
2910 .channels(channels)
2911 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002912 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002913 }
2914 }
2915
2916 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_step) {
2917 TEST_REQUIRES_X86_SSE41;
2918 for (size_t channels = 1; channels <= 120; channels += 23) {
2919 for (size_t step = 2; step <= 9; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(24)
2922 .kr(9)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002926 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002927 }
2928 }
2929 }
2930
2931 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
2932 TEST_REQUIRES_X86_SSE41;
2933 for (size_t channels = 1; channels <= 120; channels += 23) {
2934 DWConvMicrokernelTester()
2935 .cr(24)
2936 .kr(9)
2937 .channels(24)
2938 .width(5)
2939 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08002940 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002941 }
2942 }
2943
2944 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
2945 TEST_REQUIRES_X86_SSE41;
2946 for (size_t channels = 1; channels <= 120; channels += 23) {
2947 DWConvMicrokernelTester()
2948 .cr(24)
2949 .kr(9)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002953 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002954 }
2955 }
2956
2957 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
2958 TEST_REQUIRES_X86_SSE41;
2959 for (size_t channels = 1; channels <= 120; channels += 23) {
2960 DWConvMicrokernelTester()
2961 .cr(24)
2962 .kr(9)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002966 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002967 }
2968 }
2969
2970 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, input_offset) {
2971 TEST_REQUIRES_X86_SSE41;
2972 for (uint32_t channels = 48; channels < 384; channels += 72) {
2973 DWConvMicrokernelTester()
2974 .cr(24)
2975 .kr(9)
2976 .channels(channels)
2977 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08002978 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002979 }
2980 }
2981
2982 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, zero) {
2983 TEST_REQUIRES_X86_SSE41;
2984 for (uint32_t mz = 0; mz < 9; mz++) {
2985 for (uint32_t channels = 48; channels < 384; channels += 72) {
2986 DWConvMicrokernelTester()
2987 .cr(24)
2988 .kr(9)
2989 .channels(channels)
2990 .input_offset(464)
2991 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002992 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07002993 }
2994 }
2995 }
2996#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2997
2998
2999#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07003000 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_eq_8) {
3001 TEST_REQUIRES_X86_SSE41;
3002 DWConvMicrokernelTester()
3003 .cr(8)
3004 .kr(9)
3005 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003006 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003007 }
3008
3009 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8) {
3010 TEST_REQUIRES_X86_SSE41;
3011 for (uint32_t channels = 16; channels < 128; channels += 24) {
3012 DWConvMicrokernelTester()
3013 .cr(8)
3014 .kr(9)
3015 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003016 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003017 }
3018 }
3019
3020 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
3021 TEST_REQUIRES_X86_SSE41;
3022 for (uint32_t channels = 16; channels < 128; channels += 24) {
3023 DWConvMicrokernelTester()
3024 .cr(8)
3025 .kr(9)
3026 .channels(channels)
3027 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003028 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003029 }
3030 }
3031
3032 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
3033 TEST_REQUIRES_X86_SSE41;
3034 for (uint32_t channels = 16; channels < 128; channels += 24) {
3035 DWConvMicrokernelTester()
3036 .cr(8)
3037 .kr(9)
3038 .channels(channels)
3039 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003040 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003041 }
3042 }
3043
3044 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_lt_8) {
3045 TEST_REQUIRES_X86_SSE41;
3046 for (uint32_t channels = 1; channels < 8; channels++) {
3047 DWConvMicrokernelTester()
3048 .cr(8)
3049 .kr(9)
3050 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003051 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003052 }
3053 }
3054
3055 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8) {
3056 TEST_REQUIRES_X86_SSE41;
3057 for (uint32_t channels = 9; channels < 16; channels++) {
3058 DWConvMicrokernelTester()
3059 .cr(8)
3060 .kr(9)
3061 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003063 }
3064 }
3065
3066 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
3067 TEST_REQUIRES_X86_SSE41;
3068 for (uint32_t channels = 9; channels < 16; channels++) {
3069 DWConvMicrokernelTester()
3070 .cr(8)
3071 .kr(9)
3072 .channels(channels)
3073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003075 }
3076 }
3077
3078 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
3079 TEST_REQUIRES_X86_SSE41;
3080 for (uint32_t channels = 9; channels < 16; channels++) {
3081 DWConvMicrokernelTester()
3082 .cr(8)
3083 .kr(9)
3084 .channels(channels)
3085 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003087 }
3088 }
3089
3090 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel) {
3091 TEST_REQUIRES_X86_SSE41;
3092 for (size_t channels = 1; channels <= 40; channels += 7) {
3093 DWConvMicrokernelTester()
3094 .cr(8)
3095 .kr(9)
3096 .channels(channels)
3097 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003099 }
3100 }
3101
3102 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_step) {
3103 TEST_REQUIRES_X86_SSE41;
3104 for (size_t channels = 1; channels <= 40; channels += 7) {
3105 for (size_t step = 2; step <= 9; step++) {
3106 DWConvMicrokernelTester()
3107 .cr(8)
3108 .kr(9)
3109 .channels(channels)
3110 .width(3)
3111 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003112 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003113 }
3114 }
3115 }
3116
3117 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
3118 TEST_REQUIRES_X86_SSE41;
3119 for (size_t channels = 1; channels <= 40; channels += 7) {
3120 DWConvMicrokernelTester()
3121 .cr(8)
3122 .kr(9)
3123 .channels(8)
3124 .width(5)
3125 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003126 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003127 }
3128 }
3129
3130 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
3131 TEST_REQUIRES_X86_SSE41;
3132 for (size_t channels = 1; channels <= 40; channels += 7) {
3133 DWConvMicrokernelTester()
3134 .cr(8)
3135 .kr(9)
3136 .channels(channels)
3137 .width(3)
3138 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003140 }
3141 }
3142
3143 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
3144 TEST_REQUIRES_X86_SSE41;
3145 for (size_t channels = 1; channels <= 40; channels += 7) {
3146 DWConvMicrokernelTester()
3147 .cr(8)
3148 .kr(9)
3149 .channels(channels)
3150 .width(3)
3151 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003152 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003153 }
3154 }
3155
3156 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, input_offset) {
3157 TEST_REQUIRES_X86_SSE41;
3158 for (uint32_t channels = 16; channels < 128; channels += 24) {
3159 DWConvMicrokernelTester()
3160 .cr(8)
3161 .kr(9)
3162 .channels(channels)
3163 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003164 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003165 }
3166 }
3167
3168 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, zero) {
3169 TEST_REQUIRES_X86_SSE41;
3170 for (uint32_t mz = 0; mz < 9; mz++) {
3171 for (uint32_t channels = 16; channels < 128; channels += 24) {
3172 DWConvMicrokernelTester()
3173 .cr(8)
3174 .kr(9)
3175 .channels(channels)
3176 .input_offset(176)
3177 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003178 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003179 }
3180 }
3181 }
3182#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3183
3184
3185#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3186 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_eq_16) {
3187 TEST_REQUIRES_X86_SSE41;
3188 DWConvMicrokernelTester()
3189 .cr(16)
3190 .kr(9)
3191 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003192 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003193 }
3194
3195 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16) {
3196 TEST_REQUIRES_X86_SSE41;
3197 for (uint32_t channels = 32; channels < 256; channels += 48) {
3198 DWConvMicrokernelTester()
3199 .cr(16)
3200 .kr(9)
3201 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003203 }
3204 }
3205
3206 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
3207 TEST_REQUIRES_X86_SSE41;
3208 for (uint32_t channels = 32; channels < 256; channels += 48) {
3209 DWConvMicrokernelTester()
3210 .cr(16)
3211 .kr(9)
3212 .channels(channels)
3213 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003214 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003215 }
3216 }
3217
3218 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
3219 TEST_REQUIRES_X86_SSE41;
3220 for (uint32_t channels = 32; channels < 256; channels += 48) {
3221 DWConvMicrokernelTester()
3222 .cr(16)
3223 .kr(9)
3224 .channels(channels)
3225 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003227 }
3228 }
3229
3230 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_lt_16) {
3231 TEST_REQUIRES_X86_SSE41;
3232 for (uint32_t channels = 1; channels < 16; channels++) {
3233 DWConvMicrokernelTester()
3234 .cr(16)
3235 .kr(9)
3236 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003237 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003238 }
3239 }
3240
3241 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16) {
3242 TEST_REQUIRES_X86_SSE41;
3243 for (uint32_t channels = 17; channels < 32; channels++) {
3244 DWConvMicrokernelTester()
3245 .cr(16)
3246 .kr(9)
3247 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003249 }
3250 }
3251
3252 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
3253 TEST_REQUIRES_X86_SSE41;
3254 for (uint32_t channels = 17; channels < 32; channels++) {
3255 DWConvMicrokernelTester()
3256 .cr(16)
3257 .kr(9)
3258 .channels(channels)
3259 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003260 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003261 }
3262 }
3263
3264 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
3265 TEST_REQUIRES_X86_SSE41;
3266 for (uint32_t channels = 17; channels < 32; channels++) {
3267 DWConvMicrokernelTester()
3268 .cr(16)
3269 .kr(9)
3270 .channels(channels)
3271 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003273 }
3274 }
3275
3276 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel) {
3277 TEST_REQUIRES_X86_SSE41;
3278 for (size_t channels = 1; channels <= 80; channels += 15) {
3279 DWConvMicrokernelTester()
3280 .cr(16)
3281 .kr(9)
3282 .channels(channels)
3283 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003285 }
3286 }
3287
3288 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_step) {
3289 TEST_REQUIRES_X86_SSE41;
3290 for (size_t channels = 1; channels <= 80; channels += 15) {
3291 for (size_t step = 2; step <= 9; step++) {
3292 DWConvMicrokernelTester()
3293 .cr(16)
3294 .kr(9)
3295 .channels(channels)
3296 .width(3)
3297 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003298 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003299 }
3300 }
3301 }
3302
3303 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
3304 TEST_REQUIRES_X86_SSE41;
3305 for (size_t channels = 1; channels <= 80; channels += 15) {
3306 DWConvMicrokernelTester()
3307 .cr(16)
3308 .kr(9)
3309 .channels(16)
3310 .width(5)
3311 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003312 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003313 }
3314 }
3315
3316 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
3317 TEST_REQUIRES_X86_SSE41;
3318 for (size_t channels = 1; channels <= 80; channels += 15) {
3319 DWConvMicrokernelTester()
3320 .cr(16)
3321 .kr(9)
3322 .channels(channels)
3323 .width(3)
3324 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003325 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003326 }
3327 }
3328
3329 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
3330 TEST_REQUIRES_X86_SSE41;
3331 for (size_t channels = 1; channels <= 80; channels += 15) {
3332 DWConvMicrokernelTester()
3333 .cr(16)
3334 .kr(9)
3335 .channels(channels)
3336 .width(3)
3337 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003338 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003339 }
3340 }
3341
3342 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, input_offset) {
3343 TEST_REQUIRES_X86_SSE41;
3344 for (uint32_t channels = 32; channels < 256; channels += 48) {
3345 DWConvMicrokernelTester()
3346 .cr(16)
3347 .kr(9)
3348 .channels(channels)
3349 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08003350 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003351 }
3352 }
3353
3354 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, zero) {
3355 TEST_REQUIRES_X86_SSE41;
3356 for (uint32_t mz = 0; mz < 9; mz++) {
3357 for (uint32_t channels = 32; channels < 256; channels += 48) {
3358 DWConvMicrokernelTester()
3359 .cr(16)
3360 .kr(9)
3361 .channels(channels)
3362 .input_offset(304)
3363 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003364 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003365 }
3366 }
3367 }
3368#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3369
3370
3371#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -07003372 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
3373 TEST_REQUIRES_X86_AVX;
3374 DWConvMicrokernelTester()
3375 .cr(8)
3376 .kr(9)
3377 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003378 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003379 }
3380
3381 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
3382 TEST_REQUIRES_X86_AVX;
3383 for (uint32_t channels = 16; channels < 128; channels += 24) {
3384 DWConvMicrokernelTester()
3385 .cr(8)
3386 .kr(9)
3387 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003389 }
3390 }
3391
3392 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
3393 TEST_REQUIRES_X86_AVX;
3394 for (uint32_t channels = 16; channels < 128; channels += 24) {
3395 DWConvMicrokernelTester()
3396 .cr(8)
3397 .kr(9)
3398 .channels(channels)
3399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003400 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003401 }
3402 }
3403
3404 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
3405 TEST_REQUIRES_X86_AVX;
3406 for (uint32_t channels = 16; channels < 128; channels += 24) {
3407 DWConvMicrokernelTester()
3408 .cr(8)
3409 .kr(9)
3410 .channels(channels)
3411 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003413 }
3414 }
3415
3416 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
3417 TEST_REQUIRES_X86_AVX;
3418 for (uint32_t channels = 1; channels < 8; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(8)
3421 .kr(9)
3422 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003423 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003424 }
3425 }
3426
3427 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
3428 TEST_REQUIRES_X86_AVX;
3429 for (uint32_t channels = 9; channels < 16; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(8)
3432 .kr(9)
3433 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003435 }
3436 }
3437
3438 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
3439 TEST_REQUIRES_X86_AVX;
3440 for (uint32_t channels = 9; channels < 16; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(8)
3443 .kr(9)
3444 .channels(channels)
3445 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003446 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003447 }
3448 }
3449
3450 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
3451 TEST_REQUIRES_X86_AVX;
3452 for (uint32_t channels = 9; channels < 16; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(8)
3455 .kr(9)
3456 .channels(channels)
3457 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003459 }
3460 }
3461
3462 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
3463 TEST_REQUIRES_X86_AVX;
3464 for (size_t channels = 1; channels <= 40; channels += 7) {
3465 DWConvMicrokernelTester()
3466 .cr(8)
3467 .kr(9)
3468 .channels(channels)
3469 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003471 }
3472 }
3473
3474 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
3475 TEST_REQUIRES_X86_AVX;
3476 for (size_t channels = 1; channels <= 40; channels += 7) {
3477 for (size_t step = 2; step <= 9; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(8)
3480 .kr(9)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003484 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003485 }
3486 }
3487 }
3488
3489 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
3490 TEST_REQUIRES_X86_AVX;
3491 for (size_t channels = 1; channels <= 40; channels += 7) {
3492 DWConvMicrokernelTester()
3493 .cr(8)
3494 .kr(9)
3495 .channels(8)
3496 .width(5)
3497 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003498 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003499 }
3500 }
3501
3502 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
3503 TEST_REQUIRES_X86_AVX;
3504 for (size_t channels = 1; channels <= 40; channels += 7) {
3505 DWConvMicrokernelTester()
3506 .cr(8)
3507 .kr(9)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003511 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003512 }
3513 }
3514
3515 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
3516 TEST_REQUIRES_X86_AVX;
3517 for (size_t channels = 1; channels <= 40; channels += 7) {
3518 DWConvMicrokernelTester()
3519 .cr(8)
3520 .kr(9)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003524 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003525 }
3526 }
3527
3528 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
3529 TEST_REQUIRES_X86_AVX;
3530 for (uint32_t channels = 16; channels < 128; channels += 24) {
3531 DWConvMicrokernelTester()
3532 .cr(8)
3533 .kr(9)
3534 .channels(channels)
3535 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003536 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003537 }
3538 }
3539
3540 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
3541 TEST_REQUIRES_X86_AVX;
3542 for (uint32_t mz = 0; mz < 9; mz++) {
3543 for (uint32_t channels = 16; channels < 128; channels += 24) {
3544 DWConvMicrokernelTester()
3545 .cr(8)
3546 .kr(9)
3547 .channels(channels)
3548 .input_offset(176)
3549 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003550 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003551 }
3552 }
3553 }
3554#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3555
3556
3557#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3558 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
3559 TEST_REQUIRES_X86_AVX;
3560 DWConvMicrokernelTester()
3561 .cr(16)
3562 .kr(9)
3563 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003564 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003565 }
3566
3567 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
3568 TEST_REQUIRES_X86_AVX;
3569 for (uint32_t channels = 32; channels < 256; channels += 48) {
3570 DWConvMicrokernelTester()
3571 .cr(16)
3572 .kr(9)
3573 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003575 }
3576 }
3577
3578 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
3579 TEST_REQUIRES_X86_AVX;
3580 for (uint32_t channels = 32; channels < 256; channels += 48) {
3581 DWConvMicrokernelTester()
3582 .cr(16)
3583 .kr(9)
3584 .channels(channels)
3585 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003586 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003587 }
3588 }
3589
3590 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
3591 TEST_REQUIRES_X86_AVX;
3592 for (uint32_t channels = 32; channels < 256; channels += 48) {
3593 DWConvMicrokernelTester()
3594 .cr(16)
3595 .kr(9)
3596 .channels(channels)
3597 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003599 }
3600 }
3601
3602 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
3603 TEST_REQUIRES_X86_AVX;
3604 for (uint32_t channels = 1; channels < 16; channels++) {
3605 DWConvMicrokernelTester()
3606 .cr(16)
3607 .kr(9)
3608 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003609 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003610 }
3611 }
3612
3613 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
3614 TEST_REQUIRES_X86_AVX;
3615 for (uint32_t channels = 17; channels < 32; channels++) {
3616 DWConvMicrokernelTester()
3617 .cr(16)
3618 .kr(9)
3619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003621 }
3622 }
3623
3624 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
3625 TEST_REQUIRES_X86_AVX;
3626 for (uint32_t channels = 17; channels < 32; channels++) {
3627 DWConvMicrokernelTester()
3628 .cr(16)
3629 .kr(9)
3630 .channels(channels)
3631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003632 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003633 }
3634 }
3635
3636 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
3637 TEST_REQUIRES_X86_AVX;
3638 for (uint32_t channels = 17; channels < 32; channels++) {
3639 DWConvMicrokernelTester()
3640 .cr(16)
3641 .kr(9)
3642 .channels(channels)
3643 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003645 }
3646 }
3647
3648 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
3649 TEST_REQUIRES_X86_AVX;
3650 for (size_t channels = 1; channels <= 80; channels += 15) {
3651 DWConvMicrokernelTester()
3652 .cr(16)
3653 .kr(9)
3654 .channels(channels)
3655 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003657 }
3658 }
3659
3660 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
3661 TEST_REQUIRES_X86_AVX;
3662 for (size_t channels = 1; channels <= 80; channels += 15) {
3663 for (size_t step = 2; step <= 9; step++) {
3664 DWConvMicrokernelTester()
3665 .cr(16)
3666 .kr(9)
3667 .channels(channels)
3668 .width(3)
3669 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003670 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003671 }
3672 }
3673 }
3674
3675 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
3676 TEST_REQUIRES_X86_AVX;
3677 for (size_t channels = 1; channels <= 80; channels += 15) {
3678 DWConvMicrokernelTester()
3679 .cr(16)
3680 .kr(9)
3681 .channels(16)
3682 .width(5)
3683 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003684 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003685 }
3686 }
3687
3688 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
3689 TEST_REQUIRES_X86_AVX;
3690 for (size_t channels = 1; channels <= 80; channels += 15) {
3691 DWConvMicrokernelTester()
3692 .cr(16)
3693 .kr(9)
3694 .channels(channels)
3695 .width(3)
3696 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003697 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003698 }
3699 }
3700
3701 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
3702 TEST_REQUIRES_X86_AVX;
3703 for (size_t channels = 1; channels <= 80; channels += 15) {
3704 DWConvMicrokernelTester()
3705 .cr(16)
3706 .kr(9)
3707 .channels(channels)
3708 .width(3)
3709 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003710 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003711 }
3712 }
3713
3714 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
3715 TEST_REQUIRES_X86_AVX;
3716 for (uint32_t channels = 32; channels < 256; channels += 48) {
3717 DWConvMicrokernelTester()
3718 .cr(16)
3719 .kr(9)
3720 .channels(channels)
3721 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08003722 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003723 }
3724 }
3725
3726 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
3727 TEST_REQUIRES_X86_AVX;
3728 for (uint32_t mz = 0; mz < 9; mz++) {
3729 for (uint32_t channels = 32; channels < 256; channels += 48) {
3730 DWConvMicrokernelTester()
3731 .cr(16)
3732 .kr(9)
3733 .channels(channels)
3734 .input_offset(304)
3735 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003736 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003737 }
3738 }
3739 }
3740#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3741
3742
3743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3744 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_eq_24) {
3745 TEST_REQUIRES_X86_AVX;
3746 DWConvMicrokernelTester()
3747 .cr(24)
3748 .kr(9)
3749 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08003750 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003751 }
3752
3753 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24) {
3754 TEST_REQUIRES_X86_AVX;
3755 for (uint32_t channels = 48; channels < 384; channels += 72) {
3756 DWConvMicrokernelTester()
3757 .cr(24)
3758 .kr(9)
3759 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003761 }
3762 }
3763
3764 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
3765 TEST_REQUIRES_X86_AVX;
3766 for (uint32_t channels = 48; channels < 384; channels += 72) {
3767 DWConvMicrokernelTester()
3768 .cr(24)
3769 .kr(9)
3770 .channels(channels)
3771 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003772 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003773 }
3774 }
3775
3776 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
3777 TEST_REQUIRES_X86_AVX;
3778 for (uint32_t channels = 48; channels < 384; channels += 72) {
3779 DWConvMicrokernelTester()
3780 .cr(24)
3781 .kr(9)
3782 .channels(channels)
3783 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003784 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003785 }
3786 }
3787
3788 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_lt_24) {
3789 TEST_REQUIRES_X86_AVX;
3790 for (uint32_t channels = 1; channels < 24; channels++) {
3791 DWConvMicrokernelTester()
3792 .cr(24)
3793 .kr(9)
3794 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003795 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003796 }
3797 }
3798
3799 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24) {
3800 TEST_REQUIRES_X86_AVX;
3801 for (uint32_t channels = 25; channels < 48; channels++) {
3802 DWConvMicrokernelTester()
3803 .cr(24)
3804 .kr(9)
3805 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003806 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003807 }
3808 }
3809
3810 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
3811 TEST_REQUIRES_X86_AVX;
3812 for (uint32_t channels = 25; channels < 48; channels++) {
3813 DWConvMicrokernelTester()
3814 .cr(24)
3815 .kr(9)
3816 .channels(channels)
3817 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003818 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003819 }
3820 }
3821
3822 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
3823 TEST_REQUIRES_X86_AVX;
3824 for (uint32_t channels = 25; channels < 48; channels++) {
3825 DWConvMicrokernelTester()
3826 .cr(24)
3827 .kr(9)
3828 .channels(channels)
3829 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003831 }
3832 }
3833
3834 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel) {
3835 TEST_REQUIRES_X86_AVX;
3836 for (size_t channels = 1; channels <= 120; channels += 23) {
3837 DWConvMicrokernelTester()
3838 .cr(24)
3839 .kr(9)
3840 .channels(channels)
3841 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003843 }
3844 }
3845
3846 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_step) {
3847 TEST_REQUIRES_X86_AVX;
3848 for (size_t channels = 1; channels <= 120; channels += 23) {
3849 for (size_t step = 2; step <= 9; step++) {
3850 DWConvMicrokernelTester()
3851 .cr(24)
3852 .kr(9)
3853 .channels(channels)
3854 .width(3)
3855 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003856 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003857 }
3858 }
3859 }
3860
3861 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
3862 TEST_REQUIRES_X86_AVX;
3863 for (size_t channels = 1; channels <= 120; channels += 23) {
3864 DWConvMicrokernelTester()
3865 .cr(24)
3866 .kr(9)
3867 .channels(24)
3868 .width(5)
3869 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08003870 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003871 }
3872 }
3873
3874 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmin) {
3875 TEST_REQUIRES_X86_AVX;
3876 for (size_t channels = 1; channels <= 120; channels += 23) {
3877 DWConvMicrokernelTester()
3878 .cr(24)
3879 .kr(9)
3880 .channels(channels)
3881 .width(3)
3882 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003883 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003884 }
3885 }
3886
3887 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmax) {
3888 TEST_REQUIRES_X86_AVX;
3889 for (size_t channels = 1; channels <= 120; channels += 23) {
3890 DWConvMicrokernelTester()
3891 .cr(24)
3892 .kr(9)
3893 .channels(channels)
3894 .width(3)
3895 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003896 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003897 }
3898 }
3899
3900 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, input_offset) {
3901 TEST_REQUIRES_X86_AVX;
3902 for (uint32_t channels = 48; channels < 384; channels += 72) {
3903 DWConvMicrokernelTester()
3904 .cr(24)
3905 .kr(9)
3906 .channels(channels)
3907 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08003908 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003909 }
3910 }
3911
3912 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, zero) {
3913 TEST_REQUIRES_X86_AVX;
3914 for (uint32_t mz = 0; mz < 9; mz++) {
3915 for (uint32_t channels = 48; channels < 384; channels += 72) {
3916 DWConvMicrokernelTester()
3917 .cr(24)
3918 .kr(9)
3919 .channels(channels)
3920 .input_offset(464)
3921 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003922 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07003923 }
3924 }
3925 }
3926#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927
3928
3929#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07003930 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_eq_8) {
3931 TEST_REQUIRES_X86_AVX;
3932 DWConvMicrokernelTester()
3933 .cr(8)
3934 .kr(9)
3935 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003936 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003937 }
3938
3939 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8) {
3940 TEST_REQUIRES_X86_AVX;
3941 for (uint32_t channels = 16; channels < 128; channels += 24) {
3942 DWConvMicrokernelTester()
3943 .cr(8)
3944 .kr(9)
3945 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003947 }
3948 }
3949
3950 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmin) {
3951 TEST_REQUIRES_X86_AVX;
3952 for (uint32_t channels = 16; channels < 128; channels += 24) {
3953 DWConvMicrokernelTester()
3954 .cr(8)
3955 .kr(9)
3956 .channels(channels)
3957 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003958 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003959 }
3960 }
3961
3962 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmax) {
3963 TEST_REQUIRES_X86_AVX;
3964 for (uint32_t channels = 16; channels < 128; channels += 24) {
3965 DWConvMicrokernelTester()
3966 .cr(8)
3967 .kr(9)
3968 .channels(channels)
3969 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003970 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003971 }
3972 }
3973
3974 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_lt_8) {
3975 TEST_REQUIRES_X86_AVX;
3976 for (uint32_t channels = 1; channels < 8; channels++) {
3977 DWConvMicrokernelTester()
3978 .cr(8)
3979 .kr(9)
3980 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003981 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003982 }
3983 }
3984
3985 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8) {
3986 TEST_REQUIRES_X86_AVX;
3987 for (uint32_t channels = 9; channels < 16; channels++) {
3988 DWConvMicrokernelTester()
3989 .cr(8)
3990 .kr(9)
3991 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003992 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07003993 }
3994 }
3995
3996 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
3997 TEST_REQUIRES_X86_AVX;
3998 for (uint32_t channels = 9; channels < 16; channels++) {
3999 DWConvMicrokernelTester()
4000 .cr(8)
4001 .kr(9)
4002 .channels(channels)
4003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004004 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004005 }
4006 }
4007
4008 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
4009 TEST_REQUIRES_X86_AVX;
4010 for (uint32_t channels = 9; channels < 16; channels++) {
4011 DWConvMicrokernelTester()
4012 .cr(8)
4013 .kr(9)
4014 .channels(channels)
4015 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004016 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004017 }
4018 }
4019
4020 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel) {
4021 TEST_REQUIRES_X86_AVX;
4022 for (size_t channels = 1; channels <= 40; channels += 7) {
4023 DWConvMicrokernelTester()
4024 .cr(8)
4025 .kr(9)
4026 .channels(channels)
4027 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004028 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004029 }
4030 }
4031
4032 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_step) {
4033 TEST_REQUIRES_X86_AVX;
4034 for (size_t channels = 1; channels <= 40; channels += 7) {
4035 for (size_t step = 2; step <= 9; step++) {
4036 DWConvMicrokernelTester()
4037 .cr(8)
4038 .kr(9)
4039 .channels(channels)
4040 .width(3)
4041 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004042 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004043 }
4044 }
4045 }
4046
4047 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
4048 TEST_REQUIRES_X86_AVX;
4049 for (size_t channels = 1; channels <= 40; channels += 7) {
4050 DWConvMicrokernelTester()
4051 .cr(8)
4052 .kr(9)
4053 .channels(8)
4054 .width(5)
4055 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004056 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004057 }
4058 }
4059
4060 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
4061 TEST_REQUIRES_X86_AVX;
4062 for (size_t channels = 1; channels <= 40; channels += 7) {
4063 DWConvMicrokernelTester()
4064 .cr(8)
4065 .kr(9)
4066 .channels(channels)
4067 .width(3)
4068 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004069 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004070 }
4071 }
4072
4073 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
4074 TEST_REQUIRES_X86_AVX;
4075 for (size_t channels = 1; channels <= 40; channels += 7) {
4076 DWConvMicrokernelTester()
4077 .cr(8)
4078 .kr(9)
4079 .channels(channels)
4080 .width(3)
4081 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004082 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004083 }
4084 }
4085
4086 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, input_offset) {
4087 TEST_REQUIRES_X86_AVX;
4088 for (uint32_t channels = 16; channels < 128; channels += 24) {
4089 DWConvMicrokernelTester()
4090 .cr(8)
4091 .kr(9)
4092 .channels(channels)
4093 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004095 }
4096 }
4097
4098 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, zero) {
4099 TEST_REQUIRES_X86_AVX;
4100 for (uint32_t mz = 0; mz < 9; mz++) {
4101 for (uint32_t channels = 16; channels < 128; channels += 24) {
4102 DWConvMicrokernelTester()
4103 .cr(8)
4104 .kr(9)
4105 .channels(channels)
4106 .input_offset(176)
4107 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004109 }
4110 }
4111 }
4112#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113
4114
4115#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4116 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_eq_16) {
4117 TEST_REQUIRES_X86_AVX;
4118 DWConvMicrokernelTester()
4119 .cr(16)
4120 .kr(9)
4121 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004122 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004123 }
4124
4125 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16) {
4126 TEST_REQUIRES_X86_AVX;
4127 for (uint32_t channels = 32; channels < 256; channels += 48) {
4128 DWConvMicrokernelTester()
4129 .cr(16)
4130 .kr(9)
4131 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004133 }
4134 }
4135
4136 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmin) {
4137 TEST_REQUIRES_X86_AVX;
4138 for (uint32_t channels = 32; channels < 256; channels += 48) {
4139 DWConvMicrokernelTester()
4140 .cr(16)
4141 .kr(9)
4142 .channels(channels)
4143 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004144 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004145 }
4146 }
4147
4148 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmax) {
4149 TEST_REQUIRES_X86_AVX;
4150 for (uint32_t channels = 32; channels < 256; channels += 48) {
4151 DWConvMicrokernelTester()
4152 .cr(16)
4153 .kr(9)
4154 .channels(channels)
4155 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004156 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004157 }
4158 }
4159
4160 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_lt_16) {
4161 TEST_REQUIRES_X86_AVX;
4162 for (uint32_t channels = 1; channels < 16; channels++) {
4163 DWConvMicrokernelTester()
4164 .cr(16)
4165 .kr(9)
4166 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004167 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004168 }
4169 }
4170
4171 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16) {
4172 TEST_REQUIRES_X86_AVX;
4173 for (uint32_t channels = 17; channels < 32; channels++) {
4174 DWConvMicrokernelTester()
4175 .cr(16)
4176 .kr(9)
4177 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004178 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004179 }
4180 }
4181
4182 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
4183 TEST_REQUIRES_X86_AVX;
4184 for (uint32_t channels = 17; channels < 32; channels++) {
4185 DWConvMicrokernelTester()
4186 .cr(16)
4187 .kr(9)
4188 .channels(channels)
4189 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004190 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004191 }
4192 }
4193
4194 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
4195 TEST_REQUIRES_X86_AVX;
4196 for (uint32_t channels = 17; channels < 32; channels++) {
4197 DWConvMicrokernelTester()
4198 .cr(16)
4199 .kr(9)
4200 .channels(channels)
4201 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004203 }
4204 }
4205
4206 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel) {
4207 TEST_REQUIRES_X86_AVX;
4208 for (size_t channels = 1; channels <= 80; channels += 15) {
4209 DWConvMicrokernelTester()
4210 .cr(16)
4211 .kr(9)
4212 .channels(channels)
4213 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004214 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004215 }
4216 }
4217
4218 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_step) {
4219 TEST_REQUIRES_X86_AVX;
4220 for (size_t channels = 1; channels <= 80; channels += 15) {
4221 for (size_t step = 2; step <= 9; step++) {
4222 DWConvMicrokernelTester()
4223 .cr(16)
4224 .kr(9)
4225 .channels(channels)
4226 .width(3)
4227 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004228 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004229 }
4230 }
4231 }
4232
4233 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
4234 TEST_REQUIRES_X86_AVX;
4235 for (size_t channels = 1; channels <= 80; channels += 15) {
4236 DWConvMicrokernelTester()
4237 .cr(16)
4238 .kr(9)
4239 .channels(16)
4240 .width(5)
4241 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004242 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004243 }
4244 }
4245
4246 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
4247 TEST_REQUIRES_X86_AVX;
4248 for (size_t channels = 1; channels <= 80; channels += 15) {
4249 DWConvMicrokernelTester()
4250 .cr(16)
4251 .kr(9)
4252 .channels(channels)
4253 .width(3)
4254 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004255 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004256 }
4257 }
4258
4259 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
4260 TEST_REQUIRES_X86_AVX;
4261 for (size_t channels = 1; channels <= 80; channels += 15) {
4262 DWConvMicrokernelTester()
4263 .cr(16)
4264 .kr(9)
4265 .channels(channels)
4266 .width(3)
4267 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004268 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004269 }
4270 }
4271
4272 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, input_offset) {
4273 TEST_REQUIRES_X86_AVX;
4274 for (uint32_t channels = 32; channels < 256; channels += 48) {
4275 DWConvMicrokernelTester()
4276 .cr(16)
4277 .kr(9)
4278 .channels(channels)
4279 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004280 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004281 }
4282 }
4283
4284 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, zero) {
4285 TEST_REQUIRES_X86_AVX;
4286 for (uint32_t mz = 0; mz < 9; mz++) {
4287 for (uint32_t channels = 32; channels < 256; channels += 48) {
4288 DWConvMicrokernelTester()
4289 .cr(16)
4290 .kr(9)
4291 .channels(channels)
4292 .input_offset(304)
4293 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004294 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004295 }
4296 }
4297 }
4298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4299
4300
4301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4302 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_eq_8) {
4303 TEST_REQUIRES_X86_XOP;
4304 DWConvMicrokernelTester()
4305 .cr(8)
4306 .kr(9)
4307 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004308 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004309 }
4310
4311 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8) {
4312 TEST_REQUIRES_X86_XOP;
4313 for (uint32_t channels = 16; channels < 128; channels += 24) {
4314 DWConvMicrokernelTester()
4315 .cr(8)
4316 .kr(9)
4317 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004319 }
4320 }
4321
4322 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmin) {
4323 TEST_REQUIRES_X86_XOP;
4324 for (uint32_t channels = 16; channels < 128; channels += 24) {
4325 DWConvMicrokernelTester()
4326 .cr(8)
4327 .kr(9)
4328 .channels(channels)
4329 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004330 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004331 }
4332 }
4333
4334 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmax) {
4335 TEST_REQUIRES_X86_XOP;
4336 for (uint32_t channels = 16; channels < 128; channels += 24) {
4337 DWConvMicrokernelTester()
4338 .cr(8)
4339 .kr(9)
4340 .channels(channels)
4341 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004343 }
4344 }
4345
4346 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_lt_8) {
4347 TEST_REQUIRES_X86_XOP;
4348 for (uint32_t channels = 1; channels < 8; channels++) {
4349 DWConvMicrokernelTester()
4350 .cr(8)
4351 .kr(9)
4352 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004353 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004354 }
4355 }
4356
4357 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8) {
4358 TEST_REQUIRES_X86_XOP;
4359 for (uint32_t channels = 9; channels < 16; channels++) {
4360 DWConvMicrokernelTester()
4361 .cr(8)
4362 .kr(9)
4363 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004364 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004365 }
4366 }
4367
4368 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
4369 TEST_REQUIRES_X86_XOP;
4370 for (uint32_t channels = 9; channels < 16; channels++) {
4371 DWConvMicrokernelTester()
4372 .cr(8)
4373 .kr(9)
4374 .channels(channels)
4375 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004376 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004377 }
4378 }
4379
4380 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
4381 TEST_REQUIRES_X86_XOP;
4382 for (uint32_t channels = 9; channels < 16; channels++) {
4383 DWConvMicrokernelTester()
4384 .cr(8)
4385 .kr(9)
4386 .channels(channels)
4387 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004389 }
4390 }
4391
4392 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel) {
4393 TEST_REQUIRES_X86_XOP;
4394 for (size_t channels = 1; channels <= 40; channels += 7) {
4395 DWConvMicrokernelTester()
4396 .cr(8)
4397 .kr(9)
4398 .channels(channels)
4399 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004400 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004401 }
4402 }
4403
4404 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_step) {
4405 TEST_REQUIRES_X86_XOP;
4406 for (size_t channels = 1; channels <= 40; channels += 7) {
4407 for (size_t step = 2; step <= 9; step++) {
4408 DWConvMicrokernelTester()
4409 .cr(8)
4410 .kr(9)
4411 .channels(channels)
4412 .width(3)
4413 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004414 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004415 }
4416 }
4417 }
4418
4419 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
4420 TEST_REQUIRES_X86_XOP;
4421 for (size_t channels = 1; channels <= 40; channels += 7) {
4422 DWConvMicrokernelTester()
4423 .cr(8)
4424 .kr(9)
4425 .channels(8)
4426 .width(5)
4427 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004428 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004429 }
4430 }
4431
4432 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
4433 TEST_REQUIRES_X86_XOP;
4434 for (size_t channels = 1; channels <= 40; channels += 7) {
4435 DWConvMicrokernelTester()
4436 .cr(8)
4437 .kr(9)
4438 .channels(channels)
4439 .width(3)
4440 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004441 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004442 }
4443 }
4444
4445 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
4446 TEST_REQUIRES_X86_XOP;
4447 for (size_t channels = 1; channels <= 40; channels += 7) {
4448 DWConvMicrokernelTester()
4449 .cr(8)
4450 .kr(9)
4451 .channels(channels)
4452 .width(3)
4453 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004454 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004455 }
4456 }
4457
4458 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, input_offset) {
4459 TEST_REQUIRES_X86_XOP;
4460 for (uint32_t channels = 16; channels < 128; channels += 24) {
4461 DWConvMicrokernelTester()
4462 .cr(8)
4463 .kr(9)
4464 .channels(channels)
4465 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004466 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004467 }
4468 }
4469
4470 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, zero) {
4471 TEST_REQUIRES_X86_XOP;
4472 for (uint32_t mz = 0; mz < 9; mz++) {
4473 for (uint32_t channels = 16; channels < 128; channels += 24) {
4474 DWConvMicrokernelTester()
4475 .cr(8)
4476 .kr(9)
4477 .channels(channels)
4478 .input_offset(176)
4479 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004480 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004481 }
4482 }
4483 }
4484#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4485
4486
4487#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4488 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_eq_16) {
4489 TEST_REQUIRES_X86_XOP;
4490 DWConvMicrokernelTester()
4491 .cr(16)
4492 .kr(9)
4493 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004494 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004495 }
4496
4497 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16) {
4498 TEST_REQUIRES_X86_XOP;
4499 for (uint32_t channels = 32; channels < 256; channels += 48) {
4500 DWConvMicrokernelTester()
4501 .cr(16)
4502 .kr(9)
4503 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004505 }
4506 }
4507
4508 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmin) {
4509 TEST_REQUIRES_X86_XOP;
4510 for (uint32_t channels = 32; channels < 256; channels += 48) {
4511 DWConvMicrokernelTester()
4512 .cr(16)
4513 .kr(9)
4514 .channels(channels)
4515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004516 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004517 }
4518 }
4519
4520 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmax) {
4521 TEST_REQUIRES_X86_XOP;
4522 for (uint32_t channels = 32; channels < 256; channels += 48) {
4523 DWConvMicrokernelTester()
4524 .cr(16)
4525 .kr(9)
4526 .channels(channels)
4527 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004528 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004529 }
4530 }
4531
4532 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_lt_16) {
4533 TEST_REQUIRES_X86_XOP;
4534 for (uint32_t channels = 1; channels < 16; channels++) {
4535 DWConvMicrokernelTester()
4536 .cr(16)
4537 .kr(9)
4538 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004539 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004540 }
4541 }
4542
4543 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16) {
4544 TEST_REQUIRES_X86_XOP;
4545 for (uint32_t channels = 17; channels < 32; channels++) {
4546 DWConvMicrokernelTester()
4547 .cr(16)
4548 .kr(9)
4549 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004550 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004551 }
4552 }
4553
4554 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
4555 TEST_REQUIRES_X86_XOP;
4556 for (uint32_t channels = 17; channels < 32; channels++) {
4557 DWConvMicrokernelTester()
4558 .cr(16)
4559 .kr(9)
4560 .channels(channels)
4561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004563 }
4564 }
4565
4566 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
4567 TEST_REQUIRES_X86_XOP;
4568 for (uint32_t channels = 17; channels < 32; channels++) {
4569 DWConvMicrokernelTester()
4570 .cr(16)
4571 .kr(9)
4572 .channels(channels)
4573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004575 }
4576 }
4577
4578 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel) {
4579 TEST_REQUIRES_X86_XOP;
4580 for (size_t channels = 1; channels <= 80; channels += 15) {
4581 DWConvMicrokernelTester()
4582 .cr(16)
4583 .kr(9)
4584 .channels(channels)
4585 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004586 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004587 }
4588 }
4589
4590 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_step) {
4591 TEST_REQUIRES_X86_XOP;
4592 for (size_t channels = 1; channels <= 80; channels += 15) {
4593 for (size_t step = 2; step <= 9; step++) {
4594 DWConvMicrokernelTester()
4595 .cr(16)
4596 .kr(9)
4597 .channels(channels)
4598 .width(3)
4599 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004600 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004601 }
4602 }
4603 }
4604
4605 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
4606 TEST_REQUIRES_X86_XOP;
4607 for (size_t channels = 1; channels <= 80; channels += 15) {
4608 DWConvMicrokernelTester()
4609 .cr(16)
4610 .kr(9)
4611 .channels(16)
4612 .width(5)
4613 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004614 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004615 }
4616 }
4617
4618 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
4619 TEST_REQUIRES_X86_XOP;
4620 for (size_t channels = 1; channels <= 80; channels += 15) {
4621 DWConvMicrokernelTester()
4622 .cr(16)
4623 .kr(9)
4624 .channels(channels)
4625 .width(3)
4626 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004627 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004628 }
4629 }
4630
4631 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
4632 TEST_REQUIRES_X86_XOP;
4633 for (size_t channels = 1; channels <= 80; channels += 15) {
4634 DWConvMicrokernelTester()
4635 .cr(16)
4636 .kr(9)
4637 .channels(channels)
4638 .width(3)
4639 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004640 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004641 }
4642 }
4643
4644 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, input_offset) {
4645 TEST_REQUIRES_X86_XOP;
4646 for (uint32_t channels = 32; channels < 256; channels += 48) {
4647 DWConvMicrokernelTester()
4648 .cr(16)
4649 .kr(9)
4650 .channels(channels)
4651 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004652 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004653 }
4654 }
4655
4656 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, zero) {
4657 TEST_REQUIRES_X86_XOP;
4658 for (uint32_t mz = 0; mz < 9; mz++) {
4659 for (uint32_t channels = 32; channels < 256; channels += 48) {
4660 DWConvMicrokernelTester()
4661 .cr(16)
4662 .kr(9)
4663 .channels(channels)
4664 .input_offset(304)
4665 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004666 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004667 }
4668 }
4669 }
4670#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671
4672
4673#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -07004674 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_eq_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004675 TEST_REQUIRES_X86_AVX2;
4676 DWConvMicrokernelTester()
4677 .cr(16)
4678 .kr(9)
4679 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004680 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004681 }
4682
Marat Dukhan881ab022021-07-28 13:49:26 -07004683 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004684 TEST_REQUIRES_X86_AVX2;
4685 for (uint32_t channels = 32; channels < 256; channels += 48) {
4686 DWConvMicrokernelTester()
4687 .cr(16)
4688 .kr(9)
4689 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004691 }
4692 }
4693
Marat Dukhan881ab022021-07-28 13:49:26 -07004694 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004695 TEST_REQUIRES_X86_AVX2;
4696 for (uint32_t channels = 32; channels < 256; channels += 48) {
4697 DWConvMicrokernelTester()
4698 .cr(16)
4699 .kr(9)
4700 .channels(channels)
4701 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004702 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004703 }
4704 }
4705
Marat Dukhan881ab022021-07-28 13:49:26 -07004706 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004707 TEST_REQUIRES_X86_AVX2;
4708 for (uint32_t channels = 32; channels < 256; channels += 48) {
4709 DWConvMicrokernelTester()
4710 .cr(16)
4711 .kr(9)
4712 .channels(channels)
4713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004715 }
4716 }
4717
Marat Dukhan881ab022021-07-28 13:49:26 -07004718 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_lt_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004719 TEST_REQUIRES_X86_AVX2;
4720 for (uint32_t channels = 1; channels < 16; channels++) {
4721 DWConvMicrokernelTester()
4722 .cr(16)
4723 .kr(9)
4724 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004725 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004726 }
4727 }
4728
Marat Dukhan881ab022021-07-28 13:49:26 -07004729 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004730 TEST_REQUIRES_X86_AVX2;
4731 for (uint32_t channels = 17; channels < 32; channels++) {
4732 DWConvMicrokernelTester()
4733 .cr(16)
4734 .kr(9)
4735 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004736 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004737 }
4738 }
4739
Marat Dukhan881ab022021-07-28 13:49:26 -07004740 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004741 TEST_REQUIRES_X86_AVX2;
4742 for (uint32_t channels = 17; channels < 32; channels++) {
4743 DWConvMicrokernelTester()
4744 .cr(16)
4745 .kr(9)
4746 .channels(channels)
4747 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004748 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004749 }
4750 }
4751
Marat Dukhan881ab022021-07-28 13:49:26 -07004752 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004753 TEST_REQUIRES_X86_AVX2;
4754 for (uint32_t channels = 17; channels < 32; channels++) {
4755 DWConvMicrokernelTester()
4756 .cr(16)
4757 .kr(9)
4758 .channels(channels)
4759 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004761 }
4762 }
4763
Marat Dukhan881ab022021-07-28 13:49:26 -07004764 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004765 TEST_REQUIRES_X86_AVX2;
4766 for (size_t channels = 1; channels <= 80; channels += 15) {
4767 DWConvMicrokernelTester()
4768 .cr(16)
4769 .kr(9)
4770 .channels(channels)
4771 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004772 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004773 }
4774 }
4775
Marat Dukhan881ab022021-07-28 13:49:26 -07004776 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004777 TEST_REQUIRES_X86_AVX2;
4778 for (size_t channels = 1; channels <= 80; channels += 15) {
4779 for (size_t step = 2; step <= 9; step++) {
4780 DWConvMicrokernelTester()
4781 .cr(16)
4782 .kr(9)
4783 .channels(channels)
4784 .width(3)
4785 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004786 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004787 }
4788 }
4789 }
4790
Marat Dukhan881ab022021-07-28 13:49:26 -07004791 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004792 TEST_REQUIRES_X86_AVX2;
4793 for (size_t channels = 1; channels <= 80; channels += 15) {
4794 DWConvMicrokernelTester()
4795 .cr(16)
4796 .kr(9)
4797 .channels(16)
4798 .width(5)
4799 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004800 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004801 }
4802 }
4803
Marat Dukhan881ab022021-07-28 13:49:26 -07004804 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004805 TEST_REQUIRES_X86_AVX2;
4806 for (size_t channels = 1; channels <= 80; channels += 15) {
4807 DWConvMicrokernelTester()
4808 .cr(16)
4809 .kr(9)
4810 .channels(channels)
4811 .width(3)
4812 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004813 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004814 }
4815 }
4816
Marat Dukhan881ab022021-07-28 13:49:26 -07004817 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004818 TEST_REQUIRES_X86_AVX2;
4819 for (size_t channels = 1; channels <= 80; channels += 15) {
4820 DWConvMicrokernelTester()
4821 .cr(16)
4822 .kr(9)
4823 .channels(channels)
4824 .width(3)
4825 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004826 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004827 }
4828 }
4829
Marat Dukhan881ab022021-07-28 13:49:26 -07004830 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004831 TEST_REQUIRES_X86_AVX2;
4832 for (uint32_t channels = 32; channels < 256; channels += 48) {
4833 DWConvMicrokernelTester()
4834 .cr(16)
4835 .kr(9)
4836 .channels(channels)
4837 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004838 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004839 }
4840 }
4841
Marat Dukhan881ab022021-07-28 13:49:26 -07004842 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004843 TEST_REQUIRES_X86_AVX2;
4844 for (uint32_t mz = 0; mz < 9; mz++) {
4845 for (uint32_t channels = 32; channels < 256; channels += 48) {
4846 DWConvMicrokernelTester()
4847 .cr(16)
4848 .kr(9)
4849 .channels(channels)
4850 .input_offset(304)
4851 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004852 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004853 }
4854 }
4855 }
4856#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857
4858
4859#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -07004860 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_eq_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004861 TEST_REQUIRES_X86_AVX2;
4862 DWConvMicrokernelTester()
4863 .cr(32)
4864 .kr(9)
4865 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08004866 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004867 }
4868
Marat Dukhan881ab022021-07-28 13:49:26 -07004869 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004870 TEST_REQUIRES_X86_AVX2;
4871 for (uint32_t channels = 64; channels < 512; channels += 96) {
4872 DWConvMicrokernelTester()
4873 .cr(32)
4874 .kr(9)
4875 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004877 }
4878 }
4879
Marat Dukhan881ab022021-07-28 13:49:26 -07004880 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004881 TEST_REQUIRES_X86_AVX2;
4882 for (uint32_t channels = 64; channels < 512; channels += 96) {
4883 DWConvMicrokernelTester()
4884 .cr(32)
4885 .kr(9)
4886 .channels(channels)
4887 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004889 }
4890 }
4891
Marat Dukhan881ab022021-07-28 13:49:26 -07004892 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004893 TEST_REQUIRES_X86_AVX2;
4894 for (uint32_t channels = 64; channels < 512; channels += 96) {
4895 DWConvMicrokernelTester()
4896 .cr(32)
4897 .kr(9)
4898 .channels(channels)
4899 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004900 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004901 }
4902 }
4903
Marat Dukhan881ab022021-07-28 13:49:26 -07004904 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_lt_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004905 TEST_REQUIRES_X86_AVX2;
4906 for (uint32_t channels = 1; channels < 32; channels++) {
4907 DWConvMicrokernelTester()
4908 .cr(32)
4909 .kr(9)
4910 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004911 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004912 }
4913 }
4914
Marat Dukhan881ab022021-07-28 13:49:26 -07004915 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004916 TEST_REQUIRES_X86_AVX2;
4917 for (uint32_t channels = 33; channels < 64; channels++) {
4918 DWConvMicrokernelTester()
4919 .cr(32)
4920 .kr(9)
4921 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004922 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004923 }
4924 }
4925
Marat Dukhan881ab022021-07-28 13:49:26 -07004926 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004927 TEST_REQUIRES_X86_AVX2;
4928 for (uint32_t channels = 33; channels < 64; channels++) {
4929 DWConvMicrokernelTester()
4930 .cr(32)
4931 .kr(9)
4932 .channels(channels)
4933 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004934 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004935 }
4936 }
4937
Marat Dukhan881ab022021-07-28 13:49:26 -07004938 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004939 TEST_REQUIRES_X86_AVX2;
4940 for (uint32_t channels = 33; channels < 64; channels++) {
4941 DWConvMicrokernelTester()
4942 .cr(32)
4943 .kr(9)
4944 .channels(channels)
4945 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004947 }
4948 }
4949
Marat Dukhan881ab022021-07-28 13:49:26 -07004950 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004951 TEST_REQUIRES_X86_AVX2;
4952 for (size_t channels = 1; channels <= 160; channels += 31) {
4953 DWConvMicrokernelTester()
4954 .cr(32)
4955 .kr(9)
4956 .channels(channels)
4957 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004958 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004959 }
4960 }
4961
Marat Dukhan881ab022021-07-28 13:49:26 -07004962 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004963 TEST_REQUIRES_X86_AVX2;
4964 for (size_t channels = 1; channels <= 160; channels += 31) {
4965 for (size_t step = 2; step <= 9; step++) {
4966 DWConvMicrokernelTester()
4967 .cr(32)
4968 .kr(9)
4969 .channels(channels)
4970 .width(3)
4971 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004972 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004973 }
4974 }
4975 }
4976
Marat Dukhan881ab022021-07-28 13:49:26 -07004977 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004978 TEST_REQUIRES_X86_AVX2;
4979 for (size_t channels = 1; channels <= 160; channels += 31) {
4980 DWConvMicrokernelTester()
4981 .cr(32)
4982 .kr(9)
4983 .channels(32)
4984 .width(5)
4985 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004986 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004987 }
4988 }
4989
Marat Dukhan881ab022021-07-28 13:49:26 -07004990 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07004991 TEST_REQUIRES_X86_AVX2;
4992 for (size_t channels = 1; channels <= 160; channels += 31) {
4993 DWConvMicrokernelTester()
4994 .cr(32)
4995 .kr(9)
4996 .channels(channels)
4997 .width(3)
4998 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004999 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005000 }
5001 }
5002
Marat Dukhan881ab022021-07-28 13:49:26 -07005003 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005004 TEST_REQUIRES_X86_AVX2;
5005 for (size_t channels = 1; channels <= 160; channels += 31) {
5006 DWConvMicrokernelTester()
5007 .cr(32)
5008 .kr(9)
5009 .channels(channels)
5010 .width(3)
5011 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005012 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005013 }
5014 }
5015
Marat Dukhan881ab022021-07-28 13:49:26 -07005016 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005017 TEST_REQUIRES_X86_AVX2;
5018 for (uint32_t channels = 64; channels < 512; channels += 96) {
5019 DWConvMicrokernelTester()
5020 .cr(32)
5021 .kr(9)
5022 .channels(channels)
5023 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08005024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005025 }
5026 }
5027
Marat Dukhan881ab022021-07-28 13:49:26 -07005028 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005029 TEST_REQUIRES_X86_AVX2;
5030 for (uint32_t mz = 0; mz < 9; mz++) {
5031 for (uint32_t channels = 64; channels < 512; channels += 96) {
5032 DWConvMicrokernelTester()
5033 .cr(32)
5034 .kr(9)
5035 .channels(channels)
5036 .input_offset(592)
5037 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005038 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005039 }
5040 }
5041 }
5042#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043
5044
5045#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5046 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_eq_16) {
5047 TEST_REQUIRES_X86_AVX2;
5048 DWConvMicrokernelTester()
5049 .cr(16)
5050 .kr(9)
5051 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005052 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005053 }
5054
5055 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16) {
5056 TEST_REQUIRES_X86_AVX2;
5057 for (uint32_t channels = 32; channels < 256; channels += 48) {
5058 DWConvMicrokernelTester()
5059 .cr(16)
5060 .kr(9)
5061 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005063 }
5064 }
5065
5066 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
5067 TEST_REQUIRES_X86_AVX2;
5068 for (uint32_t channels = 32; channels < 256; channels += 48) {
5069 DWConvMicrokernelTester()
5070 .cr(16)
5071 .kr(9)
5072 .channels(channels)
5073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005074 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005075 }
5076 }
5077
5078 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
5079 TEST_REQUIRES_X86_AVX2;
5080 for (uint32_t channels = 32; channels < 256; channels += 48) {
5081 DWConvMicrokernelTester()
5082 .cr(16)
5083 .kr(9)
5084 .channels(channels)
5085 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005087 }
5088 }
5089
5090 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_lt_16) {
5091 TEST_REQUIRES_X86_AVX2;
5092 for (uint32_t channels = 1; channels < 16; channels++) {
5093 DWConvMicrokernelTester()
5094 .cr(16)
5095 .kr(9)
5096 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005097 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005098 }
5099 }
5100
5101 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16) {
5102 TEST_REQUIRES_X86_AVX2;
5103 for (uint32_t channels = 17; channels < 32; channels++) {
5104 DWConvMicrokernelTester()
5105 .cr(16)
5106 .kr(9)
5107 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005109 }
5110 }
5111
5112 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
5113 TEST_REQUIRES_X86_AVX2;
5114 for (uint32_t channels = 17; channels < 32; channels++) {
5115 DWConvMicrokernelTester()
5116 .cr(16)
5117 .kr(9)
5118 .channels(channels)
5119 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005120 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005121 }
5122 }
5123
5124 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
5125 TEST_REQUIRES_X86_AVX2;
5126 for (uint32_t channels = 17; channels < 32; channels++) {
5127 DWConvMicrokernelTester()
5128 .cr(16)
5129 .kr(9)
5130 .channels(channels)
5131 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005133 }
5134 }
5135
5136 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel) {
5137 TEST_REQUIRES_X86_AVX2;
5138 for (size_t channels = 1; channels <= 80; channels += 15) {
5139 DWConvMicrokernelTester()
5140 .cr(16)
5141 .kr(9)
5142 .channels(channels)
5143 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005144 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005145 }
5146 }
5147
5148 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
5149 TEST_REQUIRES_X86_AVX2;
5150 for (size_t channels = 1; channels <= 80; channels += 15) {
5151 for (size_t step = 2; step <= 9; step++) {
5152 DWConvMicrokernelTester()
5153 .cr(16)
5154 .kr(9)
5155 .channels(channels)
5156 .width(3)
5157 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005158 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005159 }
5160 }
5161 }
5162
5163 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
5164 TEST_REQUIRES_X86_AVX2;
5165 for (size_t channels = 1; channels <= 80; channels += 15) {
5166 DWConvMicrokernelTester()
5167 .cr(16)
5168 .kr(9)
5169 .channels(16)
5170 .width(5)
5171 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005172 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005173 }
5174 }
5175
5176 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
5177 TEST_REQUIRES_X86_AVX2;
5178 for (size_t channels = 1; channels <= 80; channels += 15) {
5179 DWConvMicrokernelTester()
5180 .cr(16)
5181 .kr(9)
5182 .channels(channels)
5183 .width(3)
5184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005185 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005186 }
5187 }
5188
5189 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
5190 TEST_REQUIRES_X86_AVX2;
5191 for (size_t channels = 1; channels <= 80; channels += 15) {
5192 DWConvMicrokernelTester()
5193 .cr(16)
5194 .kr(9)
5195 .channels(channels)
5196 .width(3)
5197 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005198 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005199 }
5200 }
5201
5202 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, input_offset) {
5203 TEST_REQUIRES_X86_AVX2;
5204 for (uint32_t channels = 32; channels < 256; channels += 48) {
5205 DWConvMicrokernelTester()
5206 .cr(16)
5207 .kr(9)
5208 .channels(channels)
5209 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005210 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005211 }
5212 }
5213
5214 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, zero) {
5215 TEST_REQUIRES_X86_AVX2;
5216 for (uint32_t mz = 0; mz < 9; mz++) {
5217 for (uint32_t channels = 32; channels < 256; channels += 48) {
5218 DWConvMicrokernelTester()
5219 .cr(16)
5220 .kr(9)
5221 .channels(channels)
5222 .input_offset(304)
5223 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005224 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005225 }
5226 }
5227 }
5228#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229
5230
5231#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5232 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_eq_32) {
5233 TEST_REQUIRES_X86_AVX2;
5234 DWConvMicrokernelTester()
5235 .cr(32)
5236 .kr(9)
5237 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08005238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005239 }
5240
5241 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32) {
5242 TEST_REQUIRES_X86_AVX2;
5243 for (uint32_t channels = 64; channels < 512; channels += 96) {
5244 DWConvMicrokernelTester()
5245 .cr(32)
5246 .kr(9)
5247 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005249 }
5250 }
5251
5252 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
5253 TEST_REQUIRES_X86_AVX2;
5254 for (uint32_t channels = 64; channels < 512; channels += 96) {
5255 DWConvMicrokernelTester()
5256 .cr(32)
5257 .kr(9)
5258 .channels(channels)
5259 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005260 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005261 }
5262 }
5263
5264 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
5265 TEST_REQUIRES_X86_AVX2;
5266 for (uint32_t channels = 64; channels < 512; channels += 96) {
5267 DWConvMicrokernelTester()
5268 .cr(32)
5269 .kr(9)
5270 .channels(channels)
5271 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005272 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005273 }
5274 }
5275
5276 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_lt_32) {
5277 TEST_REQUIRES_X86_AVX2;
5278 for (uint32_t channels = 1; channels < 32; channels++) {
5279 DWConvMicrokernelTester()
5280 .cr(32)
5281 .kr(9)
5282 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005283 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005284 }
5285 }
5286
5287 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32) {
5288 TEST_REQUIRES_X86_AVX2;
5289 for (uint32_t channels = 33; channels < 64; channels++) {
5290 DWConvMicrokernelTester()
5291 .cr(32)
5292 .kr(9)
5293 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005294 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005295 }
5296 }
5297
5298 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
5299 TEST_REQUIRES_X86_AVX2;
5300 for (uint32_t channels = 33; channels < 64; channels++) {
5301 DWConvMicrokernelTester()
5302 .cr(32)
5303 .kr(9)
5304 .channels(channels)
5305 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005306 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005307 }
5308 }
5309
5310 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
5311 TEST_REQUIRES_X86_AVX2;
5312 for (uint32_t channels = 33; channels < 64; channels++) {
5313 DWConvMicrokernelTester()
5314 .cr(32)
5315 .kr(9)
5316 .channels(channels)
5317 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005319 }
5320 }
5321
5322 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel) {
5323 TEST_REQUIRES_X86_AVX2;
5324 for (size_t channels = 1; channels <= 160; channels += 31) {
5325 DWConvMicrokernelTester()
5326 .cr(32)
5327 .kr(9)
5328 .channels(channels)
5329 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005330 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005331 }
5332 }
5333
5334 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
5335 TEST_REQUIRES_X86_AVX2;
5336 for (size_t channels = 1; channels <= 160; channels += 31) {
5337 for (size_t step = 2; step <= 9; step++) {
5338 DWConvMicrokernelTester()
5339 .cr(32)
5340 .kr(9)
5341 .channels(channels)
5342 .width(3)
5343 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005344 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005345 }
5346 }
5347 }
5348
5349 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
5350 TEST_REQUIRES_X86_AVX2;
5351 for (size_t channels = 1; channels <= 160; channels += 31) {
5352 DWConvMicrokernelTester()
5353 .cr(32)
5354 .kr(9)
5355 .channels(32)
5356 .width(5)
5357 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005358 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005359 }
5360 }
5361
5362 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
5363 TEST_REQUIRES_X86_AVX2;
5364 for (size_t channels = 1; channels <= 160; channels += 31) {
5365 DWConvMicrokernelTester()
5366 .cr(32)
5367 .kr(9)
5368 .channels(channels)
5369 .width(3)
5370 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005371 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005372 }
5373 }
5374
5375 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
5376 TEST_REQUIRES_X86_AVX2;
5377 for (size_t channels = 1; channels <= 160; channels += 31) {
5378 DWConvMicrokernelTester()
5379 .cr(32)
5380 .kr(9)
5381 .channels(channels)
5382 .width(3)
5383 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005384 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005385 }
5386 }
5387
5388 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, input_offset) {
5389 TEST_REQUIRES_X86_AVX2;
5390 for (uint32_t channels = 64; channels < 512; channels += 96) {
5391 DWConvMicrokernelTester()
5392 .cr(32)
5393 .kr(9)
5394 .channels(channels)
5395 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08005396 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07005397 }
5398 }
5399
5400 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, zero) {
5401 TEST_REQUIRES_X86_AVX2;
5402 for (uint32_t mz = 0; mz < 9; mz++) {
5403 for (uint32_t channels = 64; channels < 512; channels += 96) {
5404 DWConvMicrokernelTester()
5405 .cr(32)
5406 .kr(9)
5407 .channels(channels)
5408 .input_offset(592)
5409 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005410 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07005411 }
5412 }
5413 }
5414#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415
5416
5417#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005418 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
5419 TEST_REQUIRES_X86_AVX2;
5420 DWConvMicrokernelTester()
5421 .cr(16)
5422 .kr(9)
5423 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005424 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005425 }
5426
5427 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
5428 TEST_REQUIRES_X86_AVX2;
5429 for (uint32_t channels = 32; channels < 256; channels += 48) {
5430 DWConvMicrokernelTester()
5431 .cr(16)
5432 .kr(9)
5433 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005435 }
5436 }
5437
5438 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
5439 TEST_REQUIRES_X86_AVX2;
5440 for (uint32_t channels = 32; channels < 256; channels += 48) {
5441 DWConvMicrokernelTester()
5442 .cr(16)
5443 .kr(9)
5444 .channels(channels)
5445 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005446 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005447 }
5448 }
5449
5450 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
5451 TEST_REQUIRES_X86_AVX2;
5452 for (uint32_t channels = 32; channels < 256; channels += 48) {
5453 DWConvMicrokernelTester()
5454 .cr(16)
5455 .kr(9)
5456 .channels(channels)
5457 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005458 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005459 }
5460 }
5461
5462 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
5463 TEST_REQUIRES_X86_AVX2;
5464 for (uint32_t channels = 1; channels < 16; channels++) {
5465 DWConvMicrokernelTester()
5466 .cr(16)
5467 .kr(9)
5468 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005469 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005470 }
5471 }
5472
5473 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
5474 TEST_REQUIRES_X86_AVX2;
5475 for (uint32_t channels = 17; channels < 32; channels++) {
5476 DWConvMicrokernelTester()
5477 .cr(16)
5478 .kr(9)
5479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005480 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005481 }
5482 }
5483
5484 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
5485 TEST_REQUIRES_X86_AVX2;
5486 for (uint32_t channels = 17; channels < 32; channels++) {
5487 DWConvMicrokernelTester()
5488 .cr(16)
5489 .kr(9)
5490 .channels(channels)
5491 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005492 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005493 }
5494 }
5495
5496 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
5497 TEST_REQUIRES_X86_AVX2;
5498 for (uint32_t channels = 17; channels < 32; channels++) {
5499 DWConvMicrokernelTester()
5500 .cr(16)
5501 .kr(9)
5502 .channels(channels)
5503 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005505 }
5506 }
5507
5508 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
5509 TEST_REQUIRES_X86_AVX2;
5510 for (size_t channels = 1; channels <= 80; channels += 15) {
5511 DWConvMicrokernelTester()
5512 .cr(16)
5513 .kr(9)
5514 .channels(channels)
5515 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005516 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005517 }
5518 }
5519
5520 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
5521 TEST_REQUIRES_X86_AVX2;
5522 for (size_t channels = 1; channels <= 80; channels += 15) {
5523 for (size_t step = 2; step <= 9; step++) {
5524 DWConvMicrokernelTester()
5525 .cr(16)
5526 .kr(9)
5527 .channels(channels)
5528 .width(3)
5529 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005530 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005531 }
5532 }
5533 }
5534
5535 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
5536 TEST_REQUIRES_X86_AVX2;
5537 for (size_t channels = 1; channels <= 80; channels += 15) {
5538 DWConvMicrokernelTester()
5539 .cr(16)
5540 .kr(9)
5541 .channels(16)
5542 .width(5)
5543 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005544 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005545 }
5546 }
5547
5548 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
5549 TEST_REQUIRES_X86_AVX2;
5550 for (size_t channels = 1; channels <= 80; channels += 15) {
5551 DWConvMicrokernelTester()
5552 .cr(16)
5553 .kr(9)
5554 .channels(channels)
5555 .width(3)
5556 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005557 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005558 }
5559 }
5560
5561 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
5562 TEST_REQUIRES_X86_AVX2;
5563 for (size_t channels = 1; channels <= 80; channels += 15) {
5564 DWConvMicrokernelTester()
5565 .cr(16)
5566 .kr(9)
5567 .channels(channels)
5568 .width(3)
5569 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005570 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005571 }
5572 }
5573
5574 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
5575 TEST_REQUIRES_X86_AVX2;
5576 for (uint32_t channels = 32; channels < 256; channels += 48) {
5577 DWConvMicrokernelTester()
5578 .cr(16)
5579 .kr(9)
5580 .channels(channels)
5581 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005582 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005583 }
5584 }
5585
5586 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
5587 TEST_REQUIRES_X86_AVX2;
5588 for (uint32_t mz = 0; mz < 9; mz++) {
5589 for (uint32_t channels = 32; channels < 256; channels += 48) {
5590 DWConvMicrokernelTester()
5591 .cr(16)
5592 .kr(9)
5593 .channels(channels)
5594 .input_offset(304)
5595 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005596 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005597 }
5598 }
5599 }
5600#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601
5602
5603#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5604 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
5605 TEST_REQUIRES_X86_AVX2;
5606 DWConvMicrokernelTester()
5607 .cr(32)
5608 .kr(9)
5609 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08005610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005611 }
5612
5613 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
5614 TEST_REQUIRES_X86_AVX2;
5615 for (uint32_t channels = 64; channels < 512; channels += 96) {
5616 DWConvMicrokernelTester()
5617 .cr(32)
5618 .kr(9)
5619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005621 }
5622 }
5623
5624 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
5625 TEST_REQUIRES_X86_AVX2;
5626 for (uint32_t channels = 64; channels < 512; channels += 96) {
5627 DWConvMicrokernelTester()
5628 .cr(32)
5629 .kr(9)
5630 .channels(channels)
5631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005632 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005633 }
5634 }
5635
5636 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
5637 TEST_REQUIRES_X86_AVX2;
5638 for (uint32_t channels = 64; channels < 512; channels += 96) {
5639 DWConvMicrokernelTester()
5640 .cr(32)
5641 .kr(9)
5642 .channels(channels)
5643 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005644 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005645 }
5646 }
5647
5648 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
5649 TEST_REQUIRES_X86_AVX2;
5650 for (uint32_t channels = 1; channels < 32; channels++) {
5651 DWConvMicrokernelTester()
5652 .cr(32)
5653 .kr(9)
5654 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005655 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005656 }
5657 }
5658
5659 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
5660 TEST_REQUIRES_X86_AVX2;
5661 for (uint32_t channels = 33; channels < 64; channels++) {
5662 DWConvMicrokernelTester()
5663 .cr(32)
5664 .kr(9)
5665 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005666 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005667 }
5668 }
5669
5670 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
5671 TEST_REQUIRES_X86_AVX2;
5672 for (uint32_t channels = 33; channels < 64; channels++) {
5673 DWConvMicrokernelTester()
5674 .cr(32)
5675 .kr(9)
5676 .channels(channels)
5677 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005678 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005679 }
5680 }
5681
5682 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
5683 TEST_REQUIRES_X86_AVX2;
5684 for (uint32_t channels = 33; channels < 64; channels++) {
5685 DWConvMicrokernelTester()
5686 .cr(32)
5687 .kr(9)
5688 .channels(channels)
5689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005691 }
5692 }
5693
5694 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
5695 TEST_REQUIRES_X86_AVX2;
5696 for (size_t channels = 1; channels <= 160; channels += 31) {
5697 DWConvMicrokernelTester()
5698 .cr(32)
5699 .kr(9)
5700 .channels(channels)
5701 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005702 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005703 }
5704 }
5705
5706 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
5707 TEST_REQUIRES_X86_AVX2;
5708 for (size_t channels = 1; channels <= 160; channels += 31) {
5709 for (size_t step = 2; step <= 9; step++) {
5710 DWConvMicrokernelTester()
5711 .cr(32)
5712 .kr(9)
5713 .channels(channels)
5714 .width(3)
5715 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005716 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005717 }
5718 }
5719 }
5720
5721 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
5722 TEST_REQUIRES_X86_AVX2;
5723 for (size_t channels = 1; channels <= 160; channels += 31) {
5724 DWConvMicrokernelTester()
5725 .cr(32)
5726 .kr(9)
5727 .channels(32)
5728 .width(5)
5729 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005730 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005731 }
5732 }
5733
5734 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
5735 TEST_REQUIRES_X86_AVX2;
5736 for (size_t channels = 1; channels <= 160; channels += 31) {
5737 DWConvMicrokernelTester()
5738 .cr(32)
5739 .kr(9)
5740 .channels(channels)
5741 .width(3)
5742 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005743 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005744 }
5745 }
5746
5747 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
5748 TEST_REQUIRES_X86_AVX2;
5749 for (size_t channels = 1; channels <= 160; channels += 31) {
5750 DWConvMicrokernelTester()
5751 .cr(32)
5752 .kr(9)
5753 .channels(channels)
5754 .width(3)
5755 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005756 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005757 }
5758 }
5759
5760 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
5761 TEST_REQUIRES_X86_AVX2;
5762 for (uint32_t channels = 64; channels < 512; channels += 96) {
5763 DWConvMicrokernelTester()
5764 .cr(32)
5765 .kr(9)
5766 .channels(channels)
5767 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08005768 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005769 }
5770 }
5771
5772 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
5773 TEST_REQUIRES_X86_AVX2;
5774 for (uint32_t mz = 0; mz < 9; mz++) {
5775 for (uint32_t channels = 64; channels < 512; channels += 96) {
5776 DWConvMicrokernelTester()
5777 .cr(32)
5778 .kr(9)
5779 .channels(channels)
5780 .input_offset(592)
5781 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005782 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07005783 }
5784 }
5785 }
5786#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5787
5788
5789#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -07005790 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
5791 TEST_REQUIRES_X86_SSE41;
5792 DWConvMicrokernelTester()
5793 .cr(8)
5794 .kr(9)
5795 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005796 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005797 }
5798
5799 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
5800 TEST_REQUIRES_X86_SSE41;
5801 for (uint32_t channels = 16; channels < 128; channels += 24) {
5802 DWConvMicrokernelTester()
5803 .cr(8)
5804 .kr(9)
5805 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005806 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005807 }
5808 }
5809
5810 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
5811 TEST_REQUIRES_X86_SSE41;
5812 for (uint32_t channels = 16; channels < 128; channels += 24) {
5813 DWConvMicrokernelTester()
5814 .cr(8)
5815 .kr(9)
5816 .channels(channels)
5817 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005818 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005819 }
5820 }
5821
5822 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
5823 TEST_REQUIRES_X86_SSE41;
5824 for (uint32_t channels = 16; channels < 128; channels += 24) {
5825 DWConvMicrokernelTester()
5826 .cr(8)
5827 .kr(9)
5828 .channels(channels)
5829 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005831 }
5832 }
5833
5834 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
5835 TEST_REQUIRES_X86_SSE41;
5836 for (uint32_t channels = 1; channels < 8; channels++) {
5837 DWConvMicrokernelTester()
5838 .cr(8)
5839 .kr(9)
5840 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005841 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005842 }
5843 }
5844
5845 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
5846 TEST_REQUIRES_X86_SSE41;
5847 for (uint32_t channels = 9; channels < 16; channels++) {
5848 DWConvMicrokernelTester()
5849 .cr(8)
5850 .kr(9)
5851 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005852 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005853 }
5854 }
5855
5856 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
5857 TEST_REQUIRES_X86_SSE41;
5858 for (uint32_t channels = 9; channels < 16; channels++) {
5859 DWConvMicrokernelTester()
5860 .cr(8)
5861 .kr(9)
5862 .channels(channels)
5863 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005864 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005865 }
5866 }
5867
5868 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
5869 TEST_REQUIRES_X86_SSE41;
5870 for (uint32_t channels = 9; channels < 16; channels++) {
5871 DWConvMicrokernelTester()
5872 .cr(8)
5873 .kr(9)
5874 .channels(channels)
5875 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005877 }
5878 }
5879
5880 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
5881 TEST_REQUIRES_X86_SSE41;
5882 for (size_t channels = 1; channels <= 40; channels += 7) {
5883 DWConvMicrokernelTester()
5884 .cr(8)
5885 .kr(9)
5886 .channels(channels)
5887 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005889 }
5890 }
5891
5892 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
5893 TEST_REQUIRES_X86_SSE41;
5894 for (size_t channels = 1; channels <= 40; channels += 7) {
5895 for (size_t step = 2; step <= 9; step++) {
5896 DWConvMicrokernelTester()
5897 .cr(8)
5898 .kr(9)
5899 .channels(channels)
5900 .width(3)
5901 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005902 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005903 }
5904 }
5905 }
5906
5907 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
5908 TEST_REQUIRES_X86_SSE41;
5909 for (size_t channels = 1; channels <= 40; channels += 7) {
5910 DWConvMicrokernelTester()
5911 .cr(8)
5912 .kr(9)
5913 .channels(8)
5914 .width(5)
5915 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005916 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005917 }
5918 }
5919
5920 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
5921 TEST_REQUIRES_X86_SSE41;
5922 for (size_t channels = 1; channels <= 40; channels += 7) {
5923 DWConvMicrokernelTester()
5924 .cr(8)
5925 .kr(9)
5926 .channels(channels)
5927 .width(3)
5928 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005929 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005930 }
5931 }
5932
5933 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
5934 TEST_REQUIRES_X86_SSE41;
5935 for (size_t channels = 1; channels <= 40; channels += 7) {
5936 DWConvMicrokernelTester()
5937 .cr(8)
5938 .kr(9)
5939 .channels(channels)
5940 .width(3)
5941 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005942 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005943 }
5944 }
5945
5946 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
5947 TEST_REQUIRES_X86_SSE41;
5948 for (uint32_t channels = 16; channels < 128; channels += 24) {
5949 DWConvMicrokernelTester()
5950 .cr(8)
5951 .kr(9)
5952 .channels(channels)
5953 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08005954 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005955 }
5956 }
5957
5958 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
5959 TEST_REQUIRES_X86_SSE41;
5960 for (uint32_t mz = 0; mz < 9; mz++) {
5961 for (uint32_t channels = 16; channels < 128; channels += 24) {
5962 DWConvMicrokernelTester()
5963 .cr(8)
5964 .kr(9)
5965 .channels(channels)
5966 .input_offset(176)
5967 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005968 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005969 }
5970 }
5971 }
5972#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973
5974
5975#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5976 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
5977 TEST_REQUIRES_X86_SSE41;
5978 DWConvMicrokernelTester()
5979 .cr(16)
5980 .kr(9)
5981 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005983 }
5984
5985 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
5986 TEST_REQUIRES_X86_SSE41;
5987 for (uint32_t channels = 32; channels < 256; channels += 48) {
5988 DWConvMicrokernelTester()
5989 .cr(16)
5990 .kr(9)
5991 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005992 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07005993 }
5994 }
5995
5996 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
5997 TEST_REQUIRES_X86_SSE41;
5998 for (uint32_t channels = 32; channels < 256; channels += 48) {
5999 DWConvMicrokernelTester()
6000 .cr(16)
6001 .kr(9)
6002 .channels(channels)
6003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006004 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006005 }
6006 }
6007
6008 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
6009 TEST_REQUIRES_X86_SSE41;
6010 for (uint32_t channels = 32; channels < 256; channels += 48) {
6011 DWConvMicrokernelTester()
6012 .cr(16)
6013 .kr(9)
6014 .channels(channels)
6015 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006016 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006017 }
6018 }
6019
6020 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
6021 TEST_REQUIRES_X86_SSE41;
6022 for (uint32_t channels = 1; channels < 16; channels++) {
6023 DWConvMicrokernelTester()
6024 .cr(16)
6025 .kr(9)
6026 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006027 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006028 }
6029 }
6030
6031 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
6032 TEST_REQUIRES_X86_SSE41;
6033 for (uint32_t channels = 17; channels < 32; channels++) {
6034 DWConvMicrokernelTester()
6035 .cr(16)
6036 .kr(9)
6037 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006038 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006039 }
6040 }
6041
6042 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
6043 TEST_REQUIRES_X86_SSE41;
6044 for (uint32_t channels = 17; channels < 32; channels++) {
6045 DWConvMicrokernelTester()
6046 .cr(16)
6047 .kr(9)
6048 .channels(channels)
6049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006050 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006051 }
6052 }
6053
6054 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
6055 TEST_REQUIRES_X86_SSE41;
6056 for (uint32_t channels = 17; channels < 32; channels++) {
6057 DWConvMicrokernelTester()
6058 .cr(16)
6059 .kr(9)
6060 .channels(channels)
6061 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006063 }
6064 }
6065
6066 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
6067 TEST_REQUIRES_X86_SSE41;
6068 for (size_t channels = 1; channels <= 80; channels += 15) {
6069 DWConvMicrokernelTester()
6070 .cr(16)
6071 .kr(9)
6072 .channels(channels)
6073 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006074 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006075 }
6076 }
6077
6078 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
6079 TEST_REQUIRES_X86_SSE41;
6080 for (size_t channels = 1; channels <= 80; channels += 15) {
6081 for (size_t step = 2; step <= 9; step++) {
6082 DWConvMicrokernelTester()
6083 .cr(16)
6084 .kr(9)
6085 .channels(channels)
6086 .width(3)
6087 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006088 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006089 }
6090 }
6091 }
6092
6093 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
6094 TEST_REQUIRES_X86_SSE41;
6095 for (size_t channels = 1; channels <= 80; channels += 15) {
6096 DWConvMicrokernelTester()
6097 .cr(16)
6098 .kr(9)
6099 .channels(16)
6100 .width(5)
6101 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006102 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006103 }
6104 }
6105
6106 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
6107 TEST_REQUIRES_X86_SSE41;
6108 for (size_t channels = 1; channels <= 80; channels += 15) {
6109 DWConvMicrokernelTester()
6110 .cr(16)
6111 .kr(9)
6112 .channels(channels)
6113 .width(3)
6114 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006115 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006116 }
6117 }
6118
6119 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
6120 TEST_REQUIRES_X86_SSE41;
6121 for (size_t channels = 1; channels <= 80; channels += 15) {
6122 DWConvMicrokernelTester()
6123 .cr(16)
6124 .kr(9)
6125 .channels(channels)
6126 .width(3)
6127 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006128 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006129 }
6130 }
6131
6132 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
6133 TEST_REQUIRES_X86_SSE41;
6134 for (uint32_t channels = 32; channels < 256; channels += 48) {
6135 DWConvMicrokernelTester()
6136 .cr(16)
6137 .kr(9)
6138 .channels(channels)
6139 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08006140 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006141 }
6142 }
6143
6144 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
6145 TEST_REQUIRES_X86_SSE41;
6146 for (uint32_t mz = 0; mz < 9; mz++) {
6147 for (uint32_t channels = 32; channels < 256; channels += 48) {
6148 DWConvMicrokernelTester()
6149 .cr(16)
6150 .kr(9)
6151 .channels(channels)
6152 .input_offset(304)
6153 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006154 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006155 }
6156 }
6157 }
6158#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159
6160
6161#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6162 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_eq_24) {
6163 TEST_REQUIRES_X86_SSE41;
6164 DWConvMicrokernelTester()
6165 .cr(24)
6166 .kr(9)
6167 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08006168 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006169 }
6170
6171 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24) {
6172 TEST_REQUIRES_X86_SSE41;
6173 for (uint32_t channels = 48; channels < 384; channels += 72) {
6174 DWConvMicrokernelTester()
6175 .cr(24)
6176 .kr(9)
6177 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006178 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006179 }
6180 }
6181
6182 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
6183 TEST_REQUIRES_X86_SSE41;
6184 for (uint32_t channels = 48; channels < 384; channels += 72) {
6185 DWConvMicrokernelTester()
6186 .cr(24)
6187 .kr(9)
6188 .channels(channels)
6189 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006190 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006191 }
6192 }
6193
6194 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
6195 TEST_REQUIRES_X86_SSE41;
6196 for (uint32_t channels = 48; channels < 384; channels += 72) {
6197 DWConvMicrokernelTester()
6198 .cr(24)
6199 .kr(9)
6200 .channels(channels)
6201 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006202 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006203 }
6204 }
6205
6206 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_lt_24) {
6207 TEST_REQUIRES_X86_SSE41;
6208 for (uint32_t channels = 1; channels < 24; channels++) {
6209 DWConvMicrokernelTester()
6210 .cr(24)
6211 .kr(9)
6212 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006213 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006214 }
6215 }
6216
6217 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24) {
6218 TEST_REQUIRES_X86_SSE41;
6219 for (uint32_t channels = 25; channels < 48; channels++) {
6220 DWConvMicrokernelTester()
6221 .cr(24)
6222 .kr(9)
6223 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006224 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006225 }
6226 }
6227
6228 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
6229 TEST_REQUIRES_X86_SSE41;
6230 for (uint32_t channels = 25; channels < 48; channels++) {
6231 DWConvMicrokernelTester()
6232 .cr(24)
6233 .kr(9)
6234 .channels(channels)
6235 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006236 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006237 }
6238 }
6239
6240 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
6241 TEST_REQUIRES_X86_SSE41;
6242 for (uint32_t channels = 25; channels < 48; channels++) {
6243 DWConvMicrokernelTester()
6244 .cr(24)
6245 .kr(9)
6246 .channels(channels)
6247 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006249 }
6250 }
6251
6252 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel) {
6253 TEST_REQUIRES_X86_SSE41;
6254 for (size_t channels = 1; channels <= 120; channels += 23) {
6255 DWConvMicrokernelTester()
6256 .cr(24)
6257 .kr(9)
6258 .channels(channels)
6259 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006260 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006261 }
6262 }
6263
6264 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_step) {
6265 TEST_REQUIRES_X86_SSE41;
6266 for (size_t channels = 1; channels <= 120; channels += 23) {
6267 for (size_t step = 2; step <= 9; step++) {
6268 DWConvMicrokernelTester()
6269 .cr(24)
6270 .kr(9)
6271 .channels(channels)
6272 .width(3)
6273 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006274 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006275 }
6276 }
6277 }
6278
6279 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
6280 TEST_REQUIRES_X86_SSE41;
6281 for (size_t channels = 1; channels <= 120; channels += 23) {
6282 DWConvMicrokernelTester()
6283 .cr(24)
6284 .kr(9)
6285 .channels(24)
6286 .width(5)
6287 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08006288 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006289 }
6290 }
6291
6292 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
6293 TEST_REQUIRES_X86_SSE41;
6294 for (size_t channels = 1; channels <= 120; channels += 23) {
6295 DWConvMicrokernelTester()
6296 .cr(24)
6297 .kr(9)
6298 .channels(channels)
6299 .width(3)
6300 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006301 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006302 }
6303 }
6304
6305 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
6306 TEST_REQUIRES_X86_SSE41;
6307 for (size_t channels = 1; channels <= 120; channels += 23) {
6308 DWConvMicrokernelTester()
6309 .cr(24)
6310 .kr(9)
6311 .channels(channels)
6312 .width(3)
6313 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006314 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006315 }
6316 }
6317
6318 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, input_offset) {
6319 TEST_REQUIRES_X86_SSE41;
6320 for (uint32_t channels = 48; channels < 384; channels += 72) {
6321 DWConvMicrokernelTester()
6322 .cr(24)
6323 .kr(9)
6324 .channels(channels)
6325 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08006326 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006327 }
6328 }
6329
6330 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, zero) {
6331 TEST_REQUIRES_X86_SSE41;
6332 for (uint32_t mz = 0; mz < 9; mz++) {
6333 for (uint32_t channels = 48; channels < 384; channels += 72) {
6334 DWConvMicrokernelTester()
6335 .cr(24)
6336 .kr(9)
6337 .channels(channels)
6338 .input_offset(464)
6339 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006340 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006341 }
6342 }
6343 }
6344#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6345
6346
6347#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6348 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
6349 TEST_REQUIRES_X86_AVX;
6350 DWConvMicrokernelTester()
6351 .cr(8)
6352 .kr(9)
6353 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006354 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006355 }
6356
6357 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
6358 TEST_REQUIRES_X86_AVX;
6359 for (uint32_t channels = 16; channels < 128; channels += 24) {
6360 DWConvMicrokernelTester()
6361 .cr(8)
6362 .kr(9)
6363 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006364 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006365 }
6366 }
6367
6368 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
6369 TEST_REQUIRES_X86_AVX;
6370 for (uint32_t channels = 16; channels < 128; channels += 24) {
6371 DWConvMicrokernelTester()
6372 .cr(8)
6373 .kr(9)
6374 .channels(channels)
6375 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006376 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006377 }
6378 }
6379
6380 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
6381 TEST_REQUIRES_X86_AVX;
6382 for (uint32_t channels = 16; channels < 128; channels += 24) {
6383 DWConvMicrokernelTester()
6384 .cr(8)
6385 .kr(9)
6386 .channels(channels)
6387 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006388 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006389 }
6390 }
6391
6392 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
6393 TEST_REQUIRES_X86_AVX;
6394 for (uint32_t channels = 1; channels < 8; channels++) {
6395 DWConvMicrokernelTester()
6396 .cr(8)
6397 .kr(9)
6398 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006399 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006400 }
6401 }
6402
6403 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
6404 TEST_REQUIRES_X86_AVX;
6405 for (uint32_t channels = 9; channels < 16; channels++) {
6406 DWConvMicrokernelTester()
6407 .cr(8)
6408 .kr(9)
6409 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006410 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006411 }
6412 }
6413
6414 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
6415 TEST_REQUIRES_X86_AVX;
6416 for (uint32_t channels = 9; channels < 16; channels++) {
6417 DWConvMicrokernelTester()
6418 .cr(8)
6419 .kr(9)
6420 .channels(channels)
6421 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006422 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006423 }
6424 }
6425
6426 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
6427 TEST_REQUIRES_X86_AVX;
6428 for (uint32_t channels = 9; channels < 16; channels++) {
6429 DWConvMicrokernelTester()
6430 .cr(8)
6431 .kr(9)
6432 .channels(channels)
6433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006435 }
6436 }
6437
6438 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
6439 TEST_REQUIRES_X86_AVX;
6440 for (size_t channels = 1; channels <= 40; channels += 7) {
6441 DWConvMicrokernelTester()
6442 .cr(8)
6443 .kr(9)
6444 .channels(channels)
6445 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006446 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006447 }
6448 }
6449
6450 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
6451 TEST_REQUIRES_X86_AVX;
6452 for (size_t channels = 1; channels <= 40; channels += 7) {
6453 for (size_t step = 2; step <= 9; step++) {
6454 DWConvMicrokernelTester()
6455 .cr(8)
6456 .kr(9)
6457 .channels(channels)
6458 .width(3)
6459 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006460 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006461 }
6462 }
6463 }
6464
6465 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
6466 TEST_REQUIRES_X86_AVX;
6467 for (size_t channels = 1; channels <= 40; channels += 7) {
6468 DWConvMicrokernelTester()
6469 .cr(8)
6470 .kr(9)
6471 .channels(8)
6472 .width(5)
6473 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006474 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006475 }
6476 }
6477
6478 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
6479 TEST_REQUIRES_X86_AVX;
6480 for (size_t channels = 1; channels <= 40; channels += 7) {
6481 DWConvMicrokernelTester()
6482 .cr(8)
6483 .kr(9)
6484 .channels(channels)
6485 .width(3)
6486 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006487 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006488 }
6489 }
6490
6491 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
6492 TEST_REQUIRES_X86_AVX;
6493 for (size_t channels = 1; channels <= 40; channels += 7) {
6494 DWConvMicrokernelTester()
6495 .cr(8)
6496 .kr(9)
6497 .channels(channels)
6498 .width(3)
6499 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006500 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006501 }
6502 }
6503
6504 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
6505 TEST_REQUIRES_X86_AVX;
6506 for (uint32_t channels = 16; channels < 128; channels += 24) {
6507 DWConvMicrokernelTester()
6508 .cr(8)
6509 .kr(9)
6510 .channels(channels)
6511 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08006512 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006513 }
6514 }
6515
6516 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
6517 TEST_REQUIRES_X86_AVX;
6518 for (uint32_t mz = 0; mz < 9; mz++) {
6519 for (uint32_t channels = 16; channels < 128; channels += 24) {
6520 DWConvMicrokernelTester()
6521 .cr(8)
6522 .kr(9)
6523 .channels(channels)
6524 .input_offset(176)
6525 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006526 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006527 }
6528 }
6529 }
6530#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6531
6532
6533#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6534 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
6535 TEST_REQUIRES_X86_AVX;
6536 DWConvMicrokernelTester()
6537 .cr(16)
6538 .kr(9)
6539 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006541 }
6542
6543 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
6544 TEST_REQUIRES_X86_AVX;
6545 for (uint32_t channels = 32; channels < 256; channels += 48) {
6546 DWConvMicrokernelTester()
6547 .cr(16)
6548 .kr(9)
6549 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006550 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006551 }
6552 }
6553
6554 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
6555 TEST_REQUIRES_X86_AVX;
6556 for (uint32_t channels = 32; channels < 256; channels += 48) {
6557 DWConvMicrokernelTester()
6558 .cr(16)
6559 .kr(9)
6560 .channels(channels)
6561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006562 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006563 }
6564 }
6565
6566 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
6567 TEST_REQUIRES_X86_AVX;
6568 for (uint32_t channels = 32; channels < 256; channels += 48) {
6569 DWConvMicrokernelTester()
6570 .cr(16)
6571 .kr(9)
6572 .channels(channels)
6573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006575 }
6576 }
6577
6578 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
6579 TEST_REQUIRES_X86_AVX;
6580 for (uint32_t channels = 1; channels < 16; channels++) {
6581 DWConvMicrokernelTester()
6582 .cr(16)
6583 .kr(9)
6584 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006585 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006586 }
6587 }
6588
6589 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
6590 TEST_REQUIRES_X86_AVX;
6591 for (uint32_t channels = 17; channels < 32; channels++) {
6592 DWConvMicrokernelTester()
6593 .cr(16)
6594 .kr(9)
6595 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006596 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006597 }
6598 }
6599
6600 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
6601 TEST_REQUIRES_X86_AVX;
6602 for (uint32_t channels = 17; channels < 32; channels++) {
6603 DWConvMicrokernelTester()
6604 .cr(16)
6605 .kr(9)
6606 .channels(channels)
6607 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006608 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006609 }
6610 }
6611
6612 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
6613 TEST_REQUIRES_X86_AVX;
6614 for (uint32_t channels = 17; channels < 32; channels++) {
6615 DWConvMicrokernelTester()
6616 .cr(16)
6617 .kr(9)
6618 .channels(channels)
6619 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006621 }
6622 }
6623
6624 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
6625 TEST_REQUIRES_X86_AVX;
6626 for (size_t channels = 1; channels <= 80; channels += 15) {
6627 DWConvMicrokernelTester()
6628 .cr(16)
6629 .kr(9)
6630 .channels(channels)
6631 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006632 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006633 }
6634 }
6635
6636 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
6637 TEST_REQUIRES_X86_AVX;
6638 for (size_t channels = 1; channels <= 80; channels += 15) {
6639 for (size_t step = 2; step <= 9; step++) {
6640 DWConvMicrokernelTester()
6641 .cr(16)
6642 .kr(9)
6643 .channels(channels)
6644 .width(3)
6645 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006646 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006647 }
6648 }
6649 }
6650
6651 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
6652 TEST_REQUIRES_X86_AVX;
6653 for (size_t channels = 1; channels <= 80; channels += 15) {
6654 DWConvMicrokernelTester()
6655 .cr(16)
6656 .kr(9)
6657 .channels(16)
6658 .width(5)
6659 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006660 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006661 }
6662 }
6663
6664 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
6665 TEST_REQUIRES_X86_AVX;
6666 for (size_t channels = 1; channels <= 80; channels += 15) {
6667 DWConvMicrokernelTester()
6668 .cr(16)
6669 .kr(9)
6670 .channels(channels)
6671 .width(3)
6672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006673 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006674 }
6675 }
6676
6677 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
6678 TEST_REQUIRES_X86_AVX;
6679 for (size_t channels = 1; channels <= 80; channels += 15) {
6680 DWConvMicrokernelTester()
6681 .cr(16)
6682 .kr(9)
6683 .channels(channels)
6684 .width(3)
6685 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006686 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006687 }
6688 }
6689
6690 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
6691 TEST_REQUIRES_X86_AVX;
6692 for (uint32_t channels = 32; channels < 256; channels += 48) {
6693 DWConvMicrokernelTester()
6694 .cr(16)
6695 .kr(9)
6696 .channels(channels)
6697 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08006698 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006699 }
6700 }
6701
6702 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
6703 TEST_REQUIRES_X86_AVX;
6704 for (uint32_t mz = 0; mz < 9; mz++) {
6705 for (uint32_t channels = 32; channels < 256; channels += 48) {
6706 DWConvMicrokernelTester()
6707 .cr(16)
6708 .kr(9)
6709 .channels(channels)
6710 .input_offset(304)
6711 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006712 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006713 }
6714 }
6715 }
6716#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6717
6718
6719#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6720 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_eq_24) {
6721 TEST_REQUIRES_X86_AVX;
6722 DWConvMicrokernelTester()
6723 .cr(24)
6724 .kr(9)
6725 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08006726 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006727 }
6728
6729 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24) {
6730 TEST_REQUIRES_X86_AVX;
6731 for (uint32_t channels = 48; channels < 384; channels += 72) {
6732 DWConvMicrokernelTester()
6733 .cr(24)
6734 .kr(9)
6735 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006736 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006737 }
6738 }
6739
6740 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
6741 TEST_REQUIRES_X86_AVX;
6742 for (uint32_t channels = 48; channels < 384; channels += 72) {
6743 DWConvMicrokernelTester()
6744 .cr(24)
6745 .kr(9)
6746 .channels(channels)
6747 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006748 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006749 }
6750 }
6751
6752 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
6753 TEST_REQUIRES_X86_AVX;
6754 for (uint32_t channels = 48; channels < 384; channels += 72) {
6755 DWConvMicrokernelTester()
6756 .cr(24)
6757 .kr(9)
6758 .channels(channels)
6759 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006761 }
6762 }
6763
6764 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_lt_24) {
6765 TEST_REQUIRES_X86_AVX;
6766 for (uint32_t channels = 1; channels < 24; channels++) {
6767 DWConvMicrokernelTester()
6768 .cr(24)
6769 .kr(9)
6770 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006771 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006772 }
6773 }
6774
6775 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24) {
6776 TEST_REQUIRES_X86_AVX;
6777 for (uint32_t channels = 25; channels < 48; channels++) {
6778 DWConvMicrokernelTester()
6779 .cr(24)
6780 .kr(9)
6781 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006782 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006783 }
6784 }
6785
6786 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
6787 TEST_REQUIRES_X86_AVX;
6788 for (uint32_t channels = 25; channels < 48; channels++) {
6789 DWConvMicrokernelTester()
6790 .cr(24)
6791 .kr(9)
6792 .channels(channels)
6793 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006794 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006795 }
6796 }
6797
6798 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
6799 TEST_REQUIRES_X86_AVX;
6800 for (uint32_t channels = 25; channels < 48; channels++) {
6801 DWConvMicrokernelTester()
6802 .cr(24)
6803 .kr(9)
6804 .channels(channels)
6805 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006806 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006807 }
6808 }
6809
6810 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel) {
6811 TEST_REQUIRES_X86_AVX;
6812 for (size_t channels = 1; channels <= 120; channels += 23) {
6813 DWConvMicrokernelTester()
6814 .cr(24)
6815 .kr(9)
6816 .channels(channels)
6817 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006818 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006819 }
6820 }
6821
6822 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_step) {
6823 TEST_REQUIRES_X86_AVX;
6824 for (size_t channels = 1; channels <= 120; channels += 23) {
6825 for (size_t step = 2; step <= 9; step++) {
6826 DWConvMicrokernelTester()
6827 .cr(24)
6828 .kr(9)
6829 .channels(channels)
6830 .width(3)
6831 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006832 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006833 }
6834 }
6835 }
6836
6837 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
6838 TEST_REQUIRES_X86_AVX;
6839 for (size_t channels = 1; channels <= 120; channels += 23) {
6840 DWConvMicrokernelTester()
6841 .cr(24)
6842 .kr(9)
6843 .channels(24)
6844 .width(5)
6845 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08006846 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006847 }
6848 }
6849
6850 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmin) {
6851 TEST_REQUIRES_X86_AVX;
6852 for (size_t channels = 1; channels <= 120; channels += 23) {
6853 DWConvMicrokernelTester()
6854 .cr(24)
6855 .kr(9)
6856 .channels(channels)
6857 .width(3)
6858 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006859 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006860 }
6861 }
6862
6863 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmax) {
6864 TEST_REQUIRES_X86_AVX;
6865 for (size_t channels = 1; channels <= 120; channels += 23) {
6866 DWConvMicrokernelTester()
6867 .cr(24)
6868 .kr(9)
6869 .channels(channels)
6870 .width(3)
6871 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006872 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006873 }
6874 }
6875
6876 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, input_offset) {
6877 TEST_REQUIRES_X86_AVX;
6878 for (uint32_t channels = 48; channels < 384; channels += 72) {
6879 DWConvMicrokernelTester()
6880 .cr(24)
6881 .kr(9)
6882 .channels(channels)
6883 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08006884 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006885 }
6886 }
6887
6888 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, zero) {
6889 TEST_REQUIRES_X86_AVX;
6890 for (uint32_t mz = 0; mz < 9; mz++) {
6891 for (uint32_t channels = 48; channels < 384; channels += 72) {
6892 DWConvMicrokernelTester()
6893 .cr(24)
6894 .kr(9)
6895 .channels(channels)
6896 .input_offset(464)
6897 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006898 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006899 }
6900 }
6901 }
6902#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6903
6904
6905#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6906 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
6907 TEST_REQUIRES_X86_XOP;
6908 DWConvMicrokernelTester()
6909 .cr(8)
6910 .kr(9)
6911 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006912 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006913 }
6914
6915 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
6916 TEST_REQUIRES_X86_XOP;
6917 for (uint32_t channels = 16; channels < 128; channels += 24) {
6918 DWConvMicrokernelTester()
6919 .cr(8)
6920 .kr(9)
6921 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006922 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006923 }
6924 }
6925
6926 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
6927 TEST_REQUIRES_X86_XOP;
6928 for (uint32_t channels = 16; channels < 128; channels += 24) {
6929 DWConvMicrokernelTester()
6930 .cr(8)
6931 .kr(9)
6932 .channels(channels)
6933 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006934 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006935 }
6936 }
6937
6938 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
6939 TEST_REQUIRES_X86_XOP;
6940 for (uint32_t channels = 16; channels < 128; channels += 24) {
6941 DWConvMicrokernelTester()
6942 .cr(8)
6943 .kr(9)
6944 .channels(channels)
6945 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006946 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006947 }
6948 }
6949
6950 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
6951 TEST_REQUIRES_X86_XOP;
6952 for (uint32_t channels = 1; channels < 8; channels++) {
6953 DWConvMicrokernelTester()
6954 .cr(8)
6955 .kr(9)
6956 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006957 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006958 }
6959 }
6960
6961 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
6962 TEST_REQUIRES_X86_XOP;
6963 for (uint32_t channels = 9; channels < 16; channels++) {
6964 DWConvMicrokernelTester()
6965 .cr(8)
6966 .kr(9)
6967 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006968 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006969 }
6970 }
6971
6972 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
6973 TEST_REQUIRES_X86_XOP;
6974 for (uint32_t channels = 9; channels < 16; channels++) {
6975 DWConvMicrokernelTester()
6976 .cr(8)
6977 .kr(9)
6978 .channels(channels)
6979 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006980 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006981 }
6982 }
6983
6984 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
6985 TEST_REQUIRES_X86_XOP;
6986 for (uint32_t channels = 9; channels < 16; channels++) {
6987 DWConvMicrokernelTester()
6988 .cr(8)
6989 .kr(9)
6990 .channels(channels)
6991 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006992 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07006993 }
6994 }
6995
6996 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
6997 TEST_REQUIRES_X86_XOP;
6998 for (size_t channels = 1; channels <= 40; channels += 7) {
6999 DWConvMicrokernelTester()
7000 .cr(8)
7001 .kr(9)
7002 .channels(channels)
7003 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007004 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007005 }
7006 }
7007
7008 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
7009 TEST_REQUIRES_X86_XOP;
7010 for (size_t channels = 1; channels <= 40; channels += 7) {
7011 for (size_t step = 2; step <= 9; step++) {
7012 DWConvMicrokernelTester()
7013 .cr(8)
7014 .kr(9)
7015 .channels(channels)
7016 .width(3)
7017 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007018 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007019 }
7020 }
7021 }
7022
7023 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
7024 TEST_REQUIRES_X86_XOP;
7025 for (size_t channels = 1; channels <= 40; channels += 7) {
7026 DWConvMicrokernelTester()
7027 .cr(8)
7028 .kr(9)
7029 .channels(8)
7030 .width(5)
7031 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007032 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007033 }
7034 }
7035
7036 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
7037 TEST_REQUIRES_X86_XOP;
7038 for (size_t channels = 1; channels <= 40; channels += 7) {
7039 DWConvMicrokernelTester()
7040 .cr(8)
7041 .kr(9)
7042 .channels(channels)
7043 .width(3)
7044 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007045 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007046 }
7047 }
7048
7049 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
7050 TEST_REQUIRES_X86_XOP;
7051 for (size_t channels = 1; channels <= 40; channels += 7) {
7052 DWConvMicrokernelTester()
7053 .cr(8)
7054 .kr(9)
7055 .channels(channels)
7056 .width(3)
7057 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007058 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007059 }
7060 }
7061
7062 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
7063 TEST_REQUIRES_X86_XOP;
7064 for (uint32_t channels = 16; channels < 128; channels += 24) {
7065 DWConvMicrokernelTester()
7066 .cr(8)
7067 .kr(9)
7068 .channels(channels)
7069 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08007070 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007071 }
7072 }
7073
7074 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
7075 TEST_REQUIRES_X86_XOP;
7076 for (uint32_t mz = 0; mz < 9; mz++) {
7077 for (uint32_t channels = 16; channels < 128; channels += 24) {
7078 DWConvMicrokernelTester()
7079 .cr(8)
7080 .kr(9)
7081 .channels(channels)
7082 .input_offset(176)
7083 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007084 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007085 }
7086 }
7087 }
7088#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7089
7090
7091#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7092 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
7093 TEST_REQUIRES_X86_XOP;
7094 DWConvMicrokernelTester()
7095 .cr(16)
7096 .kr(9)
7097 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007099 }
7100
7101 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
7102 TEST_REQUIRES_X86_XOP;
7103 for (uint32_t channels = 32; channels < 256; channels += 48) {
7104 DWConvMicrokernelTester()
7105 .cr(16)
7106 .kr(9)
7107 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007109 }
7110 }
7111
7112 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
7113 TEST_REQUIRES_X86_XOP;
7114 for (uint32_t channels = 32; channels < 256; channels += 48) {
7115 DWConvMicrokernelTester()
7116 .cr(16)
7117 .kr(9)
7118 .channels(channels)
7119 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007120 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007121 }
7122 }
7123
7124 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
7125 TEST_REQUIRES_X86_XOP;
7126 for (uint32_t channels = 32; channels < 256; channels += 48) {
7127 DWConvMicrokernelTester()
7128 .cr(16)
7129 .kr(9)
7130 .channels(channels)
7131 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007132 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007133 }
7134 }
7135
7136 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
7137 TEST_REQUIRES_X86_XOP;
7138 for (uint32_t channels = 1; channels < 16; channels++) {
7139 DWConvMicrokernelTester()
7140 .cr(16)
7141 .kr(9)
7142 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007143 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007144 }
7145 }
7146
7147 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
7148 TEST_REQUIRES_X86_XOP;
7149 for (uint32_t channels = 17; channels < 32; channels++) {
7150 DWConvMicrokernelTester()
7151 .cr(16)
7152 .kr(9)
7153 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007154 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007155 }
7156 }
7157
7158 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
7159 TEST_REQUIRES_X86_XOP;
7160 for (uint32_t channels = 17; channels < 32; channels++) {
7161 DWConvMicrokernelTester()
7162 .cr(16)
7163 .kr(9)
7164 .channels(channels)
7165 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007166 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007167 }
7168 }
7169
7170 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
7171 TEST_REQUIRES_X86_XOP;
7172 for (uint32_t channels = 17; channels < 32; channels++) {
7173 DWConvMicrokernelTester()
7174 .cr(16)
7175 .kr(9)
7176 .channels(channels)
7177 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007178 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007179 }
7180 }
7181
7182 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
7183 TEST_REQUIRES_X86_XOP;
7184 for (size_t channels = 1; channels <= 80; channels += 15) {
7185 DWConvMicrokernelTester()
7186 .cr(16)
7187 .kr(9)
7188 .channels(channels)
7189 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007190 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007191 }
7192 }
7193
7194 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
7195 TEST_REQUIRES_X86_XOP;
7196 for (size_t channels = 1; channels <= 80; channels += 15) {
7197 for (size_t step = 2; step <= 9; step++) {
7198 DWConvMicrokernelTester()
7199 .cr(16)
7200 .kr(9)
7201 .channels(channels)
7202 .width(3)
7203 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007204 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007205 }
7206 }
7207 }
7208
7209 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
7210 TEST_REQUIRES_X86_XOP;
7211 for (size_t channels = 1; channels <= 80; channels += 15) {
7212 DWConvMicrokernelTester()
7213 .cr(16)
7214 .kr(9)
7215 .channels(16)
7216 .width(5)
7217 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007218 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007219 }
7220 }
7221
7222 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
7223 TEST_REQUIRES_X86_XOP;
7224 for (size_t channels = 1; channels <= 80; channels += 15) {
7225 DWConvMicrokernelTester()
7226 .cr(16)
7227 .kr(9)
7228 .channels(channels)
7229 .width(3)
7230 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007231 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007232 }
7233 }
7234
7235 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
7236 TEST_REQUIRES_X86_XOP;
7237 for (size_t channels = 1; channels <= 80; channels += 15) {
7238 DWConvMicrokernelTester()
7239 .cr(16)
7240 .kr(9)
7241 .channels(channels)
7242 .width(3)
7243 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007244 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007245 }
7246 }
7247
7248 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
7249 TEST_REQUIRES_X86_XOP;
7250 for (uint32_t channels = 32; channels < 256; channels += 48) {
7251 DWConvMicrokernelTester()
7252 .cr(16)
7253 .kr(9)
7254 .channels(channels)
7255 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08007256 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007257 }
7258 }
7259
7260 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
7261 TEST_REQUIRES_X86_XOP;
7262 for (uint32_t mz = 0; mz < 9; mz++) {
7263 for (uint32_t channels = 32; channels < 256; channels += 48) {
7264 DWConvMicrokernelTester()
7265 .cr(16)
7266 .kr(9)
7267 .channels(channels)
7268 .input_offset(304)
7269 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007270 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007271 }
7272 }
7273 }
7274#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7275
7276
7277#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7278 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_eq_24) {
7279 TEST_REQUIRES_X86_XOP;
7280 DWConvMicrokernelTester()
7281 .cr(24)
7282 .kr(9)
7283 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08007284 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007285 }
7286
7287 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24) {
7288 TEST_REQUIRES_X86_XOP;
7289 for (uint32_t channels = 48; channels < 384; channels += 72) {
7290 DWConvMicrokernelTester()
7291 .cr(24)
7292 .kr(9)
7293 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007294 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007295 }
7296 }
7297
7298 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
7299 TEST_REQUIRES_X86_XOP;
7300 for (uint32_t channels = 48; channels < 384; channels += 72) {
7301 DWConvMicrokernelTester()
7302 .cr(24)
7303 .kr(9)
7304 .channels(channels)
7305 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007306 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007307 }
7308 }
7309
7310 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
7311 TEST_REQUIRES_X86_XOP;
7312 for (uint32_t channels = 48; channels < 384; channels += 72) {
7313 DWConvMicrokernelTester()
7314 .cr(24)
7315 .kr(9)
7316 .channels(channels)
7317 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007318 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007319 }
7320 }
7321
7322 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_lt_24) {
7323 TEST_REQUIRES_X86_XOP;
7324 for (uint32_t channels = 1; channels < 24; channels++) {
7325 DWConvMicrokernelTester()
7326 .cr(24)
7327 .kr(9)
7328 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007329 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007330 }
7331 }
7332
7333 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24) {
7334 TEST_REQUIRES_X86_XOP;
7335 for (uint32_t channels = 25; channels < 48; channels++) {
7336 DWConvMicrokernelTester()
7337 .cr(24)
7338 .kr(9)
7339 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007340 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007341 }
7342 }
7343
7344 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
7345 TEST_REQUIRES_X86_XOP;
7346 for (uint32_t channels = 25; channels < 48; channels++) {
7347 DWConvMicrokernelTester()
7348 .cr(24)
7349 .kr(9)
7350 .channels(channels)
7351 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007352 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007353 }
7354 }
7355
7356 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
7357 TEST_REQUIRES_X86_XOP;
7358 for (uint32_t channels = 25; channels < 48; channels++) {
7359 DWConvMicrokernelTester()
7360 .cr(24)
7361 .kr(9)
7362 .channels(channels)
7363 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007364 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007365 }
7366 }
7367
7368 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel) {
7369 TEST_REQUIRES_X86_XOP;
7370 for (size_t channels = 1; channels <= 120; channels += 23) {
7371 DWConvMicrokernelTester()
7372 .cr(24)
7373 .kr(9)
7374 .channels(channels)
7375 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007376 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007377 }
7378 }
7379
7380 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_step) {
7381 TEST_REQUIRES_X86_XOP;
7382 for (size_t channels = 1; channels <= 120; channels += 23) {
7383 for (size_t step = 2; step <= 9; step++) {
7384 DWConvMicrokernelTester()
7385 .cr(24)
7386 .kr(9)
7387 .channels(channels)
7388 .width(3)
7389 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007390 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007391 }
7392 }
7393 }
7394
7395 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
7396 TEST_REQUIRES_X86_XOP;
7397 for (size_t channels = 1; channels <= 120; channels += 23) {
7398 DWConvMicrokernelTester()
7399 .cr(24)
7400 .kr(9)
7401 .channels(24)
7402 .width(5)
7403 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08007404 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007405 }
7406 }
7407
7408 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmin) {
7409 TEST_REQUIRES_X86_XOP;
7410 for (size_t channels = 1; channels <= 120; channels += 23) {
7411 DWConvMicrokernelTester()
7412 .cr(24)
7413 .kr(9)
7414 .channels(channels)
7415 .width(3)
7416 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007417 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007418 }
7419 }
7420
7421 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmax) {
7422 TEST_REQUIRES_X86_XOP;
7423 for (size_t channels = 1; channels <= 120; channels += 23) {
7424 DWConvMicrokernelTester()
7425 .cr(24)
7426 .kr(9)
7427 .channels(channels)
7428 .width(3)
7429 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007430 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007431 }
7432 }
7433
7434 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, input_offset) {
7435 TEST_REQUIRES_X86_XOP;
7436 for (uint32_t channels = 48; channels < 384; channels += 72) {
7437 DWConvMicrokernelTester()
7438 .cr(24)
7439 .kr(9)
7440 .channels(channels)
7441 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08007442 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007443 }
7444 }
7445
7446 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, zero) {
7447 TEST_REQUIRES_X86_XOP;
7448 for (uint32_t mz = 0; mz < 9; mz++) {
7449 for (uint32_t channels = 48; channels < 384; channels += 72) {
7450 DWConvMicrokernelTester()
7451 .cr(24)
7452 .kr(9)
7453 .channels(channels)
7454 .input_offset(464)
7455 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007456 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -07007457 }
7458 }
7459 }
7460#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7461
7462
7463#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007464 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
7465 TEST_REQUIRES_X86_AVX2;
7466 DWConvMicrokernelTester()
7467 .cr(8)
7468 .kr(9)
7469 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007470 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007471 }
7472
7473 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
7474 TEST_REQUIRES_X86_AVX2;
7475 for (uint32_t channels = 16; channels < 128; channels += 24) {
7476 DWConvMicrokernelTester()
7477 .cr(8)
7478 .kr(9)
7479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007480 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007481 }
7482 }
7483
7484 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
7485 TEST_REQUIRES_X86_AVX2;
7486 for (uint32_t channels = 16; channels < 128; channels += 24) {
7487 DWConvMicrokernelTester()
7488 .cr(8)
7489 .kr(9)
7490 .channels(channels)
7491 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007492 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007493 }
7494 }
7495
7496 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
7497 TEST_REQUIRES_X86_AVX2;
7498 for (uint32_t channels = 16; channels < 128; channels += 24) {
7499 DWConvMicrokernelTester()
7500 .cr(8)
7501 .kr(9)
7502 .channels(channels)
7503 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007504 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007505 }
7506 }
7507
7508 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
7509 TEST_REQUIRES_X86_AVX2;
7510 for (uint32_t channels = 1; channels < 8; channels++) {
7511 DWConvMicrokernelTester()
7512 .cr(8)
7513 .kr(9)
7514 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007515 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007516 }
7517 }
7518
7519 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
7520 TEST_REQUIRES_X86_AVX2;
7521 for (uint32_t channels = 9; channels < 16; channels++) {
7522 DWConvMicrokernelTester()
7523 .cr(8)
7524 .kr(9)
7525 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007526 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007527 }
7528 }
7529
7530 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
7531 TEST_REQUIRES_X86_AVX2;
7532 for (uint32_t channels = 9; channels < 16; channels++) {
7533 DWConvMicrokernelTester()
7534 .cr(8)
7535 .kr(9)
7536 .channels(channels)
7537 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007538 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007539 }
7540 }
7541
7542 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
7543 TEST_REQUIRES_X86_AVX2;
7544 for (uint32_t channels = 9; channels < 16; channels++) {
7545 DWConvMicrokernelTester()
7546 .cr(8)
7547 .kr(9)
7548 .channels(channels)
7549 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007550 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007551 }
7552 }
7553
7554 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
7555 TEST_REQUIRES_X86_AVX2;
7556 for (size_t channels = 1; channels <= 40; channels += 7) {
7557 DWConvMicrokernelTester()
7558 .cr(8)
7559 .kr(9)
7560 .channels(channels)
7561 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007562 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007563 }
7564 }
7565
7566 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
7567 TEST_REQUIRES_X86_AVX2;
7568 for (size_t channels = 1; channels <= 40; channels += 7) {
7569 for (size_t step = 2; step <= 9; step++) {
7570 DWConvMicrokernelTester()
7571 .cr(8)
7572 .kr(9)
7573 .channels(channels)
7574 .width(3)
7575 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007576 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007577 }
7578 }
7579 }
7580
7581 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
7582 TEST_REQUIRES_X86_AVX2;
7583 for (size_t channels = 1; channels <= 40; channels += 7) {
7584 DWConvMicrokernelTester()
7585 .cr(8)
7586 .kr(9)
7587 .channels(8)
7588 .width(5)
7589 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007590 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007591 }
7592 }
7593
7594 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
7595 TEST_REQUIRES_X86_AVX2;
7596 for (size_t channels = 1; channels <= 40; channels += 7) {
7597 DWConvMicrokernelTester()
7598 .cr(8)
7599 .kr(9)
7600 .channels(channels)
7601 .width(3)
7602 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007603 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007604 }
7605 }
7606
7607 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
7608 TEST_REQUIRES_X86_AVX2;
7609 for (size_t channels = 1; channels <= 40; channels += 7) {
7610 DWConvMicrokernelTester()
7611 .cr(8)
7612 .kr(9)
7613 .channels(channels)
7614 .width(3)
7615 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007616 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007617 }
7618 }
7619
7620 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
7621 TEST_REQUIRES_X86_AVX2;
7622 for (uint32_t channels = 16; channels < 128; channels += 24) {
7623 DWConvMicrokernelTester()
7624 .cr(8)
7625 .kr(9)
7626 .channels(channels)
7627 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08007628 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007629 }
7630 }
7631
7632 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
7633 TEST_REQUIRES_X86_AVX2;
7634 for (uint32_t mz = 0; mz < 9; mz++) {
7635 for (uint32_t channels = 16; channels < 128; channels += 24) {
7636 DWConvMicrokernelTester()
7637 .cr(8)
7638 .kr(9)
7639 .channels(channels)
7640 .input_offset(176)
7641 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007642 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007643 }
7644 }
7645 }
7646#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7647
7648
7649#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7650 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
7651 TEST_REQUIRES_X86_AVX2;
7652 DWConvMicrokernelTester()
7653 .cr(16)
7654 .kr(9)
7655 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007657 }
7658
7659 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
7660 TEST_REQUIRES_X86_AVX2;
7661 for (uint32_t channels = 32; channels < 256; channels += 48) {
7662 DWConvMicrokernelTester()
7663 .cr(16)
7664 .kr(9)
7665 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007666 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007667 }
7668 }
7669
7670 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
7671 TEST_REQUIRES_X86_AVX2;
7672 for (uint32_t channels = 32; channels < 256; channels += 48) {
7673 DWConvMicrokernelTester()
7674 .cr(16)
7675 .kr(9)
7676 .channels(channels)
7677 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007678 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007679 }
7680 }
7681
7682 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
7683 TEST_REQUIRES_X86_AVX2;
7684 for (uint32_t channels = 32; channels < 256; channels += 48) {
7685 DWConvMicrokernelTester()
7686 .cr(16)
7687 .kr(9)
7688 .channels(channels)
7689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007690 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007691 }
7692 }
7693
7694 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
7695 TEST_REQUIRES_X86_AVX2;
7696 for (uint32_t channels = 1; channels < 16; channels++) {
7697 DWConvMicrokernelTester()
7698 .cr(16)
7699 .kr(9)
7700 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007701 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007702 }
7703 }
7704
7705 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
7706 TEST_REQUIRES_X86_AVX2;
7707 for (uint32_t channels = 17; channels < 32; channels++) {
7708 DWConvMicrokernelTester()
7709 .cr(16)
7710 .kr(9)
7711 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007712 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007713 }
7714 }
7715
7716 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
7717 TEST_REQUIRES_X86_AVX2;
7718 for (uint32_t channels = 17; channels < 32; channels++) {
7719 DWConvMicrokernelTester()
7720 .cr(16)
7721 .kr(9)
7722 .channels(channels)
7723 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007724 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007725 }
7726 }
7727
7728 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
7729 TEST_REQUIRES_X86_AVX2;
7730 for (uint32_t channels = 17; channels < 32; channels++) {
7731 DWConvMicrokernelTester()
7732 .cr(16)
7733 .kr(9)
7734 .channels(channels)
7735 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007736 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007737 }
7738 }
7739
7740 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
7741 TEST_REQUIRES_X86_AVX2;
7742 for (size_t channels = 1; channels <= 80; channels += 15) {
7743 DWConvMicrokernelTester()
7744 .cr(16)
7745 .kr(9)
7746 .channels(channels)
7747 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007748 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007749 }
7750 }
7751
7752 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
7753 TEST_REQUIRES_X86_AVX2;
7754 for (size_t channels = 1; channels <= 80; channels += 15) {
7755 for (size_t step = 2; step <= 9; step++) {
7756 DWConvMicrokernelTester()
7757 .cr(16)
7758 .kr(9)
7759 .channels(channels)
7760 .width(3)
7761 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007762 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007763 }
7764 }
7765 }
7766
7767 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
7768 TEST_REQUIRES_X86_AVX2;
7769 for (size_t channels = 1; channels <= 80; channels += 15) {
7770 DWConvMicrokernelTester()
7771 .cr(16)
7772 .kr(9)
7773 .channels(16)
7774 .width(5)
7775 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007776 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007777 }
7778 }
7779
7780 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
7781 TEST_REQUIRES_X86_AVX2;
7782 for (size_t channels = 1; channels <= 80; channels += 15) {
7783 DWConvMicrokernelTester()
7784 .cr(16)
7785 .kr(9)
7786 .channels(channels)
7787 .width(3)
7788 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007790 }
7791 }
7792
7793 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
7794 TEST_REQUIRES_X86_AVX2;
7795 for (size_t channels = 1; channels <= 80; channels += 15) {
7796 DWConvMicrokernelTester()
7797 .cr(16)
7798 .kr(9)
7799 .channels(channels)
7800 .width(3)
7801 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007802 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007803 }
7804 }
7805
7806 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
7807 TEST_REQUIRES_X86_AVX2;
7808 for (uint32_t channels = 32; channels < 256; channels += 48) {
7809 DWConvMicrokernelTester()
7810 .cr(16)
7811 .kr(9)
7812 .channels(channels)
7813 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08007814 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007815 }
7816 }
7817
7818 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
7819 TEST_REQUIRES_X86_AVX2;
7820 for (uint32_t mz = 0; mz < 9; mz++) {
7821 for (uint32_t channels = 32; channels < 256; channels += 48) {
7822 DWConvMicrokernelTester()
7823 .cr(16)
7824 .kr(9)
7825 .channels(channels)
7826 .input_offset(304)
7827 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007828 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007829 }
7830 }
7831 }
7832#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7833
7834
7835#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7836 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
7837 TEST_REQUIRES_X86_AVX2;
7838 DWConvMicrokernelTester()
7839 .cr(24)
7840 .kr(9)
7841 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08007842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007843 }
7844
7845 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
7846 TEST_REQUIRES_X86_AVX2;
7847 for (uint32_t channels = 48; channels < 384; channels += 72) {
7848 DWConvMicrokernelTester()
7849 .cr(24)
7850 .kr(9)
7851 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007852 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007853 }
7854 }
7855
7856 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
7857 TEST_REQUIRES_X86_AVX2;
7858 for (uint32_t channels = 48; channels < 384; channels += 72) {
7859 DWConvMicrokernelTester()
7860 .cr(24)
7861 .kr(9)
7862 .channels(channels)
7863 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007864 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007865 }
7866 }
7867
7868 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
7869 TEST_REQUIRES_X86_AVX2;
7870 for (uint32_t channels = 48; channels < 384; channels += 72) {
7871 DWConvMicrokernelTester()
7872 .cr(24)
7873 .kr(9)
7874 .channels(channels)
7875 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007877 }
7878 }
7879
7880 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
7881 TEST_REQUIRES_X86_AVX2;
7882 for (uint32_t channels = 1; channels < 24; channels++) {
7883 DWConvMicrokernelTester()
7884 .cr(24)
7885 .kr(9)
7886 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007887 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007888 }
7889 }
7890
7891 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
7892 TEST_REQUIRES_X86_AVX2;
7893 for (uint32_t channels = 25; channels < 48; channels++) {
7894 DWConvMicrokernelTester()
7895 .cr(24)
7896 .kr(9)
7897 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007898 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007899 }
7900 }
7901
7902 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
7903 TEST_REQUIRES_X86_AVX2;
7904 for (uint32_t channels = 25; channels < 48; channels++) {
7905 DWConvMicrokernelTester()
7906 .cr(24)
7907 .kr(9)
7908 .channels(channels)
7909 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007910 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007911 }
7912 }
7913
7914 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
7915 TEST_REQUIRES_X86_AVX2;
7916 for (uint32_t channels = 25; channels < 48; channels++) {
7917 DWConvMicrokernelTester()
7918 .cr(24)
7919 .kr(9)
7920 .channels(channels)
7921 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007922 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007923 }
7924 }
7925
7926 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
7927 TEST_REQUIRES_X86_AVX2;
7928 for (size_t channels = 1; channels <= 120; channels += 23) {
7929 DWConvMicrokernelTester()
7930 .cr(24)
7931 .kr(9)
7932 .channels(channels)
7933 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007934 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007935 }
7936 }
7937
7938 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
7939 TEST_REQUIRES_X86_AVX2;
7940 for (size_t channels = 1; channels <= 120; channels += 23) {
7941 for (size_t step = 2; step <= 9; step++) {
7942 DWConvMicrokernelTester()
7943 .cr(24)
7944 .kr(9)
7945 .channels(channels)
7946 .width(3)
7947 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007948 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007949 }
7950 }
7951 }
7952
7953 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
7954 TEST_REQUIRES_X86_AVX2;
7955 for (size_t channels = 1; channels <= 120; channels += 23) {
7956 DWConvMicrokernelTester()
7957 .cr(24)
7958 .kr(9)
7959 .channels(24)
7960 .width(5)
7961 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08007962 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007963 }
7964 }
7965
7966 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
7967 TEST_REQUIRES_X86_AVX2;
7968 for (size_t channels = 1; channels <= 120; channels += 23) {
7969 DWConvMicrokernelTester()
7970 .cr(24)
7971 .kr(9)
7972 .channels(channels)
7973 .width(3)
7974 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007975 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007976 }
7977 }
7978
7979 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
7980 TEST_REQUIRES_X86_AVX2;
7981 for (size_t channels = 1; channels <= 120; channels += 23) {
7982 DWConvMicrokernelTester()
7983 .cr(24)
7984 .kr(9)
7985 .channels(channels)
7986 .width(3)
7987 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007988 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07007989 }
7990 }
7991
7992 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
7993 TEST_REQUIRES_X86_AVX2;
7994 for (uint32_t channels = 48; channels < 384; channels += 72) {
7995 DWConvMicrokernelTester()
7996 .cr(24)
7997 .kr(9)
7998 .channels(channels)
7999 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08008000 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008001 }
8002 }
8003
8004 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
8005 TEST_REQUIRES_X86_AVX2;
8006 for (uint32_t mz = 0; mz < 9; mz++) {
8007 for (uint32_t channels = 48; channels < 384; channels += 72) {
8008 DWConvMicrokernelTester()
8009 .cr(24)
8010 .kr(9)
8011 .channels(channels)
8012 .input_offset(464)
8013 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008014 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008015 }
8016 }
8017 }
8018#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8019
8020
8021#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8022 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
8023 TEST_REQUIRES_X86_AVX2;
8024 DWConvMicrokernelTester()
8025 .cr(32)
8026 .kr(9)
8027 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08008028 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008029 }
8030
8031 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
8032 TEST_REQUIRES_X86_AVX2;
8033 for (uint32_t channels = 64; channels < 512; channels += 96) {
8034 DWConvMicrokernelTester()
8035 .cr(32)
8036 .kr(9)
8037 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008038 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008039 }
8040 }
8041
8042 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
8043 TEST_REQUIRES_X86_AVX2;
8044 for (uint32_t channels = 64; channels < 512; channels += 96) {
8045 DWConvMicrokernelTester()
8046 .cr(32)
8047 .kr(9)
8048 .channels(channels)
8049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008050 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008051 }
8052 }
8053
8054 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
8055 TEST_REQUIRES_X86_AVX2;
8056 for (uint32_t channels = 64; channels < 512; channels += 96) {
8057 DWConvMicrokernelTester()
8058 .cr(32)
8059 .kr(9)
8060 .channels(channels)
8061 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008063 }
8064 }
8065
8066 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
8067 TEST_REQUIRES_X86_AVX2;
8068 for (uint32_t channels = 1; channels < 32; channels++) {
8069 DWConvMicrokernelTester()
8070 .cr(32)
8071 .kr(9)
8072 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008073 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008074 }
8075 }
8076
8077 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
8078 TEST_REQUIRES_X86_AVX2;
8079 for (uint32_t channels = 33; channels < 64; channels++) {
8080 DWConvMicrokernelTester()
8081 .cr(32)
8082 .kr(9)
8083 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008084 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008085 }
8086 }
8087
8088 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
8089 TEST_REQUIRES_X86_AVX2;
8090 for (uint32_t channels = 33; channels < 64; channels++) {
8091 DWConvMicrokernelTester()
8092 .cr(32)
8093 .kr(9)
8094 .channels(channels)
8095 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008096 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008097 }
8098 }
8099
8100 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
8101 TEST_REQUIRES_X86_AVX2;
8102 for (uint32_t channels = 33; channels < 64; channels++) {
8103 DWConvMicrokernelTester()
8104 .cr(32)
8105 .kr(9)
8106 .channels(channels)
8107 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008109 }
8110 }
8111
8112 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
8113 TEST_REQUIRES_X86_AVX2;
8114 for (size_t channels = 1; channels <= 160; channels += 31) {
8115 DWConvMicrokernelTester()
8116 .cr(32)
8117 .kr(9)
8118 .channels(channels)
8119 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008120 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008121 }
8122 }
8123
8124 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
8125 TEST_REQUIRES_X86_AVX2;
8126 for (size_t channels = 1; channels <= 160; channels += 31) {
8127 for (size_t step = 2; step <= 9; step++) {
8128 DWConvMicrokernelTester()
8129 .cr(32)
8130 .kr(9)
8131 .channels(channels)
8132 .width(3)
8133 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008134 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008135 }
8136 }
8137 }
8138
8139 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
8140 TEST_REQUIRES_X86_AVX2;
8141 for (size_t channels = 1; channels <= 160; channels += 31) {
8142 DWConvMicrokernelTester()
8143 .cr(32)
8144 .kr(9)
8145 .channels(32)
8146 .width(5)
8147 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008148 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008149 }
8150 }
8151
8152 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
8153 TEST_REQUIRES_X86_AVX2;
8154 for (size_t channels = 1; channels <= 160; channels += 31) {
8155 DWConvMicrokernelTester()
8156 .cr(32)
8157 .kr(9)
8158 .channels(channels)
8159 .width(3)
8160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008161 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008162 }
8163 }
8164
8165 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
8166 TEST_REQUIRES_X86_AVX2;
8167 for (size_t channels = 1; channels <= 160; channels += 31) {
8168 DWConvMicrokernelTester()
8169 .cr(32)
8170 .kr(9)
8171 .channels(channels)
8172 .width(3)
8173 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008174 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008175 }
8176 }
8177
8178 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
8179 TEST_REQUIRES_X86_AVX2;
8180 for (uint32_t channels = 64; channels < 512; channels += 96) {
8181 DWConvMicrokernelTester()
8182 .cr(32)
8183 .kr(9)
8184 .channels(channels)
8185 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08008186 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008187 }
8188 }
8189
8190 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
8191 TEST_REQUIRES_X86_AVX2;
8192 for (uint32_t mz = 0; mz < 9; mz++) {
8193 for (uint32_t channels = 64; channels < 512; channels += 96) {
8194 DWConvMicrokernelTester()
8195 .cr(32)
8196 .kr(9)
8197 .channels(channels)
8198 .input_offset(592)
8199 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008200 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -07008201 }
8202 }
8203 }
8204#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8205
8206
8207#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -07008208 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
8209 TEST_REQUIRES_X86_AVX512SKX;
8210 DWConvMicrokernelTester()
8211 .cr(16)
8212 .kr(9)
8213 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008214 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008215 }
8216
8217 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
8218 TEST_REQUIRES_X86_AVX512SKX;
8219 for (uint32_t channels = 32; channels < 256; channels += 48) {
8220 DWConvMicrokernelTester()
8221 .cr(16)
8222 .kr(9)
8223 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008224 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008225 }
8226 }
8227
8228 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
8229 TEST_REQUIRES_X86_AVX512SKX;
8230 for (uint32_t channels = 32; channels < 256; channels += 48) {
8231 DWConvMicrokernelTester()
8232 .cr(16)
8233 .kr(9)
8234 .channels(channels)
8235 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008236 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008237 }
8238 }
8239
8240 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
8241 TEST_REQUIRES_X86_AVX512SKX;
8242 for (uint32_t channels = 32; channels < 256; channels += 48) {
8243 DWConvMicrokernelTester()
8244 .cr(16)
8245 .kr(9)
8246 .channels(channels)
8247 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008249 }
8250 }
8251
8252 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
8253 TEST_REQUIRES_X86_AVX512SKX;
8254 for (uint32_t channels = 1; channels < 16; channels++) {
8255 DWConvMicrokernelTester()
8256 .cr(16)
8257 .kr(9)
8258 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008260 }
8261 }
8262
8263 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
8264 TEST_REQUIRES_X86_AVX512SKX;
8265 for (uint32_t channels = 17; channels < 32; channels++) {
8266 DWConvMicrokernelTester()
8267 .cr(16)
8268 .kr(9)
8269 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008270 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008271 }
8272 }
8273
8274 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
8275 TEST_REQUIRES_X86_AVX512SKX;
8276 for (uint32_t channels = 17; channels < 32; channels++) {
8277 DWConvMicrokernelTester()
8278 .cr(16)
8279 .kr(9)
8280 .channels(channels)
8281 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008282 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008283 }
8284 }
8285
8286 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
8287 TEST_REQUIRES_X86_AVX512SKX;
8288 for (uint32_t channels = 17; channels < 32; channels++) {
8289 DWConvMicrokernelTester()
8290 .cr(16)
8291 .kr(9)
8292 .channels(channels)
8293 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008294 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008295 }
8296 }
8297
8298 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
8299 TEST_REQUIRES_X86_AVX512SKX;
8300 for (size_t channels = 1; channels <= 80; channels += 15) {
8301 DWConvMicrokernelTester()
8302 .cr(16)
8303 .kr(9)
8304 .channels(channels)
8305 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008307 }
8308 }
8309
8310 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
8311 TEST_REQUIRES_X86_AVX512SKX;
8312 for (size_t channels = 1; channels <= 80; channels += 15) {
8313 for (size_t step = 2; step <= 9; step++) {
8314 DWConvMicrokernelTester()
8315 .cr(16)
8316 .kr(9)
8317 .channels(channels)
8318 .width(3)
8319 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008320 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008321 }
8322 }
8323 }
8324
8325 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
8326 TEST_REQUIRES_X86_AVX512SKX;
8327 for (size_t channels = 1; channels <= 80; channels += 15) {
8328 DWConvMicrokernelTester()
8329 .cr(16)
8330 .kr(9)
8331 .channels(16)
8332 .width(5)
8333 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008334 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008335 }
8336 }
8337
8338 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
8339 TEST_REQUIRES_X86_AVX512SKX;
8340 for (size_t channels = 1; channels <= 80; channels += 15) {
8341 DWConvMicrokernelTester()
8342 .cr(16)
8343 .kr(9)
8344 .channels(channels)
8345 .width(3)
8346 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008347 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008348 }
8349 }
8350
8351 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
8352 TEST_REQUIRES_X86_AVX512SKX;
8353 for (size_t channels = 1; channels <= 80; channels += 15) {
8354 DWConvMicrokernelTester()
8355 .cr(16)
8356 .kr(9)
8357 .channels(channels)
8358 .width(3)
8359 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008360 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008361 }
8362 }
8363
8364 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
8365 TEST_REQUIRES_X86_AVX512SKX;
8366 for (uint32_t channels = 32; channels < 256; channels += 48) {
8367 DWConvMicrokernelTester()
8368 .cr(16)
8369 .kr(9)
8370 .channels(channels)
8371 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08008372 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008373 }
8374 }
8375
8376 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
8377 TEST_REQUIRES_X86_AVX512SKX;
8378 for (uint32_t mz = 0; mz < 9; mz++) {
8379 for (uint32_t channels = 32; channels < 256; channels += 48) {
8380 DWConvMicrokernelTester()
8381 .cr(16)
8382 .kr(9)
8383 .channels(channels)
8384 .input_offset(304)
8385 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008386 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008387 }
8388 }
8389 }
8390#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8391
8392
8393#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8394 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
8395 TEST_REQUIRES_X86_AVX512SKX;
8396 DWConvMicrokernelTester()
8397 .cr(32)
8398 .kr(9)
8399 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08008400 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008401 }
8402
8403 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
8404 TEST_REQUIRES_X86_AVX512SKX;
8405 for (uint32_t channels = 64; channels < 512; channels += 96) {
8406 DWConvMicrokernelTester()
8407 .cr(32)
8408 .kr(9)
8409 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008410 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008411 }
8412 }
8413
8414 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
8415 TEST_REQUIRES_X86_AVX512SKX;
8416 for (uint32_t channels = 64; channels < 512; channels += 96) {
8417 DWConvMicrokernelTester()
8418 .cr(32)
8419 .kr(9)
8420 .channels(channels)
8421 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008423 }
8424 }
8425
8426 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
8427 TEST_REQUIRES_X86_AVX512SKX;
8428 for (uint32_t channels = 64; channels < 512; channels += 96) {
8429 DWConvMicrokernelTester()
8430 .cr(32)
8431 .kr(9)
8432 .channels(channels)
8433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008435 }
8436 }
8437
8438 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
8439 TEST_REQUIRES_X86_AVX512SKX;
8440 for (uint32_t channels = 1; channels < 32; channels++) {
8441 DWConvMicrokernelTester()
8442 .cr(32)
8443 .kr(9)
8444 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008445 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008446 }
8447 }
8448
8449 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
8450 TEST_REQUIRES_X86_AVX512SKX;
8451 for (uint32_t channels = 33; channels < 64; channels++) {
8452 DWConvMicrokernelTester()
8453 .cr(32)
8454 .kr(9)
8455 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008456 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008457 }
8458 }
8459
8460 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
8461 TEST_REQUIRES_X86_AVX512SKX;
8462 for (uint32_t channels = 33; channels < 64; channels++) {
8463 DWConvMicrokernelTester()
8464 .cr(32)
8465 .kr(9)
8466 .channels(channels)
8467 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008468 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008469 }
8470 }
8471
8472 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
8473 TEST_REQUIRES_X86_AVX512SKX;
8474 for (uint32_t channels = 33; channels < 64; channels++) {
8475 DWConvMicrokernelTester()
8476 .cr(32)
8477 .kr(9)
8478 .channels(channels)
8479 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008480 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008481 }
8482 }
8483
8484 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
8485 TEST_REQUIRES_X86_AVX512SKX;
8486 for (size_t channels = 1; channels <= 160; channels += 31) {
8487 DWConvMicrokernelTester()
8488 .cr(32)
8489 .kr(9)
8490 .channels(channels)
8491 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008492 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008493 }
8494 }
8495
8496 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
8497 TEST_REQUIRES_X86_AVX512SKX;
8498 for (size_t channels = 1; channels <= 160; channels += 31) {
8499 for (size_t step = 2; step <= 9; step++) {
8500 DWConvMicrokernelTester()
8501 .cr(32)
8502 .kr(9)
8503 .channels(channels)
8504 .width(3)
8505 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008506 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008507 }
8508 }
8509 }
8510
8511 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
8512 TEST_REQUIRES_X86_AVX512SKX;
8513 for (size_t channels = 1; channels <= 160; channels += 31) {
8514 DWConvMicrokernelTester()
8515 .cr(32)
8516 .kr(9)
8517 .channels(32)
8518 .width(5)
8519 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008520 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008521 }
8522 }
8523
8524 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
8525 TEST_REQUIRES_X86_AVX512SKX;
8526 for (size_t channels = 1; channels <= 160; channels += 31) {
8527 DWConvMicrokernelTester()
8528 .cr(32)
8529 .kr(9)
8530 .channels(channels)
8531 .width(3)
8532 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008533 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008534 }
8535 }
8536
8537 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
8538 TEST_REQUIRES_X86_AVX512SKX;
8539 for (size_t channels = 1; channels <= 160; channels += 31) {
8540 DWConvMicrokernelTester()
8541 .cr(32)
8542 .kr(9)
8543 .channels(channels)
8544 .width(3)
8545 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008546 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008547 }
8548 }
8549
8550 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
8551 TEST_REQUIRES_X86_AVX512SKX;
8552 for (uint32_t channels = 64; channels < 512; channels += 96) {
8553 DWConvMicrokernelTester()
8554 .cr(32)
8555 .kr(9)
8556 .channels(channels)
8557 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08008558 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008559 }
8560 }
8561
8562 TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
8563 TEST_REQUIRES_X86_AVX512SKX;
8564 for (uint32_t mz = 0; mz < 9; mz++) {
8565 for (uint32_t channels = 64; channels < 512; channels += 96) {
8566 DWConvMicrokernelTester()
8567 .cr(32)
8568 .kr(9)
8569 .channels(channels)
8570 .input_offset(592)
8571 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008572 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -07008573 }
8574 }
8575 }
8576#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8577
8578
Marat Dukhan4c617792021-12-21 15:47:58 -08008579#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07008580 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
8581 DWConvMicrokernelTester()
8582 .cr(8)
8583 .kr(9)
8584 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008585 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008586 }
8587
8588 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
8589 for (uint32_t channels = 16; channels < 128; channels += 24) {
8590 DWConvMicrokernelTester()
8591 .cr(8)
8592 .kr(9)
8593 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008594 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008595 }
8596 }
8597
8598 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
8599 for (uint32_t channels = 16; channels < 128; channels += 24) {
8600 DWConvMicrokernelTester()
8601 .cr(8)
8602 .kr(9)
8603 .channels(channels)
8604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008605 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008606 }
8607 }
8608
8609 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
8610 for (uint32_t channels = 16; channels < 128; channels += 24) {
8611 DWConvMicrokernelTester()
8612 .cr(8)
8613 .kr(9)
8614 .channels(channels)
8615 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008616 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008617 }
8618 }
8619
8620 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
8621 for (uint32_t channels = 1; channels < 8; channels++) {
8622 DWConvMicrokernelTester()
8623 .cr(8)
8624 .kr(9)
8625 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008626 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008627 }
8628 }
8629
8630 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
8631 for (uint32_t channels = 9; channels < 16; channels++) {
8632 DWConvMicrokernelTester()
8633 .cr(8)
8634 .kr(9)
8635 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008636 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008637 }
8638 }
8639
8640 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
8641 for (uint32_t channels = 9; channels < 16; channels++) {
8642 DWConvMicrokernelTester()
8643 .cr(8)
8644 .kr(9)
8645 .channels(channels)
8646 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008647 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008648 }
8649 }
8650
8651 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
8652 for (uint32_t channels = 9; channels < 16; channels++) {
8653 DWConvMicrokernelTester()
8654 .cr(8)
8655 .kr(9)
8656 .channels(channels)
8657 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008658 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008659 }
8660 }
8661
8662 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
8663 for (size_t channels = 1; channels <= 40; channels += 7) {
8664 DWConvMicrokernelTester()
8665 .cr(8)
8666 .kr(9)
8667 .channels(channels)
8668 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008669 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008670 }
8671 }
8672
8673 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
8674 for (size_t channels = 1; channels <= 40; channels += 7) {
8675 for (size_t step = 2; step <= 9; step++) {
8676 DWConvMicrokernelTester()
8677 .cr(8)
8678 .kr(9)
8679 .channels(channels)
8680 .width(3)
8681 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008682 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008683 }
8684 }
8685 }
8686
8687 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
8688 for (size_t channels = 1; channels <= 40; channels += 7) {
8689 DWConvMicrokernelTester()
8690 .cr(8)
8691 .kr(9)
8692 .channels(8)
8693 .width(5)
8694 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008695 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008696 }
8697 }
8698
8699 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
8700 for (size_t channels = 1; channels <= 40; channels += 7) {
8701 DWConvMicrokernelTester()
8702 .cr(8)
8703 .kr(9)
8704 .channels(channels)
8705 .width(3)
8706 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008707 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008708 }
8709 }
8710
8711 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
8712 for (size_t channels = 1; channels <= 40; channels += 7) {
8713 DWConvMicrokernelTester()
8714 .cr(8)
8715 .kr(9)
8716 .channels(channels)
8717 .width(3)
8718 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008719 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008720 }
8721 }
8722
8723 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
8724 for (uint32_t channels = 16; channels < 128; channels += 24) {
8725 DWConvMicrokernelTester()
8726 .cr(8)
8727 .kr(9)
8728 .channels(channels)
8729 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08008730 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008731 }
8732 }
8733
8734 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
8735 for (uint32_t mz = 0; mz < 9; mz++) {
8736 for (uint32_t channels = 16; channels < 128; channels += 24) {
8737 DWConvMicrokernelTester()
8738 .cr(8)
8739 .kr(9)
8740 .channels(channels)
8741 .input_offset(176)
8742 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008743 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008744 }
8745 }
8746 }
Marat Dukhan4c617792021-12-21 15:47:58 -08008747#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07008748
8749
Marat Dukhan4c617792021-12-21 15:47:58 -08008750#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07008751 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
8752 DWConvMicrokernelTester()
8753 .cr(16)
8754 .kr(9)
8755 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008756 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008757 }
8758
8759 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
8760 for (uint32_t channels = 32; channels < 256; channels += 48) {
8761 DWConvMicrokernelTester()
8762 .cr(16)
8763 .kr(9)
8764 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008765 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008766 }
8767 }
8768
8769 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
8770 for (uint32_t channels = 32; channels < 256; channels += 48) {
8771 DWConvMicrokernelTester()
8772 .cr(16)
8773 .kr(9)
8774 .channels(channels)
8775 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008776 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008777 }
8778 }
8779
8780 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
8781 for (uint32_t channels = 32; channels < 256; channels += 48) {
8782 DWConvMicrokernelTester()
8783 .cr(16)
8784 .kr(9)
8785 .channels(channels)
8786 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008787 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008788 }
8789 }
8790
8791 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
8792 for (uint32_t channels = 1; channels < 16; channels++) {
8793 DWConvMicrokernelTester()
8794 .cr(16)
8795 .kr(9)
8796 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008797 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008798 }
8799 }
8800
8801 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
8802 for (uint32_t channels = 17; channels < 32; channels++) {
8803 DWConvMicrokernelTester()
8804 .cr(16)
8805 .kr(9)
8806 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008807 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008808 }
8809 }
8810
8811 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
8812 for (uint32_t channels = 17; channels < 32; channels++) {
8813 DWConvMicrokernelTester()
8814 .cr(16)
8815 .kr(9)
8816 .channels(channels)
8817 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008818 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008819 }
8820 }
8821
8822 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
8823 for (uint32_t channels = 17; channels < 32; channels++) {
8824 DWConvMicrokernelTester()
8825 .cr(16)
8826 .kr(9)
8827 .channels(channels)
8828 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008829 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008830 }
8831 }
8832
8833 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
8834 for (size_t channels = 1; channels <= 80; channels += 15) {
8835 DWConvMicrokernelTester()
8836 .cr(16)
8837 .kr(9)
8838 .channels(channels)
8839 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008840 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008841 }
8842 }
8843
8844 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
8845 for (size_t channels = 1; channels <= 80; channels += 15) {
8846 for (size_t step = 2; step <= 9; step++) {
8847 DWConvMicrokernelTester()
8848 .cr(16)
8849 .kr(9)
8850 .channels(channels)
8851 .width(3)
8852 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008853 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008854 }
8855 }
8856 }
8857
8858 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
8859 for (size_t channels = 1; channels <= 80; channels += 15) {
8860 DWConvMicrokernelTester()
8861 .cr(16)
8862 .kr(9)
8863 .channels(16)
8864 .width(5)
8865 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008866 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008867 }
8868 }
8869
8870 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
8871 for (size_t channels = 1; channels <= 80; channels += 15) {
8872 DWConvMicrokernelTester()
8873 .cr(16)
8874 .kr(9)
8875 .channels(channels)
8876 .width(3)
8877 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008878 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008879 }
8880 }
8881
8882 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
8883 for (size_t channels = 1; channels <= 80; channels += 15) {
8884 DWConvMicrokernelTester()
8885 .cr(16)
8886 .kr(9)
8887 .channels(channels)
8888 .width(3)
8889 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008890 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008891 }
8892 }
8893
8894 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
8895 for (uint32_t channels = 32; channels < 256; channels += 48) {
8896 DWConvMicrokernelTester()
8897 .cr(16)
8898 .kr(9)
8899 .channels(channels)
8900 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08008901 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008902 }
8903 }
8904
8905 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
8906 for (uint32_t mz = 0; mz < 9; mz++) {
8907 for (uint32_t channels = 32; channels < 256; channels += 48) {
8908 DWConvMicrokernelTester()
8909 .cr(16)
8910 .kr(9)
8911 .channels(channels)
8912 .input_offset(304)
8913 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008914 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008915 }
8916 }
8917 }
Marat Dukhan4c617792021-12-21 15:47:58 -08008918#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07008919
8920
Marat Dukhan4c617792021-12-21 15:47:58 -08008921#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07008922 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
8923 DWConvMicrokernelTester()
8924 .cr(24)
8925 .kr(9)
8926 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08008927 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008928 }
8929
8930 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
8931 for (uint32_t channels = 48; channels < 384; channels += 72) {
8932 DWConvMicrokernelTester()
8933 .cr(24)
8934 .kr(9)
8935 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008936 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008937 }
8938 }
8939
8940 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
8941 for (uint32_t channels = 48; channels < 384; channels += 72) {
8942 DWConvMicrokernelTester()
8943 .cr(24)
8944 .kr(9)
8945 .channels(channels)
8946 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008947 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008948 }
8949 }
8950
8951 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
8952 for (uint32_t channels = 48; channels < 384; channels += 72) {
8953 DWConvMicrokernelTester()
8954 .cr(24)
8955 .kr(9)
8956 .channels(channels)
8957 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008958 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008959 }
8960 }
8961
8962 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
8963 for (uint32_t channels = 1; channels < 24; channels++) {
8964 DWConvMicrokernelTester()
8965 .cr(24)
8966 .kr(9)
8967 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008968 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008969 }
8970 }
8971
8972 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
8973 for (uint32_t channels = 25; channels < 48; channels++) {
8974 DWConvMicrokernelTester()
8975 .cr(24)
8976 .kr(9)
8977 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008978 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008979 }
8980 }
8981
8982 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
8983 for (uint32_t channels = 25; channels < 48; channels++) {
8984 DWConvMicrokernelTester()
8985 .cr(24)
8986 .kr(9)
8987 .channels(channels)
8988 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07008990 }
8991 }
8992
8993 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
8994 for (uint32_t channels = 25; channels < 48; channels++) {
8995 DWConvMicrokernelTester()
8996 .cr(24)
8997 .kr(9)
8998 .channels(channels)
8999 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009000 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009001 }
9002 }
9003
9004 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
9005 for (size_t channels = 1; channels <= 120; channels += 23) {
9006 DWConvMicrokernelTester()
9007 .cr(24)
9008 .kr(9)
9009 .channels(channels)
9010 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009011 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009012 }
9013 }
9014
9015 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
9016 for (size_t channels = 1; channels <= 120; channels += 23) {
9017 for (size_t step = 2; step <= 9; step++) {
9018 DWConvMicrokernelTester()
9019 .cr(24)
9020 .kr(9)
9021 .channels(channels)
9022 .width(3)
9023 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009025 }
9026 }
9027 }
9028
9029 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
9030 for (size_t channels = 1; channels <= 120; channels += 23) {
9031 DWConvMicrokernelTester()
9032 .cr(24)
9033 .kr(9)
9034 .channels(24)
9035 .width(5)
9036 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009038 }
9039 }
9040
9041 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
9042 for (size_t channels = 1; channels <= 120; channels += 23) {
9043 DWConvMicrokernelTester()
9044 .cr(24)
9045 .kr(9)
9046 .channels(channels)
9047 .width(3)
9048 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009049 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009050 }
9051 }
9052
9053 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
9054 for (size_t channels = 1; channels <= 120; channels += 23) {
9055 DWConvMicrokernelTester()
9056 .cr(24)
9057 .kr(9)
9058 .channels(channels)
9059 .width(3)
9060 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009061 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009062 }
9063 }
9064
9065 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
9066 for (uint32_t channels = 48; channels < 384; channels += 72) {
9067 DWConvMicrokernelTester()
9068 .cr(24)
9069 .kr(9)
9070 .channels(channels)
9071 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08009072 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009073 }
9074 }
9075
9076 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
9077 for (uint32_t mz = 0; mz < 9; mz++) {
9078 for (uint32_t channels = 48; channels < 384; channels += 72) {
9079 DWConvMicrokernelTester()
9080 .cr(24)
9081 .kr(9)
9082 .channels(channels)
9083 .input_offset(464)
9084 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009085 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -07009086 }
9087 }
9088 }
Marat Dukhan4c617792021-12-21 15:47:58 -08009089#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -07009090
9091
Marat Dukhan4c617792021-12-21 15:47:58 -08009092#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009093 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_eq_8) {
9094 DWConvMicrokernelTester()
9095 .cr(8)
9096 .kr(9)
9097 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009099 }
9100
9101 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8) {
9102 for (uint32_t channels = 16; channels < 128; channels += 24) {
9103 DWConvMicrokernelTester()
9104 .cr(8)
9105 .kr(9)
9106 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009107 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009108 }
9109 }
9110
9111 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
9112 for (uint32_t channels = 16; channels < 128; channels += 24) {
9113 DWConvMicrokernelTester()
9114 .cr(8)
9115 .kr(9)
9116 .channels(channels)
9117 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009118 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009119 }
9120 }
9121
9122 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
9123 for (uint32_t channels = 16; channels < 128; channels += 24) {
9124 DWConvMicrokernelTester()
9125 .cr(8)
9126 .kr(9)
9127 .channels(channels)
9128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009130 }
9131 }
9132
9133 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_lt_8) {
9134 for (uint32_t channels = 1; channels < 8; channels++) {
9135 DWConvMicrokernelTester()
9136 .cr(8)
9137 .kr(9)
9138 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009139 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009140 }
9141 }
9142
9143 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8) {
9144 for (uint32_t channels = 9; channels < 16; channels++) {
9145 DWConvMicrokernelTester()
9146 .cr(8)
9147 .kr(9)
9148 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009149 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009150 }
9151 }
9152
9153 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
9154 for (uint32_t channels = 9; channels < 16; channels++) {
9155 DWConvMicrokernelTester()
9156 .cr(8)
9157 .kr(9)
9158 .channels(channels)
9159 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009160 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009161 }
9162 }
9163
9164 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
9165 for (uint32_t channels = 9; channels < 16; channels++) {
9166 DWConvMicrokernelTester()
9167 .cr(8)
9168 .kr(9)
9169 .channels(channels)
9170 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009172 }
9173 }
9174
9175 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel) {
9176 for (size_t channels = 1; channels <= 40; channels += 7) {
9177 DWConvMicrokernelTester()
9178 .cr(8)
9179 .kr(9)
9180 .channels(channels)
9181 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009183 }
9184 }
9185
9186 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
9187 for (size_t channels = 1; channels <= 40; channels += 7) {
9188 for (size_t step = 2; step <= 9; step++) {
9189 DWConvMicrokernelTester()
9190 .cr(8)
9191 .kr(9)
9192 .channels(channels)
9193 .width(3)
9194 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009195 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009196 }
9197 }
9198 }
9199
9200 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
9201 for (size_t channels = 1; channels <= 40; channels += 7) {
9202 DWConvMicrokernelTester()
9203 .cr(8)
9204 .kr(9)
9205 .channels(8)
9206 .width(5)
9207 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009208 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009209 }
9210 }
9211
9212 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
9213 for (size_t channels = 1; channels <= 40; channels += 7) {
9214 DWConvMicrokernelTester()
9215 .cr(8)
9216 .kr(9)
9217 .channels(channels)
9218 .width(3)
9219 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009220 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009221 }
9222 }
9223
9224 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
9225 for (size_t channels = 1; channels <= 40; channels += 7) {
9226 DWConvMicrokernelTester()
9227 .cr(8)
9228 .kr(9)
9229 .channels(channels)
9230 .width(3)
9231 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009232 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009233 }
9234 }
9235
9236 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, input_offset) {
9237 for (uint32_t channels = 16; channels < 128; channels += 24) {
9238 DWConvMicrokernelTester()
9239 .cr(8)
9240 .kr(9)
9241 .channels(channels)
9242 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08009243 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009244 }
9245 }
9246
9247 TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, zero) {
9248 for (uint32_t mz = 0; mz < 9; mz++) {
9249 for (uint32_t channels = 16; channels < 128; channels += 24) {
9250 DWConvMicrokernelTester()
9251 .cr(8)
9252 .kr(9)
9253 .channels(channels)
9254 .input_offset(176)
9255 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009256 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009257 }
9258 }
9259 }
Marat Dukhan4c617792021-12-21 15:47:58 -08009260#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009261
9262
Marat Dukhan4c617792021-12-21 15:47:58 -08009263#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009264 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_eq_16) {
9265 DWConvMicrokernelTester()
9266 .cr(16)
9267 .kr(9)
9268 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009270 }
9271
9272 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16) {
9273 for (uint32_t channels = 32; channels < 256; channels += 48) {
9274 DWConvMicrokernelTester()
9275 .cr(16)
9276 .kr(9)
9277 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009278 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009279 }
9280 }
9281
9282 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
9283 for (uint32_t channels = 32; channels < 256; channels += 48) {
9284 DWConvMicrokernelTester()
9285 .cr(16)
9286 .kr(9)
9287 .channels(channels)
9288 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009289 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009290 }
9291 }
9292
9293 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
9294 for (uint32_t channels = 32; channels < 256; channels += 48) {
9295 DWConvMicrokernelTester()
9296 .cr(16)
9297 .kr(9)
9298 .channels(channels)
9299 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009300 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009301 }
9302 }
9303
9304 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_lt_16) {
9305 for (uint32_t channels = 1; channels < 16; channels++) {
9306 DWConvMicrokernelTester()
9307 .cr(16)
9308 .kr(9)
9309 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009310 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009311 }
9312 }
9313
9314 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16) {
9315 for (uint32_t channels = 17; channels < 32; channels++) {
9316 DWConvMicrokernelTester()
9317 .cr(16)
9318 .kr(9)
9319 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009320 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009321 }
9322 }
9323
9324 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
9325 for (uint32_t channels = 17; channels < 32; channels++) {
9326 DWConvMicrokernelTester()
9327 .cr(16)
9328 .kr(9)
9329 .channels(channels)
9330 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009331 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009332 }
9333 }
9334
9335 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
9336 for (uint32_t channels = 17; channels < 32; channels++) {
9337 DWConvMicrokernelTester()
9338 .cr(16)
9339 .kr(9)
9340 .channels(channels)
9341 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009342 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009343 }
9344 }
9345
9346 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel) {
9347 for (size_t channels = 1; channels <= 80; channels += 15) {
9348 DWConvMicrokernelTester()
9349 .cr(16)
9350 .kr(9)
9351 .channels(channels)
9352 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009353 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009354 }
9355 }
9356
9357 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
9358 for (size_t channels = 1; channels <= 80; channels += 15) {
9359 for (size_t step = 2; step <= 9; step++) {
9360 DWConvMicrokernelTester()
9361 .cr(16)
9362 .kr(9)
9363 .channels(channels)
9364 .width(3)
9365 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009366 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009367 }
9368 }
9369 }
9370
9371 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
9372 for (size_t channels = 1; channels <= 80; channels += 15) {
9373 DWConvMicrokernelTester()
9374 .cr(16)
9375 .kr(9)
9376 .channels(16)
9377 .width(5)
9378 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009379 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009380 }
9381 }
9382
9383 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
9384 for (size_t channels = 1; channels <= 80; channels += 15) {
9385 DWConvMicrokernelTester()
9386 .cr(16)
9387 .kr(9)
9388 .channels(channels)
9389 .width(3)
9390 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009391 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009392 }
9393 }
9394
9395 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
9396 for (size_t channels = 1; channels <= 80; channels += 15) {
9397 DWConvMicrokernelTester()
9398 .cr(16)
9399 .kr(9)
9400 .channels(channels)
9401 .width(3)
9402 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009403 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009404 }
9405 }
9406
9407 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, input_offset) {
9408 for (uint32_t channels = 32; channels < 256; channels += 48) {
9409 DWConvMicrokernelTester()
9410 .cr(16)
9411 .kr(9)
9412 .channels(channels)
9413 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08009414 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009415 }
9416 }
9417
9418 TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, zero) {
9419 for (uint32_t mz = 0; mz < 9; mz++) {
9420 for (uint32_t channels = 32; channels < 256; channels += 48) {
9421 DWConvMicrokernelTester()
9422 .cr(16)
9423 .kr(9)
9424 .channels(channels)
9425 .input_offset(304)
9426 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009427 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009428 }
9429 }
9430 }
Marat Dukhan4c617792021-12-21 15:47:58 -08009431#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009432
9433
Marat Dukhan4c617792021-12-21 15:47:58 -08009434#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009435 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_eq_24) {
9436 DWConvMicrokernelTester()
9437 .cr(24)
9438 .kr(9)
9439 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08009440 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009441 }
9442
9443 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24) {
9444 for (uint32_t channels = 48; channels < 384; channels += 72) {
9445 DWConvMicrokernelTester()
9446 .cr(24)
9447 .kr(9)
9448 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009449 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009450 }
9451 }
9452
9453 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
9454 for (uint32_t channels = 48; channels < 384; channels += 72) {
9455 DWConvMicrokernelTester()
9456 .cr(24)
9457 .kr(9)
9458 .channels(channels)
9459 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009460 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009461 }
9462 }
9463
9464 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
9465 for (uint32_t channels = 48; channels < 384; channels += 72) {
9466 DWConvMicrokernelTester()
9467 .cr(24)
9468 .kr(9)
9469 .channels(channels)
9470 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009471 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009472 }
9473 }
9474
9475 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_lt_24) {
9476 for (uint32_t channels = 1; channels < 24; channels++) {
9477 DWConvMicrokernelTester()
9478 .cr(24)
9479 .kr(9)
9480 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009481 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009482 }
9483 }
9484
9485 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24) {
9486 for (uint32_t channels = 25; channels < 48; channels++) {
9487 DWConvMicrokernelTester()
9488 .cr(24)
9489 .kr(9)
9490 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009491 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009492 }
9493 }
9494
9495 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
9496 for (uint32_t channels = 25; channels < 48; channels++) {
9497 DWConvMicrokernelTester()
9498 .cr(24)
9499 .kr(9)
9500 .channels(channels)
9501 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009502 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009503 }
9504 }
9505
9506 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
9507 for (uint32_t channels = 25; channels < 48; channels++) {
9508 DWConvMicrokernelTester()
9509 .cr(24)
9510 .kr(9)
9511 .channels(channels)
9512 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009513 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009514 }
9515 }
9516
9517 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel) {
9518 for (size_t channels = 1; channels <= 120; channels += 23) {
9519 DWConvMicrokernelTester()
9520 .cr(24)
9521 .kr(9)
9522 .channels(channels)
9523 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009524 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009525 }
9526 }
9527
9528 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
9529 for (size_t channels = 1; channels <= 120; channels += 23) {
9530 for (size_t step = 2; step <= 9; step++) {
9531 DWConvMicrokernelTester()
9532 .cr(24)
9533 .kr(9)
9534 .channels(channels)
9535 .width(3)
9536 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009538 }
9539 }
9540 }
9541
9542 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
9543 for (size_t channels = 1; channels <= 120; channels += 23) {
9544 DWConvMicrokernelTester()
9545 .cr(24)
9546 .kr(9)
9547 .channels(24)
9548 .width(5)
9549 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009550 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009551 }
9552 }
9553
9554 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
9555 for (size_t channels = 1; channels <= 120; channels += 23) {
9556 DWConvMicrokernelTester()
9557 .cr(24)
9558 .kr(9)
9559 .channels(channels)
9560 .width(3)
9561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009562 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009563 }
9564 }
9565
9566 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
9567 for (size_t channels = 1; channels <= 120; channels += 23) {
9568 DWConvMicrokernelTester()
9569 .cr(24)
9570 .kr(9)
9571 .channels(channels)
9572 .width(3)
9573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009575 }
9576 }
9577
9578 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, input_offset) {
9579 for (uint32_t channels = 48; channels < 384; channels += 72) {
9580 DWConvMicrokernelTester()
9581 .cr(24)
9582 .kr(9)
9583 .channels(channels)
9584 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08009585 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009586 }
9587 }
9588
9589 TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, zero) {
9590 for (uint32_t mz = 0; mz < 9; mz++) {
9591 for (uint32_t channels = 48; channels < 384; channels += 72) {
9592 DWConvMicrokernelTester()
9593 .cr(24)
9594 .kr(9)
9595 .channels(channels)
9596 .input_offset(464)
9597 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -07009599 }
9600 }
9601 }
Marat Dukhan4c617792021-12-21 15:47:58 -08009602#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -07009603
9604
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009605#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9606 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
9607 DWConvMicrokernelTester()
9608 .cr(1)
9609 .kr(9)
9610 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009611 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009612 }
9613
9614 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
9615 for (uint32_t channels = 2; channels < 10; channels++) {
9616 DWConvMicrokernelTester()
9617 .cr(1)
9618 .kr(9)
9619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009620 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009621 }
9622 }
9623
9624 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
9625 for (uint32_t channels = 2; channels < 10; channels++) {
9626 DWConvMicrokernelTester()
9627 .cr(1)
9628 .kr(9)
9629 .channels(channels)
9630 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009631 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009632 }
9633 }
9634
9635 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
9636 for (uint32_t channels = 2; channels < 10; channels++) {
9637 DWConvMicrokernelTester()
9638 .cr(1)
9639 .kr(9)
9640 .channels(channels)
9641 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009642 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009643 }
9644 }
9645
9646 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
9647 for (size_t channels = 1; channels <= 5; channels += 1) {
9648 DWConvMicrokernelTester()
9649 .cr(1)
9650 .kr(9)
9651 .channels(channels)
9652 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009653 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009654 }
9655 }
9656
9657 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
9658 for (size_t channels = 1; channels <= 5; channels += 1) {
9659 for (size_t step = 2; step <= 9; step++) {
9660 DWConvMicrokernelTester()
9661 .cr(1)
9662 .kr(9)
9663 .channels(channels)
9664 .width(3)
9665 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009666 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009667 }
9668 }
9669 }
9670
9671 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
9672 for (size_t channels = 1; channels <= 5; channels += 1) {
9673 DWConvMicrokernelTester()
9674 .cr(1)
9675 .kr(9)
9676 .channels(1)
9677 .width(5)
9678 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009679 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009680 }
9681 }
9682
9683 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
9684 for (size_t channels = 1; channels <= 5; channels += 1) {
9685 DWConvMicrokernelTester()
9686 .cr(1)
9687 .kr(9)
9688 .channels(channels)
9689 .width(3)
9690 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009691 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009692 }
9693 }
9694
9695 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
9696 for (size_t channels = 1; channels <= 5; channels += 1) {
9697 DWConvMicrokernelTester()
9698 .cr(1)
9699 .kr(9)
9700 .channels(channels)
9701 .width(3)
9702 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009703 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009704 }
9705 }
9706
9707 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
9708 for (uint32_t channels = 2; channels < 16; channels += 3) {
9709 DWConvMicrokernelTester()
9710 .cr(1)
9711 .kr(9)
9712 .channels(channels)
9713 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -08009714 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009715 }
9716 }
9717
9718 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
9719 for (uint32_t mz = 0; mz < 9; mz++) {
9720 for (uint32_t channels = 2; channels < 16; channels += 3) {
9721 DWConvMicrokernelTester()
9722 .cr(1)
9723 .kr(9)
9724 .channels(channels)
9725 .input_offset(48)
9726 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009727 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009728 }
9729 }
9730 }
9731#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9732
9733
9734#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9735 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
9736 DWConvMicrokernelTester()
9737 .cr(2)
9738 .kr(9)
9739 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08009740 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009741 }
9742
9743 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
9744 for (uint32_t channels = 4; channels < 32; channels += 6) {
9745 DWConvMicrokernelTester()
9746 .cr(2)
9747 .kr(9)
9748 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009749 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009750 }
9751 }
9752
9753 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
9754 for (uint32_t channels = 4; channels < 32; channels += 6) {
9755 DWConvMicrokernelTester()
9756 .cr(2)
9757 .kr(9)
9758 .channels(channels)
9759 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009760 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009761 }
9762 }
9763
9764 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
9765 for (uint32_t channels = 4; channels < 32; channels += 6) {
9766 DWConvMicrokernelTester()
9767 .cr(2)
9768 .kr(9)
9769 .channels(channels)
9770 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009771 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009772 }
9773 }
9774
9775 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
9776 for (uint32_t channels = 1; channels < 2; channels++) {
9777 DWConvMicrokernelTester()
9778 .cr(2)
9779 .kr(9)
9780 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009781 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009782 }
9783 }
9784
9785 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
9786 for (uint32_t channels = 3; channels < 4; channels++) {
9787 DWConvMicrokernelTester()
9788 .cr(2)
9789 .kr(9)
9790 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009791 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009792 }
9793 }
9794
9795 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
9796 for (uint32_t channels = 3; channels < 4; channels++) {
9797 DWConvMicrokernelTester()
9798 .cr(2)
9799 .kr(9)
9800 .channels(channels)
9801 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009802 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009803 }
9804 }
9805
9806 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
9807 for (uint32_t channels = 3; channels < 4; channels++) {
9808 DWConvMicrokernelTester()
9809 .cr(2)
9810 .kr(9)
9811 .channels(channels)
9812 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009813 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009814 }
9815 }
9816
9817 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
9818 for (size_t channels = 1; channels <= 10; channels += 1) {
9819 DWConvMicrokernelTester()
9820 .cr(2)
9821 .kr(9)
9822 .channels(channels)
9823 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009824 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009825 }
9826 }
9827
9828 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
9829 for (size_t channels = 1; channels <= 10; channels += 1) {
9830 for (size_t step = 2; step <= 9; step++) {
9831 DWConvMicrokernelTester()
9832 .cr(2)
9833 .kr(9)
9834 .channels(channels)
9835 .width(3)
9836 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009837 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009838 }
9839 }
9840 }
9841
9842 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
9843 for (size_t channels = 1; channels <= 10; channels += 1) {
9844 DWConvMicrokernelTester()
9845 .cr(2)
9846 .kr(9)
9847 .channels(2)
9848 .width(5)
9849 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -08009850 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009851 }
9852 }
9853
9854 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
9855 for (size_t channels = 1; channels <= 10; channels += 1) {
9856 DWConvMicrokernelTester()
9857 .cr(2)
9858 .kr(9)
9859 .channels(channels)
9860 .width(3)
9861 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009862 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009863 }
9864 }
9865
9866 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
9867 for (size_t channels = 1; channels <= 10; channels += 1) {
9868 DWConvMicrokernelTester()
9869 .cr(2)
9870 .kr(9)
9871 .channels(channels)
9872 .width(3)
9873 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009874 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009875 }
9876 }
9877
9878 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
9879 for (uint32_t channels = 4; channels < 32; channels += 6) {
9880 DWConvMicrokernelTester()
9881 .cr(2)
9882 .kr(9)
9883 .channels(channels)
9884 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -08009885 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009886 }
9887 }
9888
9889 TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
9890 for (uint32_t mz = 0; mz < 9; mz++) {
9891 for (uint32_t channels = 4; channels < 32; channels += 6) {
9892 DWConvMicrokernelTester()
9893 .cr(2)
9894 .kr(9)
9895 .channels(channels)
9896 .input_offset(80)
9897 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009898 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009899 }
9900 }
9901 }
9902#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9903
9904
9905#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
9906 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
9907 DWConvMicrokernelTester()
9908 .cr(4)
9909 .kr(9)
9910 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08009911 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009912 }
9913
9914 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
9915 for (uint32_t channels = 8; channels < 64; channels += 12) {
9916 DWConvMicrokernelTester()
9917 .cr(4)
9918 .kr(9)
9919 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009920 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009921 }
9922 }
9923
9924 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
9925 for (uint32_t channels = 8; channels < 64; channels += 12) {
9926 DWConvMicrokernelTester()
9927 .cr(4)
9928 .kr(9)
9929 .channels(channels)
9930 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009931 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009932 }
9933 }
9934
9935 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
9936 for (uint32_t channels = 8; channels < 64; channels += 12) {
9937 DWConvMicrokernelTester()
9938 .cr(4)
9939 .kr(9)
9940 .channels(channels)
9941 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009942 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009943 }
9944 }
9945
9946 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
9947 for (uint32_t channels = 1; channels < 4; channels++) {
9948 DWConvMicrokernelTester()
9949 .cr(4)
9950 .kr(9)
9951 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009952 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009953 }
9954 }
9955
9956 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
9957 for (uint32_t channels = 5; channels < 8; channels++) {
9958 DWConvMicrokernelTester()
9959 .cr(4)
9960 .kr(9)
9961 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009962 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009963 }
9964 }
9965
9966 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
9967 for (uint32_t channels = 5; channels < 8; channels++) {
9968 DWConvMicrokernelTester()
9969 .cr(4)
9970 .kr(9)
9971 .channels(channels)
9972 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009973 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009974 }
9975 }
9976
9977 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
9978 for (uint32_t channels = 5; channels < 8; channels++) {
9979 DWConvMicrokernelTester()
9980 .cr(4)
9981 .kr(9)
9982 .channels(channels)
9983 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009984 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009985 }
9986 }
9987
9988 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
9989 for (size_t channels = 1; channels <= 20; channels += 3) {
9990 DWConvMicrokernelTester()
9991 .cr(4)
9992 .kr(9)
9993 .channels(channels)
9994 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009995 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08009996 }
9997 }
9998
9999 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
10000 for (size_t channels = 1; channels <= 20; channels += 3) {
10001 for (size_t step = 2; step <= 9; step++) {
10002 DWConvMicrokernelTester()
10003 .cr(4)
10004 .kr(9)
10005 .channels(channels)
10006 .width(3)
10007 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010009 }
10010 }
10011 }
10012
10013 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
10014 for (size_t channels = 1; channels <= 20; channels += 3) {
10015 DWConvMicrokernelTester()
10016 .cr(4)
10017 .kr(9)
10018 .channels(4)
10019 .width(5)
10020 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080010021 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010022 }
10023 }
10024
10025 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
10026 for (size_t channels = 1; channels <= 20; channels += 3) {
10027 DWConvMicrokernelTester()
10028 .cr(4)
10029 .kr(9)
10030 .channels(channels)
10031 .width(3)
10032 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010033 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010034 }
10035 }
10036
10037 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
10038 for (size_t channels = 1; channels <= 20; channels += 3) {
10039 DWConvMicrokernelTester()
10040 .cr(4)
10041 .kr(9)
10042 .channels(channels)
10043 .width(3)
10044 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010045 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010046 }
10047 }
10048
10049 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
10050 for (uint32_t channels = 8; channels < 64; channels += 12) {
10051 DWConvMicrokernelTester()
10052 .cr(4)
10053 .kr(9)
10054 .channels(channels)
10055 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080010056 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010057 }
10058 }
10059
10060 TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
10061 for (uint32_t mz = 0; mz < 9; mz++) {
10062 for (uint32_t channels = 8; channels < 64; channels += 12) {
10063 DWConvMicrokernelTester()
10064 .cr(4)
10065 .kr(9)
10066 .channels(channels)
10067 .input_offset(112)
10068 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010069 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080010070 }
10071 }
10072 }
10073#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10074
10075
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010076TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010077 DWConvMicrokernelTester()
10078 .cr(1)
10079 .kr(9)
10080 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010081 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010082}
10083
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010084TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010085 for (uint32_t channels = 2; channels < 10; channels++) {
10086 DWConvMicrokernelTester()
10087 .cr(1)
10088 .kr(9)
10089 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010090 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010091 }
10092}
10093
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010094TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010095 for (uint32_t channels = 2; channels < 10; channels++) {
10096 DWConvMicrokernelTester()
10097 .cr(1)
10098 .kr(9)
10099 .channels(channels)
10100 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010101 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010102 }
10103}
10104
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010105TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010106 for (uint32_t channels = 2; channels < 10; channels++) {
10107 DWConvMicrokernelTester()
10108 .cr(1)
10109 .kr(9)
10110 .channels(channels)
10111 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010112 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010113 }
10114}
10115
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010116TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010117 for (size_t channels = 1; channels <= 5; channels += 1) {
10118 DWConvMicrokernelTester()
10119 .cr(1)
10120 .kr(9)
10121 .channels(channels)
10122 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010123 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010124 }
10125}
10126
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010127TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010128 for (size_t channels = 1; channels <= 5; channels += 1) {
10129 for (size_t step = 2; step <= 9; step++) {
10130 DWConvMicrokernelTester()
10131 .cr(1)
10132 .kr(9)
10133 .channels(channels)
10134 .width(3)
10135 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010136 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010137 }
10138 }
10139}
10140
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010141TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010142 for (size_t channels = 1; channels <= 5; channels += 1) {
10143 DWConvMicrokernelTester()
10144 .cr(1)
10145 .kr(9)
10146 .channels(1)
10147 .width(5)
10148 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010149 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010150 }
10151}
10152
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010153TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010154 for (size_t channels = 1; channels <= 5; channels += 1) {
10155 DWConvMicrokernelTester()
10156 .cr(1)
10157 .kr(9)
10158 .channels(channels)
10159 .width(3)
10160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010161 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010162 }
10163}
10164
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010165TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010166 for (size_t channels = 1; channels <= 5; channels += 1) {
10167 DWConvMicrokernelTester()
10168 .cr(1)
10169 .kr(9)
10170 .channels(channels)
10171 .width(3)
10172 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010173 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010174 }
10175}
10176
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010177TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010178 for (uint32_t channels = 2; channels < 16; channels += 3) {
10179 DWConvMicrokernelTester()
10180 .cr(1)
10181 .kr(9)
10182 .channels(channels)
10183 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080010184 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010185 }
10186}
10187
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010188TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010189 for (uint32_t mz = 0; mz < 9; mz++) {
10190 for (uint32_t channels = 2; channels < 16; channels += 3) {
10191 DWConvMicrokernelTester()
10192 .cr(1)
10193 .kr(9)
10194 .channels(channels)
10195 .input_offset(48)
10196 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010197 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010198 }
10199 }
10200}
10201
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010202TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010203 DWConvMicrokernelTester()
10204 .cr(2)
10205 .kr(9)
10206 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080010207 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010208}
10209
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010210TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010211 for (uint32_t channels = 4; channels < 32; channels += 6) {
10212 DWConvMicrokernelTester()
10213 .cr(2)
10214 .kr(9)
10215 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010216 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010217 }
10218}
10219
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010220TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010221 for (uint32_t channels = 4; channels < 32; channels += 6) {
10222 DWConvMicrokernelTester()
10223 .cr(2)
10224 .kr(9)
10225 .channels(channels)
10226 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010227 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010228 }
10229}
10230
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010231TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010232 for (uint32_t channels = 4; channels < 32; channels += 6) {
10233 DWConvMicrokernelTester()
10234 .cr(2)
10235 .kr(9)
10236 .channels(channels)
10237 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010238 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010239 }
10240}
10241
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010242TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010243 for (uint32_t channels = 1; channels < 2; channels++) {
10244 DWConvMicrokernelTester()
10245 .cr(2)
10246 .kr(9)
10247 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010248 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010249 }
10250}
10251
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010252TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010253 for (uint32_t channels = 3; channels < 4; channels++) {
10254 DWConvMicrokernelTester()
10255 .cr(2)
10256 .kr(9)
10257 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010258 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010259 }
10260}
10261
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010262TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010263 for (uint32_t channels = 3; channels < 4; channels++) {
10264 DWConvMicrokernelTester()
10265 .cr(2)
10266 .kr(9)
10267 .channels(channels)
10268 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010270 }
10271}
10272
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010273TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010274 for (uint32_t channels = 3; channels < 4; channels++) {
10275 DWConvMicrokernelTester()
10276 .cr(2)
10277 .kr(9)
10278 .channels(channels)
10279 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010280 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010281 }
10282}
10283
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010284TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010285 for (size_t channels = 1; channels <= 10; channels += 1) {
10286 DWConvMicrokernelTester()
10287 .cr(2)
10288 .kr(9)
10289 .channels(channels)
10290 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010291 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010292 }
10293}
10294
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010295TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010296 for (size_t channels = 1; channels <= 10; channels += 1) {
10297 for (size_t step = 2; step <= 9; step++) {
10298 DWConvMicrokernelTester()
10299 .cr(2)
10300 .kr(9)
10301 .channels(channels)
10302 .width(3)
10303 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010304 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010305 }
10306 }
10307}
10308
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010309TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010310 for (size_t channels = 1; channels <= 10; channels += 1) {
10311 DWConvMicrokernelTester()
10312 .cr(2)
10313 .kr(9)
10314 .channels(2)
10315 .width(5)
10316 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080010317 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010318 }
10319}
10320
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010321TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010322 for (size_t channels = 1; channels <= 10; channels += 1) {
10323 DWConvMicrokernelTester()
10324 .cr(2)
10325 .kr(9)
10326 .channels(channels)
10327 .width(3)
10328 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010329 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010330 }
10331}
10332
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010333TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010334 for (size_t channels = 1; channels <= 10; channels += 1) {
10335 DWConvMicrokernelTester()
10336 .cr(2)
10337 .kr(9)
10338 .channels(channels)
10339 .width(3)
10340 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010341 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010342 }
10343}
10344
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010345TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010346 for (uint32_t channels = 4; channels < 32; channels += 6) {
10347 DWConvMicrokernelTester()
10348 .cr(2)
10349 .kr(9)
10350 .channels(channels)
10351 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080010352 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010353 }
10354}
10355
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010356TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010357 for (uint32_t mz = 0; mz < 9; mz++) {
10358 for (uint32_t channels = 4; channels < 32; channels += 6) {
10359 DWConvMicrokernelTester()
10360 .cr(2)
10361 .kr(9)
10362 .channels(channels)
10363 .input_offset(80)
10364 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010365 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010366 }
10367 }
10368}
10369
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010370TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010371 DWConvMicrokernelTester()
10372 .cr(4)
10373 .kr(9)
10374 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080010375 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010376}
10377
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010378TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010379 for (uint32_t channels = 8; channels < 64; channels += 12) {
10380 DWConvMicrokernelTester()
10381 .cr(4)
10382 .kr(9)
10383 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010384 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010385 }
10386}
10387
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010388TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010389 for (uint32_t channels = 8; channels < 64; channels += 12) {
10390 DWConvMicrokernelTester()
10391 .cr(4)
10392 .kr(9)
10393 .channels(channels)
10394 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010395 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010396 }
10397}
10398
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010399TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010400 for (uint32_t channels = 8; channels < 64; channels += 12) {
10401 DWConvMicrokernelTester()
10402 .cr(4)
10403 .kr(9)
10404 .channels(channels)
10405 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010406 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010407 }
10408}
10409
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010410TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010411 for (uint32_t channels = 1; channels < 4; channels++) {
10412 DWConvMicrokernelTester()
10413 .cr(4)
10414 .kr(9)
10415 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010416 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010417 }
10418}
10419
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010420TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010421 for (uint32_t channels = 5; channels < 8; channels++) {
10422 DWConvMicrokernelTester()
10423 .cr(4)
10424 .kr(9)
10425 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010426 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010427 }
10428}
10429
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010430TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010431 for (uint32_t channels = 5; channels < 8; channels++) {
10432 DWConvMicrokernelTester()
10433 .cr(4)
10434 .kr(9)
10435 .channels(channels)
10436 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010437 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010438 }
10439}
10440
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010441TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010442 for (uint32_t channels = 5; channels < 8; channels++) {
10443 DWConvMicrokernelTester()
10444 .cr(4)
10445 .kr(9)
10446 .channels(channels)
10447 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010448 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010449 }
10450}
10451
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010452TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010453 for (size_t channels = 1; channels <= 20; channels += 3) {
10454 DWConvMicrokernelTester()
10455 .cr(4)
10456 .kr(9)
10457 .channels(channels)
10458 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010459 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010460 }
10461}
10462
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010463TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010464 for (size_t channels = 1; channels <= 20; channels += 3) {
10465 for (size_t step = 2; step <= 9; step++) {
10466 DWConvMicrokernelTester()
10467 .cr(4)
10468 .kr(9)
10469 .channels(channels)
10470 .width(3)
10471 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010472 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010473 }
10474 }
10475}
10476
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010477TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010478 for (size_t channels = 1; channels <= 20; channels += 3) {
10479 DWConvMicrokernelTester()
10480 .cr(4)
10481 .kr(9)
10482 .channels(4)
10483 .width(5)
10484 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080010485 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010486 }
10487}
10488
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010489TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010490 for (size_t channels = 1; channels <= 20; channels += 3) {
10491 DWConvMicrokernelTester()
10492 .cr(4)
10493 .kr(9)
10494 .channels(channels)
10495 .width(3)
10496 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010497 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010498 }
10499}
10500
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010501TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010502 for (size_t channels = 1; channels <= 20; channels += 3) {
10503 DWConvMicrokernelTester()
10504 .cr(4)
10505 .kr(9)
10506 .channels(channels)
10507 .width(3)
10508 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010509 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010510 }
10511}
10512
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010513TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010514 for (uint32_t channels = 8; channels < 64; channels += 12) {
10515 DWConvMicrokernelTester()
10516 .cr(4)
10517 .kr(9)
10518 .channels(channels)
10519 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080010520 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010521 }
10522}
10523
Marat Dukhan2ac722e2022-01-04 01:54:20 -080010524TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070010525 for (uint32_t mz = 0; mz < 9; mz++) {
10526 for (uint32_t channels = 8; channels < 64; channels += 12) {
10527 DWConvMicrokernelTester()
10528 .cr(4)
10529 .kr(9)
10530 .channels(channels)
10531 .input_offset(112)
10532 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010533 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070010534 }
10535 }
10536}
10537
Marat Dukhan272d4d92022-01-04 15:07:14 -080010538TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
10539 DWConvMicrokernelTester()
10540 .cr(1)
10541 .kr(9)
10542 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010543 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010544}
10545
10546TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
10547 for (uint32_t channels = 2; channels < 10; channels++) {
10548 DWConvMicrokernelTester()
10549 .cr(1)
10550 .kr(9)
10551 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010552 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010553 }
10554}
10555
10556TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
10557 for (uint32_t channels = 2; channels < 10; channels++) {
10558 DWConvMicrokernelTester()
10559 .cr(1)
10560 .kr(9)
10561 .channels(channels)
10562 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010563 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010564 }
10565}
10566
10567TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
10568 for (uint32_t channels = 2; channels < 10; channels++) {
10569 DWConvMicrokernelTester()
10570 .cr(1)
10571 .kr(9)
10572 .channels(channels)
10573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010574 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010575 }
10576}
10577
10578TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
10579 for (size_t channels = 1; channels <= 5; channels += 1) {
10580 DWConvMicrokernelTester()
10581 .cr(1)
10582 .kr(9)
10583 .channels(channels)
10584 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010585 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010586 }
10587}
10588
10589TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
10590 for (size_t channels = 1; channels <= 5; channels += 1) {
10591 for (size_t step = 2; step <= 9; step++) {
10592 DWConvMicrokernelTester()
10593 .cr(1)
10594 .kr(9)
10595 .channels(channels)
10596 .width(3)
10597 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010598 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010599 }
10600 }
10601}
10602
10603TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
10604 for (size_t channels = 1; channels <= 5; channels += 1) {
10605 DWConvMicrokernelTester()
10606 .cr(1)
10607 .kr(9)
10608 .channels(1)
10609 .width(5)
10610 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010611 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010612 }
10613}
10614
10615TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
10616 for (size_t channels = 1; channels <= 5; channels += 1) {
10617 DWConvMicrokernelTester()
10618 .cr(1)
10619 .kr(9)
10620 .channels(channels)
10621 .width(3)
10622 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010623 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010624 }
10625}
10626
10627TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
10628 for (size_t channels = 1; channels <= 5; channels += 1) {
10629 DWConvMicrokernelTester()
10630 .cr(1)
10631 .kr(9)
10632 .channels(channels)
10633 .width(3)
10634 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010635 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010636 }
10637}
10638
10639TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
10640 for (uint32_t channels = 2; channels < 16; channels += 3) {
10641 DWConvMicrokernelTester()
10642 .cr(1)
10643 .kr(9)
10644 .channels(channels)
10645 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080010646 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010647 }
10648}
10649
10650TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
10651 for (uint32_t mz = 0; mz < 9; mz++) {
10652 for (uint32_t channels = 2; channels < 16; channels += 3) {
10653 DWConvMicrokernelTester()
10654 .cr(1)
10655 .kr(9)
10656 .channels(channels)
10657 .input_offset(48)
10658 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010659 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010660 }
10661 }
10662}
10663
10664TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
10665 DWConvMicrokernelTester()
10666 .cr(2)
10667 .kr(9)
10668 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080010669 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010670}
10671
10672TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
10673 for (uint32_t channels = 4; channels < 32; channels += 6) {
10674 DWConvMicrokernelTester()
10675 .cr(2)
10676 .kr(9)
10677 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010678 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010679 }
10680}
10681
10682TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
10683 for (uint32_t channels = 4; channels < 32; channels += 6) {
10684 DWConvMicrokernelTester()
10685 .cr(2)
10686 .kr(9)
10687 .channels(channels)
10688 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010689 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010690 }
10691}
10692
10693TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
10694 for (uint32_t channels = 4; channels < 32; channels += 6) {
10695 DWConvMicrokernelTester()
10696 .cr(2)
10697 .kr(9)
10698 .channels(channels)
10699 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010700 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010701 }
10702}
10703
10704TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
10705 for (uint32_t channels = 1; channels < 2; channels++) {
10706 DWConvMicrokernelTester()
10707 .cr(2)
10708 .kr(9)
10709 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010710 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010711 }
10712}
10713
10714TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
10715 for (uint32_t channels = 3; channels < 4; channels++) {
10716 DWConvMicrokernelTester()
10717 .cr(2)
10718 .kr(9)
10719 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010720 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010721 }
10722}
10723
10724TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
10725 for (uint32_t channels = 3; channels < 4; channels++) {
10726 DWConvMicrokernelTester()
10727 .cr(2)
10728 .kr(9)
10729 .channels(channels)
10730 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010731 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010732 }
10733}
10734
10735TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
10736 for (uint32_t channels = 3; channels < 4; channels++) {
10737 DWConvMicrokernelTester()
10738 .cr(2)
10739 .kr(9)
10740 .channels(channels)
10741 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010742 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010743 }
10744}
10745
10746TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
10747 for (size_t channels = 1; channels <= 10; channels += 1) {
10748 DWConvMicrokernelTester()
10749 .cr(2)
10750 .kr(9)
10751 .channels(channels)
10752 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010753 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010754 }
10755}
10756
10757TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
10758 for (size_t channels = 1; channels <= 10; channels += 1) {
10759 for (size_t step = 2; step <= 9; step++) {
10760 DWConvMicrokernelTester()
10761 .cr(2)
10762 .kr(9)
10763 .channels(channels)
10764 .width(3)
10765 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010766 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010767 }
10768 }
10769}
10770
10771TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
10772 for (size_t channels = 1; channels <= 10; channels += 1) {
10773 DWConvMicrokernelTester()
10774 .cr(2)
10775 .kr(9)
10776 .channels(2)
10777 .width(5)
10778 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080010779 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010780 }
10781}
10782
10783TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
10784 for (size_t channels = 1; channels <= 10; channels += 1) {
10785 DWConvMicrokernelTester()
10786 .cr(2)
10787 .kr(9)
10788 .channels(channels)
10789 .width(3)
10790 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010791 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010792 }
10793}
10794
10795TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
10796 for (size_t channels = 1; channels <= 10; channels += 1) {
10797 DWConvMicrokernelTester()
10798 .cr(2)
10799 .kr(9)
10800 .channels(channels)
10801 .width(3)
10802 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010803 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010804 }
10805}
10806
10807TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
10808 for (uint32_t channels = 4; channels < 32; channels += 6) {
10809 DWConvMicrokernelTester()
10810 .cr(2)
10811 .kr(9)
10812 .channels(channels)
10813 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080010814 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010815 }
10816}
10817
10818TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
10819 for (uint32_t mz = 0; mz < 9; mz++) {
10820 for (uint32_t channels = 4; channels < 32; channels += 6) {
10821 DWConvMicrokernelTester()
10822 .cr(2)
10823 .kr(9)
10824 .channels(channels)
10825 .input_offset(80)
10826 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010828 }
10829 }
10830}
10831
10832TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
10833 DWConvMicrokernelTester()
10834 .cr(4)
10835 .kr(9)
10836 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080010837 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010838}
10839
10840TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
10841 for (uint32_t channels = 8; channels < 64; channels += 12) {
10842 DWConvMicrokernelTester()
10843 .cr(4)
10844 .kr(9)
10845 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010846 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010847 }
10848}
10849
10850TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
10851 for (uint32_t channels = 8; channels < 64; channels += 12) {
10852 DWConvMicrokernelTester()
10853 .cr(4)
10854 .kr(9)
10855 .channels(channels)
10856 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010857 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010858 }
10859}
10860
10861TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
10862 for (uint32_t channels = 8; channels < 64; channels += 12) {
10863 DWConvMicrokernelTester()
10864 .cr(4)
10865 .kr(9)
10866 .channels(channels)
10867 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010868 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010869 }
10870}
10871
10872TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
10873 for (uint32_t channels = 1; channels < 4; channels++) {
10874 DWConvMicrokernelTester()
10875 .cr(4)
10876 .kr(9)
10877 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010878 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010879 }
10880}
10881
10882TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
10883 for (uint32_t channels = 5; channels < 8; channels++) {
10884 DWConvMicrokernelTester()
10885 .cr(4)
10886 .kr(9)
10887 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010889 }
10890}
10891
10892TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
10893 for (uint32_t channels = 5; channels < 8; channels++) {
10894 DWConvMicrokernelTester()
10895 .cr(4)
10896 .kr(9)
10897 .channels(channels)
10898 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010899 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010900 }
10901}
10902
10903TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
10904 for (uint32_t channels = 5; channels < 8; channels++) {
10905 DWConvMicrokernelTester()
10906 .cr(4)
10907 .kr(9)
10908 .channels(channels)
10909 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010910 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010911 }
10912}
10913
10914TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
10915 for (size_t channels = 1; channels <= 20; channels += 3) {
10916 DWConvMicrokernelTester()
10917 .cr(4)
10918 .kr(9)
10919 .channels(channels)
10920 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010921 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010922 }
10923}
10924
10925TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
10926 for (size_t channels = 1; channels <= 20; channels += 3) {
10927 for (size_t step = 2; step <= 9; step++) {
10928 DWConvMicrokernelTester()
10929 .cr(4)
10930 .kr(9)
10931 .channels(channels)
10932 .width(3)
10933 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010934 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010935 }
10936 }
10937}
10938
10939TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
10940 for (size_t channels = 1; channels <= 20; channels += 3) {
10941 DWConvMicrokernelTester()
10942 .cr(4)
10943 .kr(9)
10944 .channels(4)
10945 .width(5)
10946 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080010947 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010948 }
10949}
10950
10951TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
10952 for (size_t channels = 1; channels <= 20; channels += 3) {
10953 DWConvMicrokernelTester()
10954 .cr(4)
10955 .kr(9)
10956 .channels(channels)
10957 .width(3)
10958 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010959 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010960 }
10961}
10962
10963TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
10964 for (size_t channels = 1; channels <= 20; channels += 3) {
10965 DWConvMicrokernelTester()
10966 .cr(4)
10967 .kr(9)
10968 .channels(channels)
10969 .width(3)
10970 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010971 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010972 }
10973}
10974
10975TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
10976 for (uint32_t channels = 8; channels < 64; channels += 12) {
10977 DWConvMicrokernelTester()
10978 .cr(4)
10979 .kr(9)
10980 .channels(channels)
10981 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080010982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010983 }
10984}
10985
10986TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
10987 for (uint32_t mz = 0; mz < 9; mz++) {
10988 for (uint32_t channels = 8; channels < 64; channels += 12) {
10989 DWConvMicrokernelTester()
10990 .cr(4)
10991 .kr(9)
10992 .channels(channels)
10993 .input_offset(112)
10994 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010995 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080010996 }
10997 }
10998}
10999
11000TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
11001 DWConvMicrokernelTester()
11002 .cr(1)
11003 .kr(9)
11004 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011005 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011006}
11007
11008TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
11009 for (uint32_t channels = 2; channels < 10; channels++) {
11010 DWConvMicrokernelTester()
11011 .cr(1)
11012 .kr(9)
11013 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011014 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011015 }
11016}
11017
11018TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
11019 for (uint32_t channels = 2; channels < 10; channels++) {
11020 DWConvMicrokernelTester()
11021 .cr(1)
11022 .kr(9)
11023 .channels(channels)
11024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011025 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011026 }
11027}
11028
11029TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
11030 for (uint32_t channels = 2; channels < 10; channels++) {
11031 DWConvMicrokernelTester()
11032 .cr(1)
11033 .kr(9)
11034 .channels(channels)
11035 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011036 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011037 }
11038}
11039
11040TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
11041 for (size_t channels = 1; channels <= 5; channels += 1) {
11042 DWConvMicrokernelTester()
11043 .cr(1)
11044 .kr(9)
11045 .channels(channels)
11046 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011047 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011048 }
11049}
11050
11051TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
11052 for (size_t channels = 1; channels <= 5; channels += 1) {
11053 for (size_t step = 2; step <= 9; step++) {
11054 DWConvMicrokernelTester()
11055 .cr(1)
11056 .kr(9)
11057 .channels(channels)
11058 .width(3)
11059 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011060 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011061 }
11062 }
11063}
11064
11065TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
11066 for (size_t channels = 1; channels <= 5; channels += 1) {
11067 DWConvMicrokernelTester()
11068 .cr(1)
11069 .kr(9)
11070 .channels(1)
11071 .width(5)
11072 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011073 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011074 }
11075}
11076
11077TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
11078 for (size_t channels = 1; channels <= 5; channels += 1) {
11079 DWConvMicrokernelTester()
11080 .cr(1)
11081 .kr(9)
11082 .channels(channels)
11083 .width(3)
11084 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011085 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011086 }
11087}
11088
11089TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
11090 for (size_t channels = 1; channels <= 5; channels += 1) {
11091 DWConvMicrokernelTester()
11092 .cr(1)
11093 .kr(9)
11094 .channels(channels)
11095 .width(3)
11096 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011097 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011098 }
11099}
11100
11101TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
11102 for (uint32_t channels = 2; channels < 16; channels += 3) {
11103 DWConvMicrokernelTester()
11104 .cr(1)
11105 .kr(9)
11106 .channels(channels)
11107 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080011108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011109 }
11110}
11111
11112TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
11113 for (uint32_t mz = 0; mz < 9; mz++) {
11114 for (uint32_t channels = 2; channels < 16; channels += 3) {
11115 DWConvMicrokernelTester()
11116 .cr(1)
11117 .kr(9)
11118 .channels(channels)
11119 .input_offset(48)
11120 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011121 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011122 }
11123 }
11124}
11125
11126TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
11127 DWConvMicrokernelTester()
11128 .cr(2)
11129 .kr(9)
11130 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080011131 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011132}
11133
11134TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
11135 for (uint32_t channels = 4; channels < 32; channels += 6) {
11136 DWConvMicrokernelTester()
11137 .cr(2)
11138 .kr(9)
11139 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011140 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011141 }
11142}
11143
11144TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
11145 for (uint32_t channels = 4; channels < 32; channels += 6) {
11146 DWConvMicrokernelTester()
11147 .cr(2)
11148 .kr(9)
11149 .channels(channels)
11150 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011151 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011152 }
11153}
11154
11155TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
11156 for (uint32_t channels = 4; channels < 32; channels += 6) {
11157 DWConvMicrokernelTester()
11158 .cr(2)
11159 .kr(9)
11160 .channels(channels)
11161 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011162 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011163 }
11164}
11165
11166TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
11167 for (uint32_t channels = 1; channels < 2; channels++) {
11168 DWConvMicrokernelTester()
11169 .cr(2)
11170 .kr(9)
11171 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011172 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011173 }
11174}
11175
11176TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
11177 for (uint32_t channels = 3; channels < 4; channels++) {
11178 DWConvMicrokernelTester()
11179 .cr(2)
11180 .kr(9)
11181 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011183 }
11184}
11185
11186TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
11187 for (uint32_t channels = 3; channels < 4; channels++) {
11188 DWConvMicrokernelTester()
11189 .cr(2)
11190 .kr(9)
11191 .channels(channels)
11192 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011193 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011194 }
11195}
11196
11197TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
11198 for (uint32_t channels = 3; channels < 4; channels++) {
11199 DWConvMicrokernelTester()
11200 .cr(2)
11201 .kr(9)
11202 .channels(channels)
11203 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011204 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011205 }
11206}
11207
11208TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
11209 for (size_t channels = 1; channels <= 10; channels += 1) {
11210 DWConvMicrokernelTester()
11211 .cr(2)
11212 .kr(9)
11213 .channels(channels)
11214 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011215 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011216 }
11217}
11218
11219TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
11220 for (size_t channels = 1; channels <= 10; channels += 1) {
11221 for (size_t step = 2; step <= 9; step++) {
11222 DWConvMicrokernelTester()
11223 .cr(2)
11224 .kr(9)
11225 .channels(channels)
11226 .width(3)
11227 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011228 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011229 }
11230 }
11231}
11232
11233TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
11234 for (size_t channels = 1; channels <= 10; channels += 1) {
11235 DWConvMicrokernelTester()
11236 .cr(2)
11237 .kr(9)
11238 .channels(2)
11239 .width(5)
11240 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080011241 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011242 }
11243}
11244
11245TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
11246 for (size_t channels = 1; channels <= 10; channels += 1) {
11247 DWConvMicrokernelTester()
11248 .cr(2)
11249 .kr(9)
11250 .channels(channels)
11251 .width(3)
11252 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011253 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011254 }
11255}
11256
11257TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
11258 for (size_t channels = 1; channels <= 10; channels += 1) {
11259 DWConvMicrokernelTester()
11260 .cr(2)
11261 .kr(9)
11262 .channels(channels)
11263 .width(3)
11264 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011265 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011266 }
11267}
11268
11269TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
11270 for (uint32_t channels = 4; channels < 32; channels += 6) {
11271 DWConvMicrokernelTester()
11272 .cr(2)
11273 .kr(9)
11274 .channels(channels)
11275 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080011276 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011277 }
11278}
11279
11280TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
11281 for (uint32_t mz = 0; mz < 9; mz++) {
11282 for (uint32_t channels = 4; channels < 32; channels += 6) {
11283 DWConvMicrokernelTester()
11284 .cr(2)
11285 .kr(9)
11286 .channels(channels)
11287 .input_offset(80)
11288 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011289 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011290 }
11291 }
11292}
11293
11294TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
11295 DWConvMicrokernelTester()
11296 .cr(4)
11297 .kr(9)
11298 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080011299 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011300}
11301
11302TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
11303 for (uint32_t channels = 8; channels < 64; channels += 12) {
11304 DWConvMicrokernelTester()
11305 .cr(4)
11306 .kr(9)
11307 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011308 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011309 }
11310}
11311
11312TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
11313 for (uint32_t channels = 8; channels < 64; channels += 12) {
11314 DWConvMicrokernelTester()
11315 .cr(4)
11316 .kr(9)
11317 .channels(channels)
11318 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011319 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011320 }
11321}
11322
11323TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
11324 for (uint32_t channels = 8; channels < 64; channels += 12) {
11325 DWConvMicrokernelTester()
11326 .cr(4)
11327 .kr(9)
11328 .channels(channels)
11329 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011330 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011331 }
11332}
11333
11334TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
11335 for (uint32_t channels = 1; channels < 4; channels++) {
11336 DWConvMicrokernelTester()
11337 .cr(4)
11338 .kr(9)
11339 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011340 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011341 }
11342}
11343
11344TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
11345 for (uint32_t channels = 5; channels < 8; channels++) {
11346 DWConvMicrokernelTester()
11347 .cr(4)
11348 .kr(9)
11349 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011350 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011351 }
11352}
11353
11354TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
11355 for (uint32_t channels = 5; channels < 8; channels++) {
11356 DWConvMicrokernelTester()
11357 .cr(4)
11358 .kr(9)
11359 .channels(channels)
11360 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011361 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011362 }
11363}
11364
11365TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
11366 for (uint32_t channels = 5; channels < 8; channels++) {
11367 DWConvMicrokernelTester()
11368 .cr(4)
11369 .kr(9)
11370 .channels(channels)
11371 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011372 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011373 }
11374}
11375
11376TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
11377 for (size_t channels = 1; channels <= 20; channels += 3) {
11378 DWConvMicrokernelTester()
11379 .cr(4)
11380 .kr(9)
11381 .channels(channels)
11382 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011383 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011384 }
11385}
11386
11387TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
11388 for (size_t channels = 1; channels <= 20; channels += 3) {
11389 for (size_t step = 2; step <= 9; step++) {
11390 DWConvMicrokernelTester()
11391 .cr(4)
11392 .kr(9)
11393 .channels(channels)
11394 .width(3)
11395 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011396 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011397 }
11398 }
11399}
11400
11401TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
11402 for (size_t channels = 1; channels <= 20; channels += 3) {
11403 DWConvMicrokernelTester()
11404 .cr(4)
11405 .kr(9)
11406 .channels(4)
11407 .width(5)
11408 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080011409 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011410 }
11411}
11412
11413TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
11414 for (size_t channels = 1; channels <= 20; channels += 3) {
11415 DWConvMicrokernelTester()
11416 .cr(4)
11417 .kr(9)
11418 .channels(channels)
11419 .width(3)
11420 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011421 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011422 }
11423}
11424
11425TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
11426 for (size_t channels = 1; channels <= 20; channels += 3) {
11427 DWConvMicrokernelTester()
11428 .cr(4)
11429 .kr(9)
11430 .channels(channels)
11431 .width(3)
11432 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011433 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011434 }
11435}
11436
11437TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
11438 for (uint32_t channels = 8; channels < 64; channels += 12) {
11439 DWConvMicrokernelTester()
11440 .cr(4)
11441 .kr(9)
11442 .channels(channels)
11443 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080011444 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011445 }
11446}
11447
11448TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
11449 for (uint32_t mz = 0; mz < 9; mz++) {
11450 for (uint32_t channels = 8; channels < 64; channels += 12) {
11451 DWConvMicrokernelTester()
11452 .cr(4)
11453 .kr(9)
11454 .channels(channels)
11455 .input_offset(112)
11456 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011457 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080011458 }
11459 }
11460}
11461
Marat Dukhan6f905292021-06-25 11:12:05 -070011462#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11463 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
11464 TEST_REQUIRES_ARM_NEON;
11465 DWConvMicrokernelTester()
11466 .cr(8)
11467 .kr(25)
11468 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011469 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011470 }
11471
11472 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
11473 TEST_REQUIRES_ARM_NEON;
11474 for (uint32_t channels = 16; channels < 128; channels += 24) {
11475 DWConvMicrokernelTester()
11476 .cr(8)
11477 .kr(25)
11478 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011479 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011480 }
11481 }
11482
11483 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
11484 TEST_REQUIRES_ARM_NEON;
11485 for (uint32_t channels = 16; channels < 128; channels += 24) {
11486 DWConvMicrokernelTester()
11487 .cr(8)
11488 .kr(25)
11489 .channels(channels)
11490 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011491 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011492 }
11493 }
11494
11495 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
11496 TEST_REQUIRES_ARM_NEON;
11497 for (uint32_t channels = 16; channels < 128; channels += 24) {
11498 DWConvMicrokernelTester()
11499 .cr(8)
11500 .kr(25)
11501 .channels(channels)
11502 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011503 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011504 }
11505 }
11506
11507 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
11508 TEST_REQUIRES_ARM_NEON;
11509 for (uint32_t channels = 1; channels < 8; channels++) {
11510 DWConvMicrokernelTester()
11511 .cr(8)
11512 .kr(25)
11513 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011514 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011515 }
11516 }
11517
11518 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
11519 TEST_REQUIRES_ARM_NEON;
11520 for (uint32_t channels = 9; channels < 16; channels++) {
11521 DWConvMicrokernelTester()
11522 .cr(8)
11523 .kr(25)
11524 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011525 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011526 }
11527 }
11528
11529 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
11530 TEST_REQUIRES_ARM_NEON;
11531 for (uint32_t channels = 9; channels < 16; channels++) {
11532 DWConvMicrokernelTester()
11533 .cr(8)
11534 .kr(25)
11535 .channels(channels)
11536 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011538 }
11539 }
11540
11541 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
11542 TEST_REQUIRES_ARM_NEON;
11543 for (uint32_t channels = 9; channels < 16; channels++) {
11544 DWConvMicrokernelTester()
11545 .cr(8)
11546 .kr(25)
11547 .channels(channels)
11548 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011549 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011550 }
11551 }
11552
11553 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
11554 TEST_REQUIRES_ARM_NEON;
11555 for (size_t channels = 1; channels <= 40; channels += 7) {
11556 DWConvMicrokernelTester()
11557 .cr(8)
11558 .kr(25)
11559 .channels(channels)
11560 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011561 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011562 }
11563 }
11564
11565 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
11566 TEST_REQUIRES_ARM_NEON;
11567 for (size_t channels = 1; channels <= 40; channels += 7) {
11568 for (size_t step = 2; step <= 25; step++) {
11569 DWConvMicrokernelTester()
11570 .cr(8)
11571 .kr(25)
11572 .channels(channels)
11573 .width(3)
11574 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011575 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011576 }
11577 }
11578 }
11579
11580 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
11581 TEST_REQUIRES_ARM_NEON;
11582 for (size_t channels = 1; channels <= 40; channels += 7) {
11583 DWConvMicrokernelTester()
11584 .cr(8)
11585 .kr(25)
11586 .channels(8)
11587 .width(5)
11588 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011589 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011590 }
11591 }
11592
11593 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
11594 TEST_REQUIRES_ARM_NEON;
11595 for (size_t channels = 1; channels <= 40; channels += 7) {
11596 DWConvMicrokernelTester()
11597 .cr(8)
11598 .kr(25)
11599 .channels(channels)
11600 .width(3)
11601 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011602 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011603 }
11604 }
11605
11606 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
11607 TEST_REQUIRES_ARM_NEON;
11608 for (size_t channels = 1; channels <= 40; channels += 7) {
11609 DWConvMicrokernelTester()
11610 .cr(8)
11611 .kr(25)
11612 .channels(channels)
11613 .width(3)
11614 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011615 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011616 }
11617 }
11618
11619 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
11620 TEST_REQUIRES_ARM_NEON;
11621 for (uint32_t channels = 16; channels < 128; channels += 24) {
11622 DWConvMicrokernelTester()
11623 .cr(8)
11624 .kr(25)
11625 .channels(channels)
11626 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080011627 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011628 }
11629 }
11630
11631 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
11632 TEST_REQUIRES_ARM_NEON;
11633 for (uint32_t mz = 0; mz < 25; mz++) {
11634 for (uint32_t channels = 16; channels < 128; channels += 24) {
11635 DWConvMicrokernelTester()
11636 .cr(8)
11637 .kr(25)
11638 .channels(channels)
11639 .input_offset(176)
11640 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011641 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011642 }
11643 }
11644 }
11645#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11646
11647
11648#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11649 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
11650 TEST_REQUIRES_ARM_NEON;
11651 DWConvMicrokernelTester()
11652 .cr(16)
11653 .kr(25)
11654 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011655 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011656 }
11657
11658 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
11659 TEST_REQUIRES_ARM_NEON;
11660 for (uint32_t channels = 32; channels < 256; channels += 48) {
11661 DWConvMicrokernelTester()
11662 .cr(16)
11663 .kr(25)
11664 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011665 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011666 }
11667 }
11668
11669 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
11670 TEST_REQUIRES_ARM_NEON;
11671 for (uint32_t channels = 32; channels < 256; channels += 48) {
11672 DWConvMicrokernelTester()
11673 .cr(16)
11674 .kr(25)
11675 .channels(channels)
11676 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011677 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011678 }
11679 }
11680
11681 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
11682 TEST_REQUIRES_ARM_NEON;
11683 for (uint32_t channels = 32; channels < 256; channels += 48) {
11684 DWConvMicrokernelTester()
11685 .cr(16)
11686 .kr(25)
11687 .channels(channels)
11688 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011689 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011690 }
11691 }
11692
11693 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
11694 TEST_REQUIRES_ARM_NEON;
11695 for (uint32_t channels = 1; channels < 16; channels++) {
11696 DWConvMicrokernelTester()
11697 .cr(16)
11698 .kr(25)
11699 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011700 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011701 }
11702 }
11703
11704 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
11705 TEST_REQUIRES_ARM_NEON;
11706 for (uint32_t channels = 17; channels < 32; channels++) {
11707 DWConvMicrokernelTester()
11708 .cr(16)
11709 .kr(25)
11710 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011711 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011712 }
11713 }
11714
11715 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
11716 TEST_REQUIRES_ARM_NEON;
11717 for (uint32_t channels = 17; channels < 32; channels++) {
11718 DWConvMicrokernelTester()
11719 .cr(16)
11720 .kr(25)
11721 .channels(channels)
11722 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011723 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011724 }
11725 }
11726
11727 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
11728 TEST_REQUIRES_ARM_NEON;
11729 for (uint32_t channels = 17; channels < 32; channels++) {
11730 DWConvMicrokernelTester()
11731 .cr(16)
11732 .kr(25)
11733 .channels(channels)
11734 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011735 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011736 }
11737 }
11738
11739 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
11740 TEST_REQUIRES_ARM_NEON;
11741 for (size_t channels = 1; channels <= 80; channels += 15) {
11742 DWConvMicrokernelTester()
11743 .cr(16)
11744 .kr(25)
11745 .channels(channels)
11746 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011747 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011748 }
11749 }
11750
11751 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
11752 TEST_REQUIRES_ARM_NEON;
11753 for (size_t channels = 1; channels <= 80; channels += 15) {
11754 for (size_t step = 2; step <= 25; step++) {
11755 DWConvMicrokernelTester()
11756 .cr(16)
11757 .kr(25)
11758 .channels(channels)
11759 .width(3)
11760 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011761 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011762 }
11763 }
11764 }
11765
11766 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
11767 TEST_REQUIRES_ARM_NEON;
11768 for (size_t channels = 1; channels <= 80; channels += 15) {
11769 DWConvMicrokernelTester()
11770 .cr(16)
11771 .kr(25)
11772 .channels(16)
11773 .width(5)
11774 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011775 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011776 }
11777 }
11778
11779 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
11780 TEST_REQUIRES_ARM_NEON;
11781 for (size_t channels = 1; channels <= 80; channels += 15) {
11782 DWConvMicrokernelTester()
11783 .cr(16)
11784 .kr(25)
11785 .channels(channels)
11786 .width(3)
11787 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011788 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011789 }
11790 }
11791
11792 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
11793 TEST_REQUIRES_ARM_NEON;
11794 for (size_t channels = 1; channels <= 80; channels += 15) {
11795 DWConvMicrokernelTester()
11796 .cr(16)
11797 .kr(25)
11798 .channels(channels)
11799 .width(3)
11800 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011801 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011802 }
11803 }
11804
11805 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
11806 TEST_REQUIRES_ARM_NEON;
11807 for (uint32_t channels = 32; channels < 256; channels += 48) {
11808 DWConvMicrokernelTester()
11809 .cr(16)
11810 .kr(25)
11811 .channels(channels)
11812 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011813 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011814 }
11815 }
11816
11817 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
11818 TEST_REQUIRES_ARM_NEON;
11819 for (uint32_t mz = 0; mz < 25; mz++) {
11820 for (uint32_t channels = 32; channels < 256; channels += 48) {
11821 DWConvMicrokernelTester()
11822 .cr(16)
11823 .kr(25)
11824 .channels(channels)
11825 .input_offset(304)
11826 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011828 }
11829 }
11830 }
11831#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11832
11833
11834#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11835 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
11836 TEST_REQUIRES_ARM_NEON;
11837 DWConvMicrokernelTester()
11838 .cr(24)
11839 .kr(25)
11840 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080011841 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011842 }
11843
11844 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
11845 TEST_REQUIRES_ARM_NEON;
11846 for (uint32_t channels = 48; channels < 384; channels += 72) {
11847 DWConvMicrokernelTester()
11848 .cr(24)
11849 .kr(25)
11850 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011851 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011852 }
11853 }
11854
11855 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
11856 TEST_REQUIRES_ARM_NEON;
11857 for (uint32_t channels = 48; channels < 384; channels += 72) {
11858 DWConvMicrokernelTester()
11859 .cr(24)
11860 .kr(25)
11861 .channels(channels)
11862 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011864 }
11865 }
11866
11867 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
11868 TEST_REQUIRES_ARM_NEON;
11869 for (uint32_t channels = 48; channels < 384; channels += 72) {
11870 DWConvMicrokernelTester()
11871 .cr(24)
11872 .kr(25)
11873 .channels(channels)
11874 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011875 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011876 }
11877 }
11878
11879 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
11880 TEST_REQUIRES_ARM_NEON;
11881 for (uint32_t channels = 1; channels < 24; channels++) {
11882 DWConvMicrokernelTester()
11883 .cr(24)
11884 .kr(25)
11885 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011886 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011887 }
11888 }
11889
11890 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
11891 TEST_REQUIRES_ARM_NEON;
11892 for (uint32_t channels = 25; channels < 48; channels++) {
11893 DWConvMicrokernelTester()
11894 .cr(24)
11895 .kr(25)
11896 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011897 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011898 }
11899 }
11900
11901 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
11902 TEST_REQUIRES_ARM_NEON;
11903 for (uint32_t channels = 25; channels < 48; channels++) {
11904 DWConvMicrokernelTester()
11905 .cr(24)
11906 .kr(25)
11907 .channels(channels)
11908 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011909 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011910 }
11911 }
11912
11913 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
11914 TEST_REQUIRES_ARM_NEON;
11915 for (uint32_t channels = 25; channels < 48; channels++) {
11916 DWConvMicrokernelTester()
11917 .cr(24)
11918 .kr(25)
11919 .channels(channels)
11920 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011922 }
11923 }
11924
11925 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
11926 TEST_REQUIRES_ARM_NEON;
11927 for (size_t channels = 1; channels <= 120; channels += 23) {
11928 DWConvMicrokernelTester()
11929 .cr(24)
11930 .kr(25)
11931 .channels(channels)
11932 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011933 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011934 }
11935 }
11936
11937 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
11938 TEST_REQUIRES_ARM_NEON;
11939 for (size_t channels = 1; channels <= 120; channels += 23) {
11940 for (size_t step = 2; step <= 25; step++) {
11941 DWConvMicrokernelTester()
11942 .cr(24)
11943 .kr(25)
11944 .channels(channels)
11945 .width(3)
11946 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011947 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011948 }
11949 }
11950 }
11951
11952 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
11953 TEST_REQUIRES_ARM_NEON;
11954 for (size_t channels = 1; channels <= 120; channels += 23) {
11955 DWConvMicrokernelTester()
11956 .cr(24)
11957 .kr(25)
11958 .channels(24)
11959 .width(5)
11960 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080011961 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011962 }
11963 }
11964
11965 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
11966 TEST_REQUIRES_ARM_NEON;
11967 for (size_t channels = 1; channels <= 120; channels += 23) {
11968 DWConvMicrokernelTester()
11969 .cr(24)
11970 .kr(25)
11971 .channels(channels)
11972 .width(3)
11973 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011974 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011975 }
11976 }
11977
11978 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
11979 TEST_REQUIRES_ARM_NEON;
11980 for (size_t channels = 1; channels <= 120; channels += 23) {
11981 DWConvMicrokernelTester()
11982 .cr(24)
11983 .kr(25)
11984 .channels(channels)
11985 .width(3)
11986 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011987 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070011988 }
11989 }
11990
11991 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
11992 TEST_REQUIRES_ARM_NEON;
11993 for (uint32_t channels = 48; channels < 384; channels += 72) {
11994 DWConvMicrokernelTester()
11995 .cr(24)
11996 .kr(25)
11997 .channels(channels)
11998 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080011999 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012000 }
12001 }
12002
12003 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
12004 TEST_REQUIRES_ARM_NEON;
12005 for (uint32_t mz = 0; mz < 25; mz++) {
12006 for (uint32_t channels = 48; channels < 384; channels += 72) {
12007 DWConvMicrokernelTester()
12008 .cr(24)
12009 .kr(25)
12010 .channels(channels)
12011 .input_offset(464)
12012 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012014 }
12015 }
12016 }
12017#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12018
12019
12020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12021 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
12022 TEST_REQUIRES_ARM_NEON;
12023 DWConvMicrokernelTester()
12024 .cr(32)
12025 .kr(25)
12026 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080012027 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012028 }
12029
12030 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
12031 TEST_REQUIRES_ARM_NEON;
12032 for (uint32_t channels = 64; channels < 512; channels += 96) {
12033 DWConvMicrokernelTester()
12034 .cr(32)
12035 .kr(25)
12036 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012038 }
12039 }
12040
12041 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
12042 TEST_REQUIRES_ARM_NEON;
12043 for (uint32_t channels = 64; channels < 512; channels += 96) {
12044 DWConvMicrokernelTester()
12045 .cr(32)
12046 .kr(25)
12047 .channels(channels)
12048 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012049 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012050 }
12051 }
12052
12053 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
12054 TEST_REQUIRES_ARM_NEON;
12055 for (uint32_t channels = 64; channels < 512; channels += 96) {
12056 DWConvMicrokernelTester()
12057 .cr(32)
12058 .kr(25)
12059 .channels(channels)
12060 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012061 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012062 }
12063 }
12064
12065 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
12066 TEST_REQUIRES_ARM_NEON;
12067 for (uint32_t channels = 1; channels < 32; channels++) {
12068 DWConvMicrokernelTester()
12069 .cr(32)
12070 .kr(25)
12071 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012072 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012073 }
12074 }
12075
12076 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
12077 TEST_REQUIRES_ARM_NEON;
12078 for (uint32_t channels = 33; channels < 64; channels++) {
12079 DWConvMicrokernelTester()
12080 .cr(32)
12081 .kr(25)
12082 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012083 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012084 }
12085 }
12086
12087 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
12088 TEST_REQUIRES_ARM_NEON;
12089 for (uint32_t channels = 33; channels < 64; channels++) {
12090 DWConvMicrokernelTester()
12091 .cr(32)
12092 .kr(25)
12093 .channels(channels)
12094 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012095 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012096 }
12097 }
12098
12099 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
12100 TEST_REQUIRES_ARM_NEON;
12101 for (uint32_t channels = 33; channels < 64; channels++) {
12102 DWConvMicrokernelTester()
12103 .cr(32)
12104 .kr(25)
12105 .channels(channels)
12106 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012107 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012108 }
12109 }
12110
12111 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
12112 TEST_REQUIRES_ARM_NEON;
12113 for (size_t channels = 1; channels <= 160; channels += 31) {
12114 DWConvMicrokernelTester()
12115 .cr(32)
12116 .kr(25)
12117 .channels(channels)
12118 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012119 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012120 }
12121 }
12122
12123 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
12124 TEST_REQUIRES_ARM_NEON;
12125 for (size_t channels = 1; channels <= 160; channels += 31) {
12126 for (size_t step = 2; step <= 25; step++) {
12127 DWConvMicrokernelTester()
12128 .cr(32)
12129 .kr(25)
12130 .channels(channels)
12131 .width(3)
12132 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012133 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012134 }
12135 }
12136 }
12137
12138 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
12139 TEST_REQUIRES_ARM_NEON;
12140 for (size_t channels = 1; channels <= 160; channels += 31) {
12141 DWConvMicrokernelTester()
12142 .cr(32)
12143 .kr(25)
12144 .channels(32)
12145 .width(5)
12146 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012147 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012148 }
12149 }
12150
12151 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
12152 TEST_REQUIRES_ARM_NEON;
12153 for (size_t channels = 1; channels <= 160; channels += 31) {
12154 DWConvMicrokernelTester()
12155 .cr(32)
12156 .kr(25)
12157 .channels(channels)
12158 .width(3)
12159 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012160 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012161 }
12162 }
12163
12164 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
12165 TEST_REQUIRES_ARM_NEON;
12166 for (size_t channels = 1; channels <= 160; channels += 31) {
12167 DWConvMicrokernelTester()
12168 .cr(32)
12169 .kr(25)
12170 .channels(channels)
12171 .width(3)
12172 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012173 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012174 }
12175 }
12176
12177 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
12178 TEST_REQUIRES_ARM_NEON;
12179 for (uint32_t channels = 64; channels < 512; channels += 96) {
12180 DWConvMicrokernelTester()
12181 .cr(32)
12182 .kr(25)
12183 .channels(channels)
12184 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080012185 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012186 }
12187 }
12188
12189 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
12190 TEST_REQUIRES_ARM_NEON;
12191 for (uint32_t mz = 0; mz < 25; mz++) {
12192 for (uint32_t channels = 64; channels < 512; channels += 96) {
12193 DWConvMicrokernelTester()
12194 .cr(32)
12195 .kr(25)
12196 .channels(channels)
12197 .input_offset(592)
12198 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012200 }
12201 }
12202 }
12203#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12204
12205
12206#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12207 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
12208 TEST_REQUIRES_ARM_NEON_V8;
12209 DWConvMicrokernelTester()
12210 .cr(8)
12211 .kr(25)
12212 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012213 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012214 }
12215
12216 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
12217 TEST_REQUIRES_ARM_NEON_V8;
12218 for (uint32_t channels = 16; channels < 128; channels += 24) {
12219 DWConvMicrokernelTester()
12220 .cr(8)
12221 .kr(25)
12222 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012223 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012224 }
12225 }
12226
12227 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
12228 TEST_REQUIRES_ARM_NEON_V8;
12229 for (uint32_t channels = 16; channels < 128; channels += 24) {
12230 DWConvMicrokernelTester()
12231 .cr(8)
12232 .kr(25)
12233 .channels(channels)
12234 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012235 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012236 }
12237 }
12238
12239 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
12240 TEST_REQUIRES_ARM_NEON_V8;
12241 for (uint32_t channels = 16; channels < 128; channels += 24) {
12242 DWConvMicrokernelTester()
12243 .cr(8)
12244 .kr(25)
12245 .channels(channels)
12246 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012247 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012248 }
12249 }
12250
12251 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
12252 TEST_REQUIRES_ARM_NEON_V8;
12253 for (uint32_t channels = 1; channels < 8; channels++) {
12254 DWConvMicrokernelTester()
12255 .cr(8)
12256 .kr(25)
12257 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012258 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012259 }
12260 }
12261
12262 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
12263 TEST_REQUIRES_ARM_NEON_V8;
12264 for (uint32_t channels = 9; channels < 16; channels++) {
12265 DWConvMicrokernelTester()
12266 .cr(8)
12267 .kr(25)
12268 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012270 }
12271 }
12272
12273 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
12274 TEST_REQUIRES_ARM_NEON_V8;
12275 for (uint32_t channels = 9; channels < 16; channels++) {
12276 DWConvMicrokernelTester()
12277 .cr(8)
12278 .kr(25)
12279 .channels(channels)
12280 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012281 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012282 }
12283 }
12284
12285 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
12286 TEST_REQUIRES_ARM_NEON_V8;
12287 for (uint32_t channels = 9; channels < 16; channels++) {
12288 DWConvMicrokernelTester()
12289 .cr(8)
12290 .kr(25)
12291 .channels(channels)
12292 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012293 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012294 }
12295 }
12296
12297 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
12298 TEST_REQUIRES_ARM_NEON_V8;
12299 for (size_t channels = 1; channels <= 40; channels += 7) {
12300 DWConvMicrokernelTester()
12301 .cr(8)
12302 .kr(25)
12303 .channels(channels)
12304 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012305 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012306 }
12307 }
12308
12309 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
12310 TEST_REQUIRES_ARM_NEON_V8;
12311 for (size_t channels = 1; channels <= 40; channels += 7) {
12312 for (size_t step = 2; step <= 25; step++) {
12313 DWConvMicrokernelTester()
12314 .cr(8)
12315 .kr(25)
12316 .channels(channels)
12317 .width(3)
12318 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012319 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012320 }
12321 }
12322 }
12323
12324 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
12325 TEST_REQUIRES_ARM_NEON_V8;
12326 for (size_t channels = 1; channels <= 40; channels += 7) {
12327 DWConvMicrokernelTester()
12328 .cr(8)
12329 .kr(25)
12330 .channels(8)
12331 .width(5)
12332 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012333 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012334 }
12335 }
12336
12337 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
12338 TEST_REQUIRES_ARM_NEON_V8;
12339 for (size_t channels = 1; channels <= 40; channels += 7) {
12340 DWConvMicrokernelTester()
12341 .cr(8)
12342 .kr(25)
12343 .channels(channels)
12344 .width(3)
12345 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012346 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012347 }
12348 }
12349
12350 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
12351 TEST_REQUIRES_ARM_NEON_V8;
12352 for (size_t channels = 1; channels <= 40; channels += 7) {
12353 DWConvMicrokernelTester()
12354 .cr(8)
12355 .kr(25)
12356 .channels(channels)
12357 .width(3)
12358 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012359 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012360 }
12361 }
12362
12363 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
12364 TEST_REQUIRES_ARM_NEON_V8;
12365 for (uint32_t channels = 16; channels < 128; channels += 24) {
12366 DWConvMicrokernelTester()
12367 .cr(8)
12368 .kr(25)
12369 .channels(channels)
12370 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080012371 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012372 }
12373 }
12374
12375 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
12376 TEST_REQUIRES_ARM_NEON_V8;
12377 for (uint32_t mz = 0; mz < 25; mz++) {
12378 for (uint32_t channels = 16; channels < 128; channels += 24) {
12379 DWConvMicrokernelTester()
12380 .cr(8)
12381 .kr(25)
12382 .channels(channels)
12383 .input_offset(176)
12384 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012385 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012386 }
12387 }
12388 }
12389#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12390
12391
12392#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12393 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
12394 TEST_REQUIRES_ARM_NEON_V8;
12395 DWConvMicrokernelTester()
12396 .cr(16)
12397 .kr(25)
12398 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012399 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012400 }
12401
12402 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
12403 TEST_REQUIRES_ARM_NEON_V8;
12404 for (uint32_t channels = 32; channels < 256; channels += 48) {
12405 DWConvMicrokernelTester()
12406 .cr(16)
12407 .kr(25)
12408 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012409 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012410 }
12411 }
12412
12413 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
12414 TEST_REQUIRES_ARM_NEON_V8;
12415 for (uint32_t channels = 32; channels < 256; channels += 48) {
12416 DWConvMicrokernelTester()
12417 .cr(16)
12418 .kr(25)
12419 .channels(channels)
12420 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012421 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012422 }
12423 }
12424
12425 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
12426 TEST_REQUIRES_ARM_NEON_V8;
12427 for (uint32_t channels = 32; channels < 256; channels += 48) {
12428 DWConvMicrokernelTester()
12429 .cr(16)
12430 .kr(25)
12431 .channels(channels)
12432 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012433 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012434 }
12435 }
12436
12437 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
12438 TEST_REQUIRES_ARM_NEON_V8;
12439 for (uint32_t channels = 1; channels < 16; channels++) {
12440 DWConvMicrokernelTester()
12441 .cr(16)
12442 .kr(25)
12443 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012444 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012445 }
12446 }
12447
12448 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
12449 TEST_REQUIRES_ARM_NEON_V8;
12450 for (uint32_t channels = 17; channels < 32; channels++) {
12451 DWConvMicrokernelTester()
12452 .cr(16)
12453 .kr(25)
12454 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012455 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012456 }
12457 }
12458
12459 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
12460 TEST_REQUIRES_ARM_NEON_V8;
12461 for (uint32_t channels = 17; channels < 32; channels++) {
12462 DWConvMicrokernelTester()
12463 .cr(16)
12464 .kr(25)
12465 .channels(channels)
12466 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012467 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012468 }
12469 }
12470
12471 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
12472 TEST_REQUIRES_ARM_NEON_V8;
12473 for (uint32_t channels = 17; channels < 32; channels++) {
12474 DWConvMicrokernelTester()
12475 .cr(16)
12476 .kr(25)
12477 .channels(channels)
12478 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012479 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012480 }
12481 }
12482
12483 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
12484 TEST_REQUIRES_ARM_NEON_V8;
12485 for (size_t channels = 1; channels <= 80; channels += 15) {
12486 DWConvMicrokernelTester()
12487 .cr(16)
12488 .kr(25)
12489 .channels(channels)
12490 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012491 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012492 }
12493 }
12494
12495 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
12496 TEST_REQUIRES_ARM_NEON_V8;
12497 for (size_t channels = 1; channels <= 80; channels += 15) {
12498 for (size_t step = 2; step <= 25; step++) {
12499 DWConvMicrokernelTester()
12500 .cr(16)
12501 .kr(25)
12502 .channels(channels)
12503 .width(3)
12504 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012505 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012506 }
12507 }
12508 }
12509
12510 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
12511 TEST_REQUIRES_ARM_NEON_V8;
12512 for (size_t channels = 1; channels <= 80; channels += 15) {
12513 DWConvMicrokernelTester()
12514 .cr(16)
12515 .kr(25)
12516 .channels(16)
12517 .width(5)
12518 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012519 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012520 }
12521 }
12522
12523 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
12524 TEST_REQUIRES_ARM_NEON_V8;
12525 for (size_t channels = 1; channels <= 80; channels += 15) {
12526 DWConvMicrokernelTester()
12527 .cr(16)
12528 .kr(25)
12529 .channels(channels)
12530 .width(3)
12531 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012532 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012533 }
12534 }
12535
12536 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
12537 TEST_REQUIRES_ARM_NEON_V8;
12538 for (size_t channels = 1; channels <= 80; channels += 15) {
12539 DWConvMicrokernelTester()
12540 .cr(16)
12541 .kr(25)
12542 .channels(channels)
12543 .width(3)
12544 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012545 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012546 }
12547 }
12548
12549 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
12550 TEST_REQUIRES_ARM_NEON_V8;
12551 for (uint32_t channels = 32; channels < 256; channels += 48) {
12552 DWConvMicrokernelTester()
12553 .cr(16)
12554 .kr(25)
12555 .channels(channels)
12556 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080012557 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012558 }
12559 }
12560
12561 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
12562 TEST_REQUIRES_ARM_NEON_V8;
12563 for (uint32_t mz = 0; mz < 25; mz++) {
12564 for (uint32_t channels = 32; channels < 256; channels += 48) {
12565 DWConvMicrokernelTester()
12566 .cr(16)
12567 .kr(25)
12568 .channels(channels)
12569 .input_offset(304)
12570 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012571 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012572 }
12573 }
12574 }
12575#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12576
12577
12578#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12579 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
12580 TEST_REQUIRES_ARM_NEON_V8;
12581 DWConvMicrokernelTester()
12582 .cr(24)
12583 .kr(25)
12584 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080012585 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012586 }
12587
12588 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
12589 TEST_REQUIRES_ARM_NEON_V8;
12590 for (uint32_t channels = 48; channels < 384; channels += 72) {
12591 DWConvMicrokernelTester()
12592 .cr(24)
12593 .kr(25)
12594 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012595 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012596 }
12597 }
12598
12599 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
12600 TEST_REQUIRES_ARM_NEON_V8;
12601 for (uint32_t channels = 48; channels < 384; channels += 72) {
12602 DWConvMicrokernelTester()
12603 .cr(24)
12604 .kr(25)
12605 .channels(channels)
12606 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012607 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012608 }
12609 }
12610
12611 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
12612 TEST_REQUIRES_ARM_NEON_V8;
12613 for (uint32_t channels = 48; channels < 384; channels += 72) {
12614 DWConvMicrokernelTester()
12615 .cr(24)
12616 .kr(25)
12617 .channels(channels)
12618 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012619 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012620 }
12621 }
12622
12623 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
12624 TEST_REQUIRES_ARM_NEON_V8;
12625 for (uint32_t channels = 1; channels < 24; channels++) {
12626 DWConvMicrokernelTester()
12627 .cr(24)
12628 .kr(25)
12629 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012630 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012631 }
12632 }
12633
12634 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
12635 TEST_REQUIRES_ARM_NEON_V8;
12636 for (uint32_t channels = 25; channels < 48; channels++) {
12637 DWConvMicrokernelTester()
12638 .cr(24)
12639 .kr(25)
12640 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012641 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012642 }
12643 }
12644
12645 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
12646 TEST_REQUIRES_ARM_NEON_V8;
12647 for (uint32_t channels = 25; channels < 48; channels++) {
12648 DWConvMicrokernelTester()
12649 .cr(24)
12650 .kr(25)
12651 .channels(channels)
12652 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012653 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012654 }
12655 }
12656
12657 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
12658 TEST_REQUIRES_ARM_NEON_V8;
12659 for (uint32_t channels = 25; channels < 48; channels++) {
12660 DWConvMicrokernelTester()
12661 .cr(24)
12662 .kr(25)
12663 .channels(channels)
12664 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012665 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012666 }
12667 }
12668
12669 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
12670 TEST_REQUIRES_ARM_NEON_V8;
12671 for (size_t channels = 1; channels <= 120; channels += 23) {
12672 DWConvMicrokernelTester()
12673 .cr(24)
12674 .kr(25)
12675 .channels(channels)
12676 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012677 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012678 }
12679 }
12680
12681 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
12682 TEST_REQUIRES_ARM_NEON_V8;
12683 for (size_t channels = 1; channels <= 120; channels += 23) {
12684 for (size_t step = 2; step <= 25; step++) {
12685 DWConvMicrokernelTester()
12686 .cr(24)
12687 .kr(25)
12688 .channels(channels)
12689 .width(3)
12690 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012691 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012692 }
12693 }
12694 }
12695
12696 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
12697 TEST_REQUIRES_ARM_NEON_V8;
12698 for (size_t channels = 1; channels <= 120; channels += 23) {
12699 DWConvMicrokernelTester()
12700 .cr(24)
12701 .kr(25)
12702 .channels(24)
12703 .width(5)
12704 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080012705 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012706 }
12707 }
12708
12709 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
12710 TEST_REQUIRES_ARM_NEON_V8;
12711 for (size_t channels = 1; channels <= 120; channels += 23) {
12712 DWConvMicrokernelTester()
12713 .cr(24)
12714 .kr(25)
12715 .channels(channels)
12716 .width(3)
12717 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012718 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012719 }
12720 }
12721
12722 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
12723 TEST_REQUIRES_ARM_NEON_V8;
12724 for (size_t channels = 1; channels <= 120; channels += 23) {
12725 DWConvMicrokernelTester()
12726 .cr(24)
12727 .kr(25)
12728 .channels(channels)
12729 .width(3)
12730 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012731 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012732 }
12733 }
12734
12735 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
12736 TEST_REQUIRES_ARM_NEON_V8;
12737 for (uint32_t channels = 48; channels < 384; channels += 72) {
12738 DWConvMicrokernelTester()
12739 .cr(24)
12740 .kr(25)
12741 .channels(channels)
12742 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080012743 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012744 }
12745 }
12746
12747 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
12748 TEST_REQUIRES_ARM_NEON_V8;
12749 for (uint32_t mz = 0; mz < 25; mz++) {
12750 for (uint32_t channels = 48; channels < 384; channels += 72) {
12751 DWConvMicrokernelTester()
12752 .cr(24)
12753 .kr(25)
12754 .channels(channels)
12755 .input_offset(464)
12756 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012757 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012758 }
12759 }
12760 }
12761#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12762
12763
12764#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12765 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
12766 TEST_REQUIRES_ARM_NEON_V8;
12767 DWConvMicrokernelTester()
12768 .cr(32)
12769 .kr(25)
12770 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080012771 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012772 }
12773
12774 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
12775 TEST_REQUIRES_ARM_NEON_V8;
12776 for (uint32_t channels = 64; channels < 512; channels += 96) {
12777 DWConvMicrokernelTester()
12778 .cr(32)
12779 .kr(25)
12780 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012781 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012782 }
12783 }
12784
12785 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
12786 TEST_REQUIRES_ARM_NEON_V8;
12787 for (uint32_t channels = 64; channels < 512; channels += 96) {
12788 DWConvMicrokernelTester()
12789 .cr(32)
12790 .kr(25)
12791 .channels(channels)
12792 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012793 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012794 }
12795 }
12796
12797 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
12798 TEST_REQUIRES_ARM_NEON_V8;
12799 for (uint32_t channels = 64; channels < 512; channels += 96) {
12800 DWConvMicrokernelTester()
12801 .cr(32)
12802 .kr(25)
12803 .channels(channels)
12804 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012805 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012806 }
12807 }
12808
12809 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
12810 TEST_REQUIRES_ARM_NEON_V8;
12811 for (uint32_t channels = 1; channels < 32; channels++) {
12812 DWConvMicrokernelTester()
12813 .cr(32)
12814 .kr(25)
12815 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012816 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012817 }
12818 }
12819
12820 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
12821 TEST_REQUIRES_ARM_NEON_V8;
12822 for (uint32_t channels = 33; channels < 64; channels++) {
12823 DWConvMicrokernelTester()
12824 .cr(32)
12825 .kr(25)
12826 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012828 }
12829 }
12830
12831 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
12832 TEST_REQUIRES_ARM_NEON_V8;
12833 for (uint32_t channels = 33; channels < 64; channels++) {
12834 DWConvMicrokernelTester()
12835 .cr(32)
12836 .kr(25)
12837 .channels(channels)
12838 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012839 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012840 }
12841 }
12842
12843 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
12844 TEST_REQUIRES_ARM_NEON_V8;
12845 for (uint32_t channels = 33; channels < 64; channels++) {
12846 DWConvMicrokernelTester()
12847 .cr(32)
12848 .kr(25)
12849 .channels(channels)
12850 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012851 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012852 }
12853 }
12854
12855 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
12856 TEST_REQUIRES_ARM_NEON_V8;
12857 for (size_t channels = 1; channels <= 160; channels += 31) {
12858 DWConvMicrokernelTester()
12859 .cr(32)
12860 .kr(25)
12861 .channels(channels)
12862 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012863 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012864 }
12865 }
12866
12867 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
12868 TEST_REQUIRES_ARM_NEON_V8;
12869 for (size_t channels = 1; channels <= 160; channels += 31) {
12870 for (size_t step = 2; step <= 25; step++) {
12871 DWConvMicrokernelTester()
12872 .cr(32)
12873 .kr(25)
12874 .channels(channels)
12875 .width(3)
12876 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012877 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012878 }
12879 }
12880 }
12881
12882 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
12883 TEST_REQUIRES_ARM_NEON_V8;
12884 for (size_t channels = 1; channels <= 160; channels += 31) {
12885 DWConvMicrokernelTester()
12886 .cr(32)
12887 .kr(25)
12888 .channels(32)
12889 .width(5)
12890 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012891 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012892 }
12893 }
12894
12895 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
12896 TEST_REQUIRES_ARM_NEON_V8;
12897 for (size_t channels = 1; channels <= 160; channels += 31) {
12898 DWConvMicrokernelTester()
12899 .cr(32)
12900 .kr(25)
12901 .channels(channels)
12902 .width(3)
12903 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012904 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012905 }
12906 }
12907
12908 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
12909 TEST_REQUIRES_ARM_NEON_V8;
12910 for (size_t channels = 1; channels <= 160; channels += 31) {
12911 DWConvMicrokernelTester()
12912 .cr(32)
12913 .kr(25)
12914 .channels(channels)
12915 .width(3)
12916 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012917 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012918 }
12919 }
12920
12921 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
12922 TEST_REQUIRES_ARM_NEON_V8;
12923 for (uint32_t channels = 64; channels < 512; channels += 96) {
12924 DWConvMicrokernelTester()
12925 .cr(32)
12926 .kr(25)
12927 .channels(channels)
12928 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080012929 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012930 }
12931 }
12932
12933 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
12934 TEST_REQUIRES_ARM_NEON_V8;
12935 for (uint32_t mz = 0; mz < 25; mz++) {
12936 for (uint32_t channels = 64; channels < 512; channels += 96) {
12937 DWConvMicrokernelTester()
12938 .cr(32)
12939 .kr(25)
12940 .channels(channels)
12941 .input_offset(592)
12942 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012943 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan6f905292021-06-25 11:12:05 -070012944 }
12945 }
12946 }
12947#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12948
12949
Marat Dukhan71855ee2021-05-25 19:05:06 -070012950#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -070012951 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
12952 TEST_REQUIRES_X86_SSE2;
12953 DWConvMicrokernelTester()
12954 .cr(8)
12955 .kr(25)
12956 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012957 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070012958 }
12959
12960 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
12961 TEST_REQUIRES_X86_SSE2;
12962 for (uint32_t channels = 16; channels < 128; channels += 24) {
12963 DWConvMicrokernelTester()
12964 .cr(8)
12965 .kr(25)
12966 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012967 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070012968 }
12969 }
12970
12971 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
12972 TEST_REQUIRES_X86_SSE2;
12973 for (uint32_t channels = 16; channels < 128; channels += 24) {
12974 DWConvMicrokernelTester()
12975 .cr(8)
12976 .kr(25)
12977 .channels(channels)
12978 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012979 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070012980 }
12981 }
12982
12983 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
12984 TEST_REQUIRES_X86_SSE2;
12985 for (uint32_t channels = 16; channels < 128; channels += 24) {
12986 DWConvMicrokernelTester()
12987 .cr(8)
12988 .kr(25)
12989 .channels(channels)
12990 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012991 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070012992 }
12993 }
12994
12995 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
12996 TEST_REQUIRES_X86_SSE2;
12997 for (uint32_t channels = 1; channels < 8; channels++) {
12998 DWConvMicrokernelTester()
12999 .cr(8)
13000 .kr(25)
13001 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013002 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013003 }
13004 }
13005
13006 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
13007 TEST_REQUIRES_X86_SSE2;
13008 for (uint32_t channels = 9; channels < 16; channels++) {
13009 DWConvMicrokernelTester()
13010 .cr(8)
13011 .kr(25)
13012 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013014 }
13015 }
13016
13017 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
13018 TEST_REQUIRES_X86_SSE2;
13019 for (uint32_t channels = 9; channels < 16; channels++) {
13020 DWConvMicrokernelTester()
13021 .cr(8)
13022 .kr(25)
13023 .channels(channels)
13024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013025 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013026 }
13027 }
13028
13029 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
13030 TEST_REQUIRES_X86_SSE2;
13031 for (uint32_t channels = 9; channels < 16; channels++) {
13032 DWConvMicrokernelTester()
13033 .cr(8)
13034 .kr(25)
13035 .channels(channels)
13036 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013038 }
13039 }
13040
13041 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
13042 TEST_REQUIRES_X86_SSE2;
13043 for (size_t channels = 1; channels <= 40; channels += 7) {
13044 DWConvMicrokernelTester()
13045 .cr(8)
13046 .kr(25)
13047 .channels(channels)
13048 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013049 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013050 }
13051 }
13052
13053 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
13054 TEST_REQUIRES_X86_SSE2;
13055 for (size_t channels = 1; channels <= 40; channels += 7) {
13056 for (size_t step = 2; step <= 25; step++) {
13057 DWConvMicrokernelTester()
13058 .cr(8)
13059 .kr(25)
13060 .channels(channels)
13061 .width(3)
13062 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013063 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013064 }
13065 }
13066 }
13067
13068 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
13069 TEST_REQUIRES_X86_SSE2;
13070 for (size_t channels = 1; channels <= 40; channels += 7) {
13071 DWConvMicrokernelTester()
13072 .cr(8)
13073 .kr(25)
13074 .channels(8)
13075 .width(5)
13076 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013077 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013078 }
13079 }
13080
13081 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
13082 TEST_REQUIRES_X86_SSE2;
13083 for (size_t channels = 1; channels <= 40; channels += 7) {
13084 DWConvMicrokernelTester()
13085 .cr(8)
13086 .kr(25)
13087 .channels(channels)
13088 .width(3)
13089 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013090 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013091 }
13092 }
13093
13094 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
13095 TEST_REQUIRES_X86_SSE2;
13096 for (size_t channels = 1; channels <= 40; channels += 7) {
13097 DWConvMicrokernelTester()
13098 .cr(8)
13099 .kr(25)
13100 .channels(channels)
13101 .width(3)
13102 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013103 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013104 }
13105 }
13106
13107 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
13108 TEST_REQUIRES_X86_SSE2;
13109 for (uint32_t channels = 16; channels < 128; channels += 24) {
13110 DWConvMicrokernelTester()
13111 .cr(8)
13112 .kr(25)
13113 .channels(channels)
13114 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080013115 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013116 }
13117 }
13118
13119 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
13120 TEST_REQUIRES_X86_SSE2;
13121 for (uint32_t mz = 0; mz < 25; mz++) {
13122 for (uint32_t channels = 16; channels < 128; channels += 24) {
13123 DWConvMicrokernelTester()
13124 .cr(8)
13125 .kr(25)
13126 .channels(channels)
13127 .input_offset(176)
13128 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013130 }
13131 }
13132 }
13133#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13134
13135
13136#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13137 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
13138 TEST_REQUIRES_X86_SSE2;
13139 DWConvMicrokernelTester()
13140 .cr(16)
13141 .kr(25)
13142 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013143 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013144 }
13145
13146 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
13147 TEST_REQUIRES_X86_SSE2;
13148 for (uint32_t channels = 32; channels < 256; channels += 48) {
13149 DWConvMicrokernelTester()
13150 .cr(16)
13151 .kr(25)
13152 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013153 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013154 }
13155 }
13156
13157 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
13158 TEST_REQUIRES_X86_SSE2;
13159 for (uint32_t channels = 32; channels < 256; channels += 48) {
13160 DWConvMicrokernelTester()
13161 .cr(16)
13162 .kr(25)
13163 .channels(channels)
13164 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013165 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013166 }
13167 }
13168
13169 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
13170 TEST_REQUIRES_X86_SSE2;
13171 for (uint32_t channels = 32; channels < 256; channels += 48) {
13172 DWConvMicrokernelTester()
13173 .cr(16)
13174 .kr(25)
13175 .channels(channels)
13176 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013177 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013178 }
13179 }
13180
13181 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
13182 TEST_REQUIRES_X86_SSE2;
13183 for (uint32_t channels = 1; channels < 16; channels++) {
13184 DWConvMicrokernelTester()
13185 .cr(16)
13186 .kr(25)
13187 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013188 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013189 }
13190 }
13191
13192 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
13193 TEST_REQUIRES_X86_SSE2;
13194 for (uint32_t channels = 17; channels < 32; channels++) {
13195 DWConvMicrokernelTester()
13196 .cr(16)
13197 .kr(25)
13198 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013200 }
13201 }
13202
13203 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
13204 TEST_REQUIRES_X86_SSE2;
13205 for (uint32_t channels = 17; channels < 32; channels++) {
13206 DWConvMicrokernelTester()
13207 .cr(16)
13208 .kr(25)
13209 .channels(channels)
13210 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013211 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013212 }
13213 }
13214
13215 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
13216 TEST_REQUIRES_X86_SSE2;
13217 for (uint32_t channels = 17; channels < 32; channels++) {
13218 DWConvMicrokernelTester()
13219 .cr(16)
13220 .kr(25)
13221 .channels(channels)
13222 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013223 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013224 }
13225 }
13226
13227 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
13228 TEST_REQUIRES_X86_SSE2;
13229 for (size_t channels = 1; channels <= 80; channels += 15) {
13230 DWConvMicrokernelTester()
13231 .cr(16)
13232 .kr(25)
13233 .channels(channels)
13234 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013235 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013236 }
13237 }
13238
13239 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
13240 TEST_REQUIRES_X86_SSE2;
13241 for (size_t channels = 1; channels <= 80; channels += 15) {
13242 for (size_t step = 2; step <= 25; step++) {
13243 DWConvMicrokernelTester()
13244 .cr(16)
13245 .kr(25)
13246 .channels(channels)
13247 .width(3)
13248 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013249 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013250 }
13251 }
13252 }
13253
13254 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
13255 TEST_REQUIRES_X86_SSE2;
13256 for (size_t channels = 1; channels <= 80; channels += 15) {
13257 DWConvMicrokernelTester()
13258 .cr(16)
13259 .kr(25)
13260 .channels(16)
13261 .width(5)
13262 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013263 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013264 }
13265 }
13266
13267 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
13268 TEST_REQUIRES_X86_SSE2;
13269 for (size_t channels = 1; channels <= 80; channels += 15) {
13270 DWConvMicrokernelTester()
13271 .cr(16)
13272 .kr(25)
13273 .channels(channels)
13274 .width(3)
13275 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013276 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013277 }
13278 }
13279
13280 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
13281 TEST_REQUIRES_X86_SSE2;
13282 for (size_t channels = 1; channels <= 80; channels += 15) {
13283 DWConvMicrokernelTester()
13284 .cr(16)
13285 .kr(25)
13286 .channels(channels)
13287 .width(3)
13288 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013289 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013290 }
13291 }
13292
13293 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
13294 TEST_REQUIRES_X86_SSE2;
13295 for (uint32_t channels = 32; channels < 256; channels += 48) {
13296 DWConvMicrokernelTester()
13297 .cr(16)
13298 .kr(25)
13299 .channels(channels)
13300 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080013301 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013302 }
13303 }
13304
13305 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
13306 TEST_REQUIRES_X86_SSE2;
13307 for (uint32_t mz = 0; mz < 25; mz++) {
13308 for (uint32_t channels = 32; channels < 256; channels += 48) {
13309 DWConvMicrokernelTester()
13310 .cr(16)
13311 .kr(25)
13312 .channels(channels)
13313 .input_offset(304)
13314 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013315 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013316 }
13317 }
13318 }
13319#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13320
13321
13322#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13323 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_eq_24) {
13324 TEST_REQUIRES_X86_SSE2;
13325 DWConvMicrokernelTester()
13326 .cr(24)
13327 .kr(25)
13328 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080013329 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013330 }
13331
13332 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24) {
13333 TEST_REQUIRES_X86_SSE2;
13334 for (uint32_t channels = 48; channels < 384; channels += 72) {
13335 DWConvMicrokernelTester()
13336 .cr(24)
13337 .kr(25)
13338 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013339 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013340 }
13341 }
13342
13343 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
13344 TEST_REQUIRES_X86_SSE2;
13345 for (uint32_t channels = 48; channels < 384; channels += 72) {
13346 DWConvMicrokernelTester()
13347 .cr(24)
13348 .kr(25)
13349 .channels(channels)
13350 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013351 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013352 }
13353 }
13354
13355 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
13356 TEST_REQUIRES_X86_SSE2;
13357 for (uint32_t channels = 48; channels < 384; channels += 72) {
13358 DWConvMicrokernelTester()
13359 .cr(24)
13360 .kr(25)
13361 .channels(channels)
13362 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013363 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013364 }
13365 }
13366
13367 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_lt_24) {
13368 TEST_REQUIRES_X86_SSE2;
13369 for (uint32_t channels = 1; channels < 24; channels++) {
13370 DWConvMicrokernelTester()
13371 .cr(24)
13372 .kr(25)
13373 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013374 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013375 }
13376 }
13377
13378 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24) {
13379 TEST_REQUIRES_X86_SSE2;
13380 for (uint32_t channels = 25; channels < 48; channels++) {
13381 DWConvMicrokernelTester()
13382 .cr(24)
13383 .kr(25)
13384 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013385 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013386 }
13387 }
13388
13389 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
13390 TEST_REQUIRES_X86_SSE2;
13391 for (uint32_t channels = 25; channels < 48; channels++) {
13392 DWConvMicrokernelTester()
13393 .cr(24)
13394 .kr(25)
13395 .channels(channels)
13396 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013397 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013398 }
13399 }
13400
13401 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
13402 TEST_REQUIRES_X86_SSE2;
13403 for (uint32_t channels = 25; channels < 48; channels++) {
13404 DWConvMicrokernelTester()
13405 .cr(24)
13406 .kr(25)
13407 .channels(channels)
13408 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013409 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013410 }
13411 }
13412
13413 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel) {
13414 TEST_REQUIRES_X86_SSE2;
13415 for (size_t channels = 1; channels <= 120; channels += 23) {
13416 DWConvMicrokernelTester()
13417 .cr(24)
13418 .kr(25)
13419 .channels(channels)
13420 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013421 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013422 }
13423 }
13424
13425 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_step) {
13426 TEST_REQUIRES_X86_SSE2;
13427 for (size_t channels = 1; channels <= 120; channels += 23) {
13428 for (size_t step = 2; step <= 25; step++) {
13429 DWConvMicrokernelTester()
13430 .cr(24)
13431 .kr(25)
13432 .channels(channels)
13433 .width(3)
13434 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013435 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013436 }
13437 }
13438 }
13439
13440 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
13441 TEST_REQUIRES_X86_SSE2;
13442 for (size_t channels = 1; channels <= 120; channels += 23) {
13443 DWConvMicrokernelTester()
13444 .cr(24)
13445 .kr(25)
13446 .channels(24)
13447 .width(5)
13448 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080013449 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013450 }
13451 }
13452
13453 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
13454 TEST_REQUIRES_X86_SSE2;
13455 for (size_t channels = 1; channels <= 120; channels += 23) {
13456 DWConvMicrokernelTester()
13457 .cr(24)
13458 .kr(25)
13459 .channels(channels)
13460 .width(3)
13461 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013462 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013463 }
13464 }
13465
13466 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
13467 TEST_REQUIRES_X86_SSE2;
13468 for (size_t channels = 1; channels <= 120; channels += 23) {
13469 DWConvMicrokernelTester()
13470 .cr(24)
13471 .kr(25)
13472 .channels(channels)
13473 .width(3)
13474 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013475 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013476 }
13477 }
13478
13479 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, input_offset) {
13480 TEST_REQUIRES_X86_SSE2;
13481 for (uint32_t channels = 48; channels < 384; channels += 72) {
13482 DWConvMicrokernelTester()
13483 .cr(24)
13484 .kr(25)
13485 .channels(channels)
13486 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080013487 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013488 }
13489 }
13490
13491 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, zero) {
13492 TEST_REQUIRES_X86_SSE2;
13493 for (uint32_t mz = 0; mz < 25; mz++) {
13494 for (uint32_t channels = 48; channels < 384; channels += 72) {
13495 DWConvMicrokernelTester()
13496 .cr(24)
13497 .kr(25)
13498 .channels(channels)
13499 .input_offset(464)
13500 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013501 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013502 }
13503 }
13504 }
13505#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13506
13507
13508#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070013509 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_eq_8) {
13510 TEST_REQUIRES_X86_SSE2;
13511 DWConvMicrokernelTester()
13512 .cr(8)
13513 .kr(25)
13514 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013515 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013516 }
13517
13518 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8) {
13519 TEST_REQUIRES_X86_SSE2;
13520 for (uint32_t channels = 16; channels < 128; channels += 24) {
13521 DWConvMicrokernelTester()
13522 .cr(8)
13523 .kr(25)
13524 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013525 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013526 }
13527 }
13528
13529 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
13530 TEST_REQUIRES_X86_SSE2;
13531 for (uint32_t channels = 16; channels < 128; channels += 24) {
13532 DWConvMicrokernelTester()
13533 .cr(8)
13534 .kr(25)
13535 .channels(channels)
13536 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013538 }
13539 }
13540
13541 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
13542 TEST_REQUIRES_X86_SSE2;
13543 for (uint32_t channels = 16; channels < 128; channels += 24) {
13544 DWConvMicrokernelTester()
13545 .cr(8)
13546 .kr(25)
13547 .channels(channels)
13548 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013549 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013550 }
13551 }
13552
13553 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_lt_8) {
13554 TEST_REQUIRES_X86_SSE2;
13555 for (uint32_t channels = 1; channels < 8; channels++) {
13556 DWConvMicrokernelTester()
13557 .cr(8)
13558 .kr(25)
13559 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013560 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013561 }
13562 }
13563
13564 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8) {
13565 TEST_REQUIRES_X86_SSE2;
13566 for (uint32_t channels = 9; channels < 16; channels++) {
13567 DWConvMicrokernelTester()
13568 .cr(8)
13569 .kr(25)
13570 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013571 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013572 }
13573 }
13574
13575 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
13576 TEST_REQUIRES_X86_SSE2;
13577 for (uint32_t channels = 9; channels < 16; channels++) {
13578 DWConvMicrokernelTester()
13579 .cr(8)
13580 .kr(25)
13581 .channels(channels)
13582 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013583 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013584 }
13585 }
13586
13587 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
13588 TEST_REQUIRES_X86_SSE2;
13589 for (uint32_t channels = 9; channels < 16; channels++) {
13590 DWConvMicrokernelTester()
13591 .cr(8)
13592 .kr(25)
13593 .channels(channels)
13594 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013595 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013596 }
13597 }
13598
13599 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel) {
13600 TEST_REQUIRES_X86_SSE2;
13601 for (size_t channels = 1; channels <= 40; channels += 7) {
13602 DWConvMicrokernelTester()
13603 .cr(8)
13604 .kr(25)
13605 .channels(channels)
13606 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013607 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013608 }
13609 }
13610
13611 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_step) {
13612 TEST_REQUIRES_X86_SSE2;
13613 for (size_t channels = 1; channels <= 40; channels += 7) {
13614 for (size_t step = 2; step <= 25; step++) {
13615 DWConvMicrokernelTester()
13616 .cr(8)
13617 .kr(25)
13618 .channels(channels)
13619 .width(3)
13620 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013621 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013622 }
13623 }
13624 }
13625
13626 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
13627 TEST_REQUIRES_X86_SSE2;
13628 for (size_t channels = 1; channels <= 40; channels += 7) {
13629 DWConvMicrokernelTester()
13630 .cr(8)
13631 .kr(25)
13632 .channels(8)
13633 .width(5)
13634 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013635 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013636 }
13637 }
13638
13639 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
13640 TEST_REQUIRES_X86_SSE2;
13641 for (size_t channels = 1; channels <= 40; channels += 7) {
13642 DWConvMicrokernelTester()
13643 .cr(8)
13644 .kr(25)
13645 .channels(channels)
13646 .width(3)
13647 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013648 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013649 }
13650 }
13651
13652 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
13653 TEST_REQUIRES_X86_SSE2;
13654 for (size_t channels = 1; channels <= 40; channels += 7) {
13655 DWConvMicrokernelTester()
13656 .cr(8)
13657 .kr(25)
13658 .channels(channels)
13659 .width(3)
13660 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013661 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013662 }
13663 }
13664
13665 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, input_offset) {
13666 TEST_REQUIRES_X86_SSE2;
13667 for (uint32_t channels = 16; channels < 128; channels += 24) {
13668 DWConvMicrokernelTester()
13669 .cr(8)
13670 .kr(25)
13671 .channels(channels)
13672 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080013673 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013674 }
13675 }
13676
13677 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, zero) {
13678 TEST_REQUIRES_X86_SSE2;
13679 for (uint32_t mz = 0; mz < 25; mz++) {
13680 for (uint32_t channels = 16; channels < 128; channels += 24) {
13681 DWConvMicrokernelTester()
13682 .cr(8)
13683 .kr(25)
13684 .channels(channels)
13685 .input_offset(176)
13686 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013688 }
13689 }
13690 }
13691#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13692
13693
13694#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13695 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_eq_16) {
13696 TEST_REQUIRES_X86_SSE2;
13697 DWConvMicrokernelTester()
13698 .cr(16)
13699 .kr(25)
13700 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013701 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013702 }
13703
13704 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16) {
13705 TEST_REQUIRES_X86_SSE2;
13706 for (uint32_t channels = 32; channels < 256; channels += 48) {
13707 DWConvMicrokernelTester()
13708 .cr(16)
13709 .kr(25)
13710 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013711 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013712 }
13713 }
13714
13715 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
13716 TEST_REQUIRES_X86_SSE2;
13717 for (uint32_t channels = 32; channels < 256; channels += 48) {
13718 DWConvMicrokernelTester()
13719 .cr(16)
13720 .kr(25)
13721 .channels(channels)
13722 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013723 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013724 }
13725 }
13726
13727 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
13728 TEST_REQUIRES_X86_SSE2;
13729 for (uint32_t channels = 32; channels < 256; channels += 48) {
13730 DWConvMicrokernelTester()
13731 .cr(16)
13732 .kr(25)
13733 .channels(channels)
13734 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013735 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013736 }
13737 }
13738
13739 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_lt_16) {
13740 TEST_REQUIRES_X86_SSE2;
13741 for (uint32_t channels = 1; channels < 16; channels++) {
13742 DWConvMicrokernelTester()
13743 .cr(16)
13744 .kr(25)
13745 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013746 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013747 }
13748 }
13749
13750 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16) {
13751 TEST_REQUIRES_X86_SSE2;
13752 for (uint32_t channels = 17; channels < 32; channels++) {
13753 DWConvMicrokernelTester()
13754 .cr(16)
13755 .kr(25)
13756 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013757 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013758 }
13759 }
13760
13761 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
13762 TEST_REQUIRES_X86_SSE2;
13763 for (uint32_t channels = 17; channels < 32; channels++) {
13764 DWConvMicrokernelTester()
13765 .cr(16)
13766 .kr(25)
13767 .channels(channels)
13768 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013769 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013770 }
13771 }
13772
13773 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
13774 TEST_REQUIRES_X86_SSE2;
13775 for (uint32_t channels = 17; channels < 32; channels++) {
13776 DWConvMicrokernelTester()
13777 .cr(16)
13778 .kr(25)
13779 .channels(channels)
13780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013781 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013782 }
13783 }
13784
13785 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel) {
13786 TEST_REQUIRES_X86_SSE2;
13787 for (size_t channels = 1; channels <= 80; channels += 15) {
13788 DWConvMicrokernelTester()
13789 .cr(16)
13790 .kr(25)
13791 .channels(channels)
13792 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013793 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013794 }
13795 }
13796
13797 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_step) {
13798 TEST_REQUIRES_X86_SSE2;
13799 for (size_t channels = 1; channels <= 80; channels += 15) {
13800 for (size_t step = 2; step <= 25; step++) {
13801 DWConvMicrokernelTester()
13802 .cr(16)
13803 .kr(25)
13804 .channels(channels)
13805 .width(3)
13806 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013807 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013808 }
13809 }
13810 }
13811
13812 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
13813 TEST_REQUIRES_X86_SSE2;
13814 for (size_t channels = 1; channels <= 80; channels += 15) {
13815 DWConvMicrokernelTester()
13816 .cr(16)
13817 .kr(25)
13818 .channels(16)
13819 .width(5)
13820 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013821 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013822 }
13823 }
13824
13825 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
13826 TEST_REQUIRES_X86_SSE2;
13827 for (size_t channels = 1; channels <= 80; channels += 15) {
13828 DWConvMicrokernelTester()
13829 .cr(16)
13830 .kr(25)
13831 .channels(channels)
13832 .width(3)
13833 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013834 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013835 }
13836 }
13837
13838 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
13839 TEST_REQUIRES_X86_SSE2;
13840 for (size_t channels = 1; channels <= 80; channels += 15) {
13841 DWConvMicrokernelTester()
13842 .cr(16)
13843 .kr(25)
13844 .channels(channels)
13845 .width(3)
13846 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013847 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013848 }
13849 }
13850
13851 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, input_offset) {
13852 TEST_REQUIRES_X86_SSE2;
13853 for (uint32_t channels = 32; channels < 256; channels += 48) {
13854 DWConvMicrokernelTester()
13855 .cr(16)
13856 .kr(25)
13857 .channels(channels)
13858 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080013859 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013860 }
13861 }
13862
13863 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, zero) {
13864 TEST_REQUIRES_X86_SSE2;
13865 for (uint32_t mz = 0; mz < 25; mz++) {
13866 for (uint32_t channels = 32; channels < 256; channels += 48) {
13867 DWConvMicrokernelTester()
13868 .cr(16)
13869 .kr(25)
13870 .channels(channels)
13871 .input_offset(304)
13872 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013873 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070013874 }
13875 }
13876 }
13877#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13878
13879
13880#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -070013881 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
13882 TEST_REQUIRES_X86_SSE41;
13883 DWConvMicrokernelTester()
13884 .cr(8)
13885 .kr(25)
13886 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013887 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013888 }
13889
13890 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
13891 TEST_REQUIRES_X86_SSE41;
13892 for (uint32_t channels = 16; channels < 128; channels += 24) {
13893 DWConvMicrokernelTester()
13894 .cr(8)
13895 .kr(25)
13896 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013897 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013898 }
13899 }
13900
13901 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
13902 TEST_REQUIRES_X86_SSE41;
13903 for (uint32_t channels = 16; channels < 128; channels += 24) {
13904 DWConvMicrokernelTester()
13905 .cr(8)
13906 .kr(25)
13907 .channels(channels)
13908 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013909 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013910 }
13911 }
13912
13913 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
13914 TEST_REQUIRES_X86_SSE41;
13915 for (uint32_t channels = 16; channels < 128; channels += 24) {
13916 DWConvMicrokernelTester()
13917 .cr(8)
13918 .kr(25)
13919 .channels(channels)
13920 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013921 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013922 }
13923 }
13924
13925 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
13926 TEST_REQUIRES_X86_SSE41;
13927 for (uint32_t channels = 1; channels < 8; channels++) {
13928 DWConvMicrokernelTester()
13929 .cr(8)
13930 .kr(25)
13931 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013932 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013933 }
13934 }
13935
13936 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
13937 TEST_REQUIRES_X86_SSE41;
13938 for (uint32_t channels = 9; channels < 16; channels++) {
13939 DWConvMicrokernelTester()
13940 .cr(8)
13941 .kr(25)
13942 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013943 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013944 }
13945 }
13946
13947 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
13948 TEST_REQUIRES_X86_SSE41;
13949 for (uint32_t channels = 9; channels < 16; channels++) {
13950 DWConvMicrokernelTester()
13951 .cr(8)
13952 .kr(25)
13953 .channels(channels)
13954 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013955 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013956 }
13957 }
13958
13959 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
13960 TEST_REQUIRES_X86_SSE41;
13961 for (uint32_t channels = 9; channels < 16; channels++) {
13962 DWConvMicrokernelTester()
13963 .cr(8)
13964 .kr(25)
13965 .channels(channels)
13966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013967 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013968 }
13969 }
13970
13971 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
13972 TEST_REQUIRES_X86_SSE41;
13973 for (size_t channels = 1; channels <= 40; channels += 7) {
13974 DWConvMicrokernelTester()
13975 .cr(8)
13976 .kr(25)
13977 .channels(channels)
13978 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013979 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013980 }
13981 }
13982
13983 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
13984 TEST_REQUIRES_X86_SSE41;
13985 for (size_t channels = 1; channels <= 40; channels += 7) {
13986 for (size_t step = 2; step <= 25; step++) {
13987 DWConvMicrokernelTester()
13988 .cr(8)
13989 .kr(25)
13990 .channels(channels)
13991 .width(3)
13992 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013993 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070013994 }
13995 }
13996 }
13997
13998 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
13999 TEST_REQUIRES_X86_SSE41;
14000 for (size_t channels = 1; channels <= 40; channels += 7) {
14001 DWConvMicrokernelTester()
14002 .cr(8)
14003 .kr(25)
14004 .channels(8)
14005 .width(5)
14006 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014007 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014008 }
14009 }
14010
14011 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
14012 TEST_REQUIRES_X86_SSE41;
14013 for (size_t channels = 1; channels <= 40; channels += 7) {
14014 DWConvMicrokernelTester()
14015 .cr(8)
14016 .kr(25)
14017 .channels(channels)
14018 .width(3)
14019 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014020 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014021 }
14022 }
14023
14024 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
14025 TEST_REQUIRES_X86_SSE41;
14026 for (size_t channels = 1; channels <= 40; channels += 7) {
14027 DWConvMicrokernelTester()
14028 .cr(8)
14029 .kr(25)
14030 .channels(channels)
14031 .width(3)
14032 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014033 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014034 }
14035 }
14036
14037 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
14038 TEST_REQUIRES_X86_SSE41;
14039 for (uint32_t channels = 16; channels < 128; channels += 24) {
14040 DWConvMicrokernelTester()
14041 .cr(8)
14042 .kr(25)
14043 .channels(channels)
14044 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080014045 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014046 }
14047 }
14048
14049 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
14050 TEST_REQUIRES_X86_SSE41;
14051 for (uint32_t mz = 0; mz < 25; mz++) {
14052 for (uint32_t channels = 16; channels < 128; channels += 24) {
14053 DWConvMicrokernelTester()
14054 .cr(8)
14055 .kr(25)
14056 .channels(channels)
14057 .input_offset(176)
14058 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014059 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014060 }
14061 }
14062 }
14063#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14064
14065
14066#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14067 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
14068 TEST_REQUIRES_X86_SSE41;
14069 DWConvMicrokernelTester()
14070 .cr(16)
14071 .kr(25)
14072 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080014073 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014074 }
14075
14076 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
14077 TEST_REQUIRES_X86_SSE41;
14078 for (uint32_t channels = 32; channels < 256; channels += 48) {
14079 DWConvMicrokernelTester()
14080 .cr(16)
14081 .kr(25)
14082 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014083 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014084 }
14085 }
14086
14087 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
14088 TEST_REQUIRES_X86_SSE41;
14089 for (uint32_t channels = 32; channels < 256; channels += 48) {
14090 DWConvMicrokernelTester()
14091 .cr(16)
14092 .kr(25)
14093 .channels(channels)
14094 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014095 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014096 }
14097 }
14098
14099 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
14100 TEST_REQUIRES_X86_SSE41;
14101 for (uint32_t channels = 32; channels < 256; channels += 48) {
14102 DWConvMicrokernelTester()
14103 .cr(16)
14104 .kr(25)
14105 .channels(channels)
14106 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014107 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014108 }
14109 }
14110
14111 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
14112 TEST_REQUIRES_X86_SSE41;
14113 for (uint32_t channels = 1; channels < 16; channels++) {
14114 DWConvMicrokernelTester()
14115 .cr(16)
14116 .kr(25)
14117 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014118 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014119 }
14120 }
14121
14122 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
14123 TEST_REQUIRES_X86_SSE41;
14124 for (uint32_t channels = 17; channels < 32; channels++) {
14125 DWConvMicrokernelTester()
14126 .cr(16)
14127 .kr(25)
14128 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014130 }
14131 }
14132
14133 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
14134 TEST_REQUIRES_X86_SSE41;
14135 for (uint32_t channels = 17; channels < 32; channels++) {
14136 DWConvMicrokernelTester()
14137 .cr(16)
14138 .kr(25)
14139 .channels(channels)
14140 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014141 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014142 }
14143 }
14144
14145 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
14146 TEST_REQUIRES_X86_SSE41;
14147 for (uint32_t channels = 17; channels < 32; channels++) {
14148 DWConvMicrokernelTester()
14149 .cr(16)
14150 .kr(25)
14151 .channels(channels)
14152 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014153 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014154 }
14155 }
14156
14157 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
14158 TEST_REQUIRES_X86_SSE41;
14159 for (size_t channels = 1; channels <= 80; channels += 15) {
14160 DWConvMicrokernelTester()
14161 .cr(16)
14162 .kr(25)
14163 .channels(channels)
14164 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014165 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014166 }
14167 }
14168
14169 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
14170 TEST_REQUIRES_X86_SSE41;
14171 for (size_t channels = 1; channels <= 80; channels += 15) {
14172 for (size_t step = 2; step <= 25; step++) {
14173 DWConvMicrokernelTester()
14174 .cr(16)
14175 .kr(25)
14176 .channels(channels)
14177 .width(3)
14178 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014179 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014180 }
14181 }
14182 }
14183
14184 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
14185 TEST_REQUIRES_X86_SSE41;
14186 for (size_t channels = 1; channels <= 80; channels += 15) {
14187 DWConvMicrokernelTester()
14188 .cr(16)
14189 .kr(25)
14190 .channels(16)
14191 .width(5)
14192 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014193 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014194 }
14195 }
14196
14197 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
14198 TEST_REQUIRES_X86_SSE41;
14199 for (size_t channels = 1; channels <= 80; channels += 15) {
14200 DWConvMicrokernelTester()
14201 .cr(16)
14202 .kr(25)
14203 .channels(channels)
14204 .width(3)
14205 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014206 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014207 }
14208 }
14209
14210 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
14211 TEST_REQUIRES_X86_SSE41;
14212 for (size_t channels = 1; channels <= 80; channels += 15) {
14213 DWConvMicrokernelTester()
14214 .cr(16)
14215 .kr(25)
14216 .channels(channels)
14217 .width(3)
14218 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014219 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014220 }
14221 }
14222
14223 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
14224 TEST_REQUIRES_X86_SSE41;
14225 for (uint32_t channels = 32; channels < 256; channels += 48) {
14226 DWConvMicrokernelTester()
14227 .cr(16)
14228 .kr(25)
14229 .channels(channels)
14230 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080014231 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014232 }
14233 }
14234
14235 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
14236 TEST_REQUIRES_X86_SSE41;
14237 for (uint32_t mz = 0; mz < 25; mz++) {
14238 for (uint32_t channels = 32; channels < 256; channels += 48) {
14239 DWConvMicrokernelTester()
14240 .cr(16)
14241 .kr(25)
14242 .channels(channels)
14243 .input_offset(304)
14244 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014245 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014246 }
14247 }
14248 }
14249#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14250
14251
14252#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14253 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_eq_24) {
14254 TEST_REQUIRES_X86_SSE41;
14255 DWConvMicrokernelTester()
14256 .cr(24)
14257 .kr(25)
14258 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080014259 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014260 }
14261
14262 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24) {
14263 TEST_REQUIRES_X86_SSE41;
14264 for (uint32_t channels = 48; channels < 384; channels += 72) {
14265 DWConvMicrokernelTester()
14266 .cr(24)
14267 .kr(25)
14268 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014270 }
14271 }
14272
14273 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
14274 TEST_REQUIRES_X86_SSE41;
14275 for (uint32_t channels = 48; channels < 384; channels += 72) {
14276 DWConvMicrokernelTester()
14277 .cr(24)
14278 .kr(25)
14279 .channels(channels)
14280 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014281 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014282 }
14283 }
14284
14285 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
14286 TEST_REQUIRES_X86_SSE41;
14287 for (uint32_t channels = 48; channels < 384; channels += 72) {
14288 DWConvMicrokernelTester()
14289 .cr(24)
14290 .kr(25)
14291 .channels(channels)
14292 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014293 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014294 }
14295 }
14296
14297 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_lt_24) {
14298 TEST_REQUIRES_X86_SSE41;
14299 for (uint32_t channels = 1; channels < 24; channels++) {
14300 DWConvMicrokernelTester()
14301 .cr(24)
14302 .kr(25)
14303 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014304 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014305 }
14306 }
14307
14308 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24) {
14309 TEST_REQUIRES_X86_SSE41;
14310 for (uint32_t channels = 25; channels < 48; channels++) {
14311 DWConvMicrokernelTester()
14312 .cr(24)
14313 .kr(25)
14314 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014315 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014316 }
14317 }
14318
14319 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
14320 TEST_REQUIRES_X86_SSE41;
14321 for (uint32_t channels = 25; channels < 48; channels++) {
14322 DWConvMicrokernelTester()
14323 .cr(24)
14324 .kr(25)
14325 .channels(channels)
14326 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014327 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014328 }
14329 }
14330
14331 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
14332 TEST_REQUIRES_X86_SSE41;
14333 for (uint32_t channels = 25; channels < 48; channels++) {
14334 DWConvMicrokernelTester()
14335 .cr(24)
14336 .kr(25)
14337 .channels(channels)
14338 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014339 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014340 }
14341 }
14342
14343 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel) {
14344 TEST_REQUIRES_X86_SSE41;
14345 for (size_t channels = 1; channels <= 120; channels += 23) {
14346 DWConvMicrokernelTester()
14347 .cr(24)
14348 .kr(25)
14349 .channels(channels)
14350 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014351 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014352 }
14353 }
14354
14355 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_step) {
14356 TEST_REQUIRES_X86_SSE41;
14357 for (size_t channels = 1; channels <= 120; channels += 23) {
14358 for (size_t step = 2; step <= 25; step++) {
14359 DWConvMicrokernelTester()
14360 .cr(24)
14361 .kr(25)
14362 .channels(channels)
14363 .width(3)
14364 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014365 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014366 }
14367 }
14368 }
14369
14370 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
14371 TEST_REQUIRES_X86_SSE41;
14372 for (size_t channels = 1; channels <= 120; channels += 23) {
14373 DWConvMicrokernelTester()
14374 .cr(24)
14375 .kr(25)
14376 .channels(24)
14377 .width(5)
14378 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080014379 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014380 }
14381 }
14382
14383 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
14384 TEST_REQUIRES_X86_SSE41;
14385 for (size_t channels = 1; channels <= 120; channels += 23) {
14386 DWConvMicrokernelTester()
14387 .cr(24)
14388 .kr(25)
14389 .channels(channels)
14390 .width(3)
14391 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014392 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014393 }
14394 }
14395
14396 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
14397 TEST_REQUIRES_X86_SSE41;
14398 for (size_t channels = 1; channels <= 120; channels += 23) {
14399 DWConvMicrokernelTester()
14400 .cr(24)
14401 .kr(25)
14402 .channels(channels)
14403 .width(3)
14404 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014405 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014406 }
14407 }
14408
14409 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, input_offset) {
14410 TEST_REQUIRES_X86_SSE41;
14411 for (uint32_t channels = 48; channels < 384; channels += 72) {
14412 DWConvMicrokernelTester()
14413 .cr(24)
14414 .kr(25)
14415 .channels(channels)
14416 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080014417 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014418 }
14419 }
14420
14421 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, zero) {
14422 TEST_REQUIRES_X86_SSE41;
14423 for (uint32_t mz = 0; mz < 25; mz++) {
14424 for (uint32_t channels = 48; channels < 384; channels += 72) {
14425 DWConvMicrokernelTester()
14426 .cr(24)
14427 .kr(25)
14428 .channels(channels)
14429 .input_offset(464)
14430 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014431 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014432 }
14433 }
14434 }
14435#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14436
14437
14438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070014439 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_eq_8) {
14440 TEST_REQUIRES_X86_SSE41;
14441 DWConvMicrokernelTester()
14442 .cr(8)
14443 .kr(25)
14444 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014445 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014446 }
14447
14448 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8) {
14449 TEST_REQUIRES_X86_SSE41;
14450 for (uint32_t channels = 16; channels < 128; channels += 24) {
14451 DWConvMicrokernelTester()
14452 .cr(8)
14453 .kr(25)
14454 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014455 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014456 }
14457 }
14458
14459 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
14460 TEST_REQUIRES_X86_SSE41;
14461 for (uint32_t channels = 16; channels < 128; channels += 24) {
14462 DWConvMicrokernelTester()
14463 .cr(8)
14464 .kr(25)
14465 .channels(channels)
14466 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014467 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014468 }
14469 }
14470
14471 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
14472 TEST_REQUIRES_X86_SSE41;
14473 for (uint32_t channels = 16; channels < 128; channels += 24) {
14474 DWConvMicrokernelTester()
14475 .cr(8)
14476 .kr(25)
14477 .channels(channels)
14478 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014479 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014480 }
14481 }
14482
14483 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_lt_8) {
14484 TEST_REQUIRES_X86_SSE41;
14485 for (uint32_t channels = 1; channels < 8; channels++) {
14486 DWConvMicrokernelTester()
14487 .cr(8)
14488 .kr(25)
14489 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014490 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014491 }
14492 }
14493
14494 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8) {
14495 TEST_REQUIRES_X86_SSE41;
14496 for (uint32_t channels = 9; channels < 16; channels++) {
14497 DWConvMicrokernelTester()
14498 .cr(8)
14499 .kr(25)
14500 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014501 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014502 }
14503 }
14504
14505 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
14506 TEST_REQUIRES_X86_SSE41;
14507 for (uint32_t channels = 9; channels < 16; channels++) {
14508 DWConvMicrokernelTester()
14509 .cr(8)
14510 .kr(25)
14511 .channels(channels)
14512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014513 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014514 }
14515 }
14516
14517 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
14518 TEST_REQUIRES_X86_SSE41;
14519 for (uint32_t channels = 9; channels < 16; channels++) {
14520 DWConvMicrokernelTester()
14521 .cr(8)
14522 .kr(25)
14523 .channels(channels)
14524 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014525 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014526 }
14527 }
14528
14529 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel) {
14530 TEST_REQUIRES_X86_SSE41;
14531 for (size_t channels = 1; channels <= 40; channels += 7) {
14532 DWConvMicrokernelTester()
14533 .cr(8)
14534 .kr(25)
14535 .channels(channels)
14536 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014538 }
14539 }
14540
14541 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_step) {
14542 TEST_REQUIRES_X86_SSE41;
14543 for (size_t channels = 1; channels <= 40; channels += 7) {
14544 for (size_t step = 2; step <= 25; step++) {
14545 DWConvMicrokernelTester()
14546 .cr(8)
14547 .kr(25)
14548 .channels(channels)
14549 .width(3)
14550 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014551 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014552 }
14553 }
14554 }
14555
14556 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
14557 TEST_REQUIRES_X86_SSE41;
14558 for (size_t channels = 1; channels <= 40; channels += 7) {
14559 DWConvMicrokernelTester()
14560 .cr(8)
14561 .kr(25)
14562 .channels(8)
14563 .width(5)
14564 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014565 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014566 }
14567 }
14568
14569 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
14570 TEST_REQUIRES_X86_SSE41;
14571 for (size_t channels = 1; channels <= 40; channels += 7) {
14572 DWConvMicrokernelTester()
14573 .cr(8)
14574 .kr(25)
14575 .channels(channels)
14576 .width(3)
14577 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014578 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014579 }
14580 }
14581
14582 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
14583 TEST_REQUIRES_X86_SSE41;
14584 for (size_t channels = 1; channels <= 40; channels += 7) {
14585 DWConvMicrokernelTester()
14586 .cr(8)
14587 .kr(25)
14588 .channels(channels)
14589 .width(3)
14590 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014591 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014592 }
14593 }
14594
14595 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, input_offset) {
14596 TEST_REQUIRES_X86_SSE41;
14597 for (uint32_t channels = 16; channels < 128; channels += 24) {
14598 DWConvMicrokernelTester()
14599 .cr(8)
14600 .kr(25)
14601 .channels(channels)
14602 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080014603 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014604 }
14605 }
14606
14607 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, zero) {
14608 TEST_REQUIRES_X86_SSE41;
14609 for (uint32_t mz = 0; mz < 25; mz++) {
14610 for (uint32_t channels = 16; channels < 128; channels += 24) {
14611 DWConvMicrokernelTester()
14612 .cr(8)
14613 .kr(25)
14614 .channels(channels)
14615 .input_offset(176)
14616 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014617 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014618 }
14619 }
14620 }
14621#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14622
14623
14624#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14625 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_eq_16) {
14626 TEST_REQUIRES_X86_SSE41;
14627 DWConvMicrokernelTester()
14628 .cr(16)
14629 .kr(25)
14630 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080014631 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014632 }
14633
14634 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16) {
14635 TEST_REQUIRES_X86_SSE41;
14636 for (uint32_t channels = 32; channels < 256; channels += 48) {
14637 DWConvMicrokernelTester()
14638 .cr(16)
14639 .kr(25)
14640 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014641 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014642 }
14643 }
14644
14645 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
14646 TEST_REQUIRES_X86_SSE41;
14647 for (uint32_t channels = 32; channels < 256; channels += 48) {
14648 DWConvMicrokernelTester()
14649 .cr(16)
14650 .kr(25)
14651 .channels(channels)
14652 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014653 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014654 }
14655 }
14656
14657 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
14658 TEST_REQUIRES_X86_SSE41;
14659 for (uint32_t channels = 32; channels < 256; channels += 48) {
14660 DWConvMicrokernelTester()
14661 .cr(16)
14662 .kr(25)
14663 .channels(channels)
14664 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014665 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014666 }
14667 }
14668
14669 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_lt_16) {
14670 TEST_REQUIRES_X86_SSE41;
14671 for (uint32_t channels = 1; channels < 16; channels++) {
14672 DWConvMicrokernelTester()
14673 .cr(16)
14674 .kr(25)
14675 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014676 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014677 }
14678 }
14679
14680 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16) {
14681 TEST_REQUIRES_X86_SSE41;
14682 for (uint32_t channels = 17; channels < 32; channels++) {
14683 DWConvMicrokernelTester()
14684 .cr(16)
14685 .kr(25)
14686 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014688 }
14689 }
14690
14691 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
14692 TEST_REQUIRES_X86_SSE41;
14693 for (uint32_t channels = 17; channels < 32; channels++) {
14694 DWConvMicrokernelTester()
14695 .cr(16)
14696 .kr(25)
14697 .channels(channels)
14698 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014699 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014700 }
14701 }
14702
14703 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
14704 TEST_REQUIRES_X86_SSE41;
14705 for (uint32_t channels = 17; channels < 32; channels++) {
14706 DWConvMicrokernelTester()
14707 .cr(16)
14708 .kr(25)
14709 .channels(channels)
14710 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014711 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014712 }
14713 }
14714
14715 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel) {
14716 TEST_REQUIRES_X86_SSE41;
14717 for (size_t channels = 1; channels <= 80; channels += 15) {
14718 DWConvMicrokernelTester()
14719 .cr(16)
14720 .kr(25)
14721 .channels(channels)
14722 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014723 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014724 }
14725 }
14726
14727 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_step) {
14728 TEST_REQUIRES_X86_SSE41;
14729 for (size_t channels = 1; channels <= 80; channels += 15) {
14730 for (size_t step = 2; step <= 25; step++) {
14731 DWConvMicrokernelTester()
14732 .cr(16)
14733 .kr(25)
14734 .channels(channels)
14735 .width(3)
14736 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014737 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014738 }
14739 }
14740 }
14741
14742 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
14743 TEST_REQUIRES_X86_SSE41;
14744 for (size_t channels = 1; channels <= 80; channels += 15) {
14745 DWConvMicrokernelTester()
14746 .cr(16)
14747 .kr(25)
14748 .channels(16)
14749 .width(5)
14750 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014751 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014752 }
14753 }
14754
14755 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
14756 TEST_REQUIRES_X86_SSE41;
14757 for (size_t channels = 1; channels <= 80; channels += 15) {
14758 DWConvMicrokernelTester()
14759 .cr(16)
14760 .kr(25)
14761 .channels(channels)
14762 .width(3)
14763 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014764 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014765 }
14766 }
14767
14768 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
14769 TEST_REQUIRES_X86_SSE41;
14770 for (size_t channels = 1; channels <= 80; channels += 15) {
14771 DWConvMicrokernelTester()
14772 .cr(16)
14773 .kr(25)
14774 .channels(channels)
14775 .width(3)
14776 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014777 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014778 }
14779 }
14780
14781 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, input_offset) {
14782 TEST_REQUIRES_X86_SSE41;
14783 for (uint32_t channels = 32; channels < 256; channels += 48) {
14784 DWConvMicrokernelTester()
14785 .cr(16)
14786 .kr(25)
14787 .channels(channels)
14788 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080014789 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014790 }
14791 }
14792
14793 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, zero) {
14794 TEST_REQUIRES_X86_SSE41;
14795 for (uint32_t mz = 0; mz < 25; mz++) {
14796 for (uint32_t channels = 32; channels < 256; channels += 48) {
14797 DWConvMicrokernelTester()
14798 .cr(16)
14799 .kr(25)
14800 .channels(channels)
14801 .input_offset(304)
14802 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014803 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070014804 }
14805 }
14806 }
14807#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14808
14809
14810#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -070014811 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
14812 TEST_REQUIRES_X86_AVX;
14813 DWConvMicrokernelTester()
14814 .cr(8)
14815 .kr(25)
14816 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014817 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014818 }
14819
14820 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
14821 TEST_REQUIRES_X86_AVX;
14822 for (uint32_t channels = 16; channels < 128; channels += 24) {
14823 DWConvMicrokernelTester()
14824 .cr(8)
14825 .kr(25)
14826 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014828 }
14829 }
14830
14831 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
14832 TEST_REQUIRES_X86_AVX;
14833 for (uint32_t channels = 16; channels < 128; channels += 24) {
14834 DWConvMicrokernelTester()
14835 .cr(8)
14836 .kr(25)
14837 .channels(channels)
14838 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014840 }
14841 }
14842
14843 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
14844 TEST_REQUIRES_X86_AVX;
14845 for (uint32_t channels = 16; channels < 128; channels += 24) {
14846 DWConvMicrokernelTester()
14847 .cr(8)
14848 .kr(25)
14849 .channels(channels)
14850 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014851 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014852 }
14853 }
14854
14855 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
14856 TEST_REQUIRES_X86_AVX;
14857 for (uint32_t channels = 1; channels < 8; channels++) {
14858 DWConvMicrokernelTester()
14859 .cr(8)
14860 .kr(25)
14861 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014862 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014863 }
14864 }
14865
14866 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
14867 TEST_REQUIRES_X86_AVX;
14868 for (uint32_t channels = 9; channels < 16; channels++) {
14869 DWConvMicrokernelTester()
14870 .cr(8)
14871 .kr(25)
14872 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014873 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014874 }
14875 }
14876
14877 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
14878 TEST_REQUIRES_X86_AVX;
14879 for (uint32_t channels = 9; channels < 16; channels++) {
14880 DWConvMicrokernelTester()
14881 .cr(8)
14882 .kr(25)
14883 .channels(channels)
14884 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014885 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014886 }
14887 }
14888
14889 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
14890 TEST_REQUIRES_X86_AVX;
14891 for (uint32_t channels = 9; channels < 16; channels++) {
14892 DWConvMicrokernelTester()
14893 .cr(8)
14894 .kr(25)
14895 .channels(channels)
14896 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014897 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014898 }
14899 }
14900
14901 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
14902 TEST_REQUIRES_X86_AVX;
14903 for (size_t channels = 1; channels <= 40; channels += 7) {
14904 DWConvMicrokernelTester()
14905 .cr(8)
14906 .kr(25)
14907 .channels(channels)
14908 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014909 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014910 }
14911 }
14912
14913 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
14914 TEST_REQUIRES_X86_AVX;
14915 for (size_t channels = 1; channels <= 40; channels += 7) {
14916 for (size_t step = 2; step <= 25; step++) {
14917 DWConvMicrokernelTester()
14918 .cr(8)
14919 .kr(25)
14920 .channels(channels)
14921 .width(3)
14922 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014923 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014924 }
14925 }
14926 }
14927
14928 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
14929 TEST_REQUIRES_X86_AVX;
14930 for (size_t channels = 1; channels <= 40; channels += 7) {
14931 DWConvMicrokernelTester()
14932 .cr(8)
14933 .kr(25)
14934 .channels(8)
14935 .width(5)
14936 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014937 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014938 }
14939 }
14940
14941 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
14942 TEST_REQUIRES_X86_AVX;
14943 for (size_t channels = 1; channels <= 40; channels += 7) {
14944 DWConvMicrokernelTester()
14945 .cr(8)
14946 .kr(25)
14947 .channels(channels)
14948 .width(3)
14949 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014950 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014951 }
14952 }
14953
14954 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
14955 TEST_REQUIRES_X86_AVX;
14956 for (size_t channels = 1; channels <= 40; channels += 7) {
14957 DWConvMicrokernelTester()
14958 .cr(8)
14959 .kr(25)
14960 .channels(channels)
14961 .width(3)
14962 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014963 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014964 }
14965 }
14966
14967 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
14968 TEST_REQUIRES_X86_AVX;
14969 for (uint32_t channels = 16; channels < 128; channels += 24) {
14970 DWConvMicrokernelTester()
14971 .cr(8)
14972 .kr(25)
14973 .channels(channels)
14974 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080014975 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014976 }
14977 }
14978
14979 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
14980 TEST_REQUIRES_X86_AVX;
14981 for (uint32_t mz = 0; mz < 25; mz++) {
14982 for (uint32_t channels = 16; channels < 128; channels += 24) {
14983 DWConvMicrokernelTester()
14984 .cr(8)
14985 .kr(25)
14986 .channels(channels)
14987 .input_offset(176)
14988 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070014990 }
14991 }
14992 }
14993#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14994
14995
14996#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14997 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
14998 TEST_REQUIRES_X86_AVX;
14999 DWConvMicrokernelTester()
15000 .cr(16)
15001 .kr(25)
15002 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015003 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015004 }
15005
15006 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
15007 TEST_REQUIRES_X86_AVX;
15008 for (uint32_t channels = 32; channels < 256; channels += 48) {
15009 DWConvMicrokernelTester()
15010 .cr(16)
15011 .kr(25)
15012 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015014 }
15015 }
15016
15017 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
15018 TEST_REQUIRES_X86_AVX;
15019 for (uint32_t channels = 32; channels < 256; channels += 48) {
15020 DWConvMicrokernelTester()
15021 .cr(16)
15022 .kr(25)
15023 .channels(channels)
15024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015025 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015026 }
15027 }
15028
15029 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
15030 TEST_REQUIRES_X86_AVX;
15031 for (uint32_t channels = 32; channels < 256; channels += 48) {
15032 DWConvMicrokernelTester()
15033 .cr(16)
15034 .kr(25)
15035 .channels(channels)
15036 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015038 }
15039 }
15040
15041 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
15042 TEST_REQUIRES_X86_AVX;
15043 for (uint32_t channels = 1; channels < 16; channels++) {
15044 DWConvMicrokernelTester()
15045 .cr(16)
15046 .kr(25)
15047 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015048 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015049 }
15050 }
15051
15052 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
15053 TEST_REQUIRES_X86_AVX;
15054 for (uint32_t channels = 17; channels < 32; channels++) {
15055 DWConvMicrokernelTester()
15056 .cr(16)
15057 .kr(25)
15058 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015059 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015060 }
15061 }
15062
15063 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
15064 TEST_REQUIRES_X86_AVX;
15065 for (uint32_t channels = 17; channels < 32; channels++) {
15066 DWConvMicrokernelTester()
15067 .cr(16)
15068 .kr(25)
15069 .channels(channels)
15070 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015071 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015072 }
15073 }
15074
15075 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
15076 TEST_REQUIRES_X86_AVX;
15077 for (uint32_t channels = 17; channels < 32; channels++) {
15078 DWConvMicrokernelTester()
15079 .cr(16)
15080 .kr(25)
15081 .channels(channels)
15082 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015083 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015084 }
15085 }
15086
15087 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
15088 TEST_REQUIRES_X86_AVX;
15089 for (size_t channels = 1; channels <= 80; channels += 15) {
15090 DWConvMicrokernelTester()
15091 .cr(16)
15092 .kr(25)
15093 .channels(channels)
15094 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015095 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015096 }
15097 }
15098
15099 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
15100 TEST_REQUIRES_X86_AVX;
15101 for (size_t channels = 1; channels <= 80; channels += 15) {
15102 for (size_t step = 2; step <= 25; step++) {
15103 DWConvMicrokernelTester()
15104 .cr(16)
15105 .kr(25)
15106 .channels(channels)
15107 .width(3)
15108 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015109 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015110 }
15111 }
15112 }
15113
15114 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
15115 TEST_REQUIRES_X86_AVX;
15116 for (size_t channels = 1; channels <= 80; channels += 15) {
15117 DWConvMicrokernelTester()
15118 .cr(16)
15119 .kr(25)
15120 .channels(16)
15121 .width(5)
15122 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015123 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015124 }
15125 }
15126
15127 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
15128 TEST_REQUIRES_X86_AVX;
15129 for (size_t channels = 1; channels <= 80; channels += 15) {
15130 DWConvMicrokernelTester()
15131 .cr(16)
15132 .kr(25)
15133 .channels(channels)
15134 .width(3)
15135 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015136 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015137 }
15138 }
15139
15140 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
15141 TEST_REQUIRES_X86_AVX;
15142 for (size_t channels = 1; channels <= 80; channels += 15) {
15143 DWConvMicrokernelTester()
15144 .cr(16)
15145 .kr(25)
15146 .channels(channels)
15147 .width(3)
15148 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015149 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015150 }
15151 }
15152
15153 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
15154 TEST_REQUIRES_X86_AVX;
15155 for (uint32_t channels = 32; channels < 256; channels += 48) {
15156 DWConvMicrokernelTester()
15157 .cr(16)
15158 .kr(25)
15159 .channels(channels)
15160 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015161 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015162 }
15163 }
15164
15165 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
15166 TEST_REQUIRES_X86_AVX;
15167 for (uint32_t mz = 0; mz < 25; mz++) {
15168 for (uint32_t channels = 32; channels < 256; channels += 48) {
15169 DWConvMicrokernelTester()
15170 .cr(16)
15171 .kr(25)
15172 .channels(channels)
15173 .input_offset(304)
15174 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015175 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015176 }
15177 }
15178 }
15179#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15180
15181
15182#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15183 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_eq_24) {
15184 TEST_REQUIRES_X86_AVX;
15185 DWConvMicrokernelTester()
15186 .cr(24)
15187 .kr(25)
15188 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080015189 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015190 }
15191
15192 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24) {
15193 TEST_REQUIRES_X86_AVX;
15194 for (uint32_t channels = 48; channels < 384; channels += 72) {
15195 DWConvMicrokernelTester()
15196 .cr(24)
15197 .kr(25)
15198 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015200 }
15201 }
15202
15203 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
15204 TEST_REQUIRES_X86_AVX;
15205 for (uint32_t channels = 48; channels < 384; channels += 72) {
15206 DWConvMicrokernelTester()
15207 .cr(24)
15208 .kr(25)
15209 .channels(channels)
15210 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015211 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015212 }
15213 }
15214
15215 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
15216 TEST_REQUIRES_X86_AVX;
15217 for (uint32_t channels = 48; channels < 384; channels += 72) {
15218 DWConvMicrokernelTester()
15219 .cr(24)
15220 .kr(25)
15221 .channels(channels)
15222 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015223 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015224 }
15225 }
15226
15227 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_lt_24) {
15228 TEST_REQUIRES_X86_AVX;
15229 for (uint32_t channels = 1; channels < 24; channels++) {
15230 DWConvMicrokernelTester()
15231 .cr(24)
15232 .kr(25)
15233 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015234 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015235 }
15236 }
15237
15238 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24) {
15239 TEST_REQUIRES_X86_AVX;
15240 for (uint32_t channels = 25; channels < 48; channels++) {
15241 DWConvMicrokernelTester()
15242 .cr(24)
15243 .kr(25)
15244 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015245 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015246 }
15247 }
15248
15249 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
15250 TEST_REQUIRES_X86_AVX;
15251 for (uint32_t channels = 25; channels < 48; channels++) {
15252 DWConvMicrokernelTester()
15253 .cr(24)
15254 .kr(25)
15255 .channels(channels)
15256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015257 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015258 }
15259 }
15260
15261 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
15262 TEST_REQUIRES_X86_AVX;
15263 for (uint32_t channels = 25; channels < 48; channels++) {
15264 DWConvMicrokernelTester()
15265 .cr(24)
15266 .kr(25)
15267 .channels(channels)
15268 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015270 }
15271 }
15272
15273 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel) {
15274 TEST_REQUIRES_X86_AVX;
15275 for (size_t channels = 1; channels <= 120; channels += 23) {
15276 DWConvMicrokernelTester()
15277 .cr(24)
15278 .kr(25)
15279 .channels(channels)
15280 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015281 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015282 }
15283 }
15284
15285 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_step) {
15286 TEST_REQUIRES_X86_AVX;
15287 for (size_t channels = 1; channels <= 120; channels += 23) {
15288 for (size_t step = 2; step <= 25; step++) {
15289 DWConvMicrokernelTester()
15290 .cr(24)
15291 .kr(25)
15292 .channels(channels)
15293 .width(3)
15294 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015296 }
15297 }
15298 }
15299
15300 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
15301 TEST_REQUIRES_X86_AVX;
15302 for (size_t channels = 1; channels <= 120; channels += 23) {
15303 DWConvMicrokernelTester()
15304 .cr(24)
15305 .kr(25)
15306 .channels(24)
15307 .width(5)
15308 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080015309 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015310 }
15311 }
15312
15313 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmin) {
15314 TEST_REQUIRES_X86_AVX;
15315 for (size_t channels = 1; channels <= 120; channels += 23) {
15316 DWConvMicrokernelTester()
15317 .cr(24)
15318 .kr(25)
15319 .channels(channels)
15320 .width(3)
15321 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015322 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015323 }
15324 }
15325
15326 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmax) {
15327 TEST_REQUIRES_X86_AVX;
15328 for (size_t channels = 1; channels <= 120; channels += 23) {
15329 DWConvMicrokernelTester()
15330 .cr(24)
15331 .kr(25)
15332 .channels(channels)
15333 .width(3)
15334 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015335 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015336 }
15337 }
15338
15339 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, input_offset) {
15340 TEST_REQUIRES_X86_AVX;
15341 for (uint32_t channels = 48; channels < 384; channels += 72) {
15342 DWConvMicrokernelTester()
15343 .cr(24)
15344 .kr(25)
15345 .channels(channels)
15346 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080015347 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015348 }
15349 }
15350
15351 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, zero) {
15352 TEST_REQUIRES_X86_AVX;
15353 for (uint32_t mz = 0; mz < 25; mz++) {
15354 for (uint32_t channels = 48; channels < 384; channels += 72) {
15355 DWConvMicrokernelTester()
15356 .cr(24)
15357 .kr(25)
15358 .channels(channels)
15359 .input_offset(464)
15360 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015361 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070015362 }
15363 }
15364 }
15365#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15366
15367
15368#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070015369 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_eq_8) {
15370 TEST_REQUIRES_X86_AVX;
15371 DWConvMicrokernelTester()
15372 .cr(8)
15373 .kr(25)
15374 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015375 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015376 }
15377
15378 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8) {
15379 TEST_REQUIRES_X86_AVX;
15380 for (uint32_t channels = 16; channels < 128; channels += 24) {
15381 DWConvMicrokernelTester()
15382 .cr(8)
15383 .kr(25)
15384 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015385 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015386 }
15387 }
15388
15389 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmin) {
15390 TEST_REQUIRES_X86_AVX;
15391 for (uint32_t channels = 16; channels < 128; channels += 24) {
15392 DWConvMicrokernelTester()
15393 .cr(8)
15394 .kr(25)
15395 .channels(channels)
15396 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015397 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015398 }
15399 }
15400
15401 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmax) {
15402 TEST_REQUIRES_X86_AVX;
15403 for (uint32_t channels = 16; channels < 128; channels += 24) {
15404 DWConvMicrokernelTester()
15405 .cr(8)
15406 .kr(25)
15407 .channels(channels)
15408 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015409 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015410 }
15411 }
15412
15413 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_lt_8) {
15414 TEST_REQUIRES_X86_AVX;
15415 for (uint32_t channels = 1; channels < 8; channels++) {
15416 DWConvMicrokernelTester()
15417 .cr(8)
15418 .kr(25)
15419 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015420 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015421 }
15422 }
15423
15424 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8) {
15425 TEST_REQUIRES_X86_AVX;
15426 for (uint32_t channels = 9; channels < 16; channels++) {
15427 DWConvMicrokernelTester()
15428 .cr(8)
15429 .kr(25)
15430 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015431 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015432 }
15433 }
15434
15435 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
15436 TEST_REQUIRES_X86_AVX;
15437 for (uint32_t channels = 9; channels < 16; channels++) {
15438 DWConvMicrokernelTester()
15439 .cr(8)
15440 .kr(25)
15441 .channels(channels)
15442 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015443 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015444 }
15445 }
15446
15447 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
15448 TEST_REQUIRES_X86_AVX;
15449 for (uint32_t channels = 9; channels < 16; channels++) {
15450 DWConvMicrokernelTester()
15451 .cr(8)
15452 .kr(25)
15453 .channels(channels)
15454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015455 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015456 }
15457 }
15458
15459 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel) {
15460 TEST_REQUIRES_X86_AVX;
15461 for (size_t channels = 1; channels <= 40; channels += 7) {
15462 DWConvMicrokernelTester()
15463 .cr(8)
15464 .kr(25)
15465 .channels(channels)
15466 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015467 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015468 }
15469 }
15470
15471 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_step) {
15472 TEST_REQUIRES_X86_AVX;
15473 for (size_t channels = 1; channels <= 40; channels += 7) {
15474 for (size_t step = 2; step <= 25; step++) {
15475 DWConvMicrokernelTester()
15476 .cr(8)
15477 .kr(25)
15478 .channels(channels)
15479 .width(3)
15480 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015481 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015482 }
15483 }
15484 }
15485
15486 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
15487 TEST_REQUIRES_X86_AVX;
15488 for (size_t channels = 1; channels <= 40; channels += 7) {
15489 DWConvMicrokernelTester()
15490 .cr(8)
15491 .kr(25)
15492 .channels(8)
15493 .width(5)
15494 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015495 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015496 }
15497 }
15498
15499 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
15500 TEST_REQUIRES_X86_AVX;
15501 for (size_t channels = 1; channels <= 40; channels += 7) {
15502 DWConvMicrokernelTester()
15503 .cr(8)
15504 .kr(25)
15505 .channels(channels)
15506 .width(3)
15507 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015508 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015509 }
15510 }
15511
15512 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
15513 TEST_REQUIRES_X86_AVX;
15514 for (size_t channels = 1; channels <= 40; channels += 7) {
15515 DWConvMicrokernelTester()
15516 .cr(8)
15517 .kr(25)
15518 .channels(channels)
15519 .width(3)
15520 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015521 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015522 }
15523 }
15524
15525 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, input_offset) {
15526 TEST_REQUIRES_X86_AVX;
15527 for (uint32_t channels = 16; channels < 128; channels += 24) {
15528 DWConvMicrokernelTester()
15529 .cr(8)
15530 .kr(25)
15531 .channels(channels)
15532 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080015533 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015534 }
15535 }
15536
15537 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, zero) {
15538 TEST_REQUIRES_X86_AVX;
15539 for (uint32_t mz = 0; mz < 25; mz++) {
15540 for (uint32_t channels = 16; channels < 128; channels += 24) {
15541 DWConvMicrokernelTester()
15542 .cr(8)
15543 .kr(25)
15544 .channels(channels)
15545 .input_offset(176)
15546 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015547 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015548 }
15549 }
15550 }
15551#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15552
15553
15554#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15555 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_eq_16) {
15556 TEST_REQUIRES_X86_AVX;
15557 DWConvMicrokernelTester()
15558 .cr(16)
15559 .kr(25)
15560 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015561 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015562 }
15563
15564 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16) {
15565 TEST_REQUIRES_X86_AVX;
15566 for (uint32_t channels = 32; channels < 256; channels += 48) {
15567 DWConvMicrokernelTester()
15568 .cr(16)
15569 .kr(25)
15570 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015571 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015572 }
15573 }
15574
15575 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmin) {
15576 TEST_REQUIRES_X86_AVX;
15577 for (uint32_t channels = 32; channels < 256; channels += 48) {
15578 DWConvMicrokernelTester()
15579 .cr(16)
15580 .kr(25)
15581 .channels(channels)
15582 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015583 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015584 }
15585 }
15586
15587 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmax) {
15588 TEST_REQUIRES_X86_AVX;
15589 for (uint32_t channels = 32; channels < 256; channels += 48) {
15590 DWConvMicrokernelTester()
15591 .cr(16)
15592 .kr(25)
15593 .channels(channels)
15594 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015595 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015596 }
15597 }
15598
15599 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_lt_16) {
15600 TEST_REQUIRES_X86_AVX;
15601 for (uint32_t channels = 1; channels < 16; channels++) {
15602 DWConvMicrokernelTester()
15603 .cr(16)
15604 .kr(25)
15605 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015606 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015607 }
15608 }
15609
15610 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16) {
15611 TEST_REQUIRES_X86_AVX;
15612 for (uint32_t channels = 17; channels < 32; channels++) {
15613 DWConvMicrokernelTester()
15614 .cr(16)
15615 .kr(25)
15616 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015617 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015618 }
15619 }
15620
15621 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
15622 TEST_REQUIRES_X86_AVX;
15623 for (uint32_t channels = 17; channels < 32; channels++) {
15624 DWConvMicrokernelTester()
15625 .cr(16)
15626 .kr(25)
15627 .channels(channels)
15628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015629 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015630 }
15631 }
15632
15633 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
15634 TEST_REQUIRES_X86_AVX;
15635 for (uint32_t channels = 17; channels < 32; channels++) {
15636 DWConvMicrokernelTester()
15637 .cr(16)
15638 .kr(25)
15639 .channels(channels)
15640 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015641 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015642 }
15643 }
15644
15645 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel) {
15646 TEST_REQUIRES_X86_AVX;
15647 for (size_t channels = 1; channels <= 80; channels += 15) {
15648 DWConvMicrokernelTester()
15649 .cr(16)
15650 .kr(25)
15651 .channels(channels)
15652 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015653 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015654 }
15655 }
15656
15657 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_step) {
15658 TEST_REQUIRES_X86_AVX;
15659 for (size_t channels = 1; channels <= 80; channels += 15) {
15660 for (size_t step = 2; step <= 25; step++) {
15661 DWConvMicrokernelTester()
15662 .cr(16)
15663 .kr(25)
15664 .channels(channels)
15665 .width(3)
15666 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015667 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015668 }
15669 }
15670 }
15671
15672 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
15673 TEST_REQUIRES_X86_AVX;
15674 for (size_t channels = 1; channels <= 80; channels += 15) {
15675 DWConvMicrokernelTester()
15676 .cr(16)
15677 .kr(25)
15678 .channels(16)
15679 .width(5)
15680 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015681 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015682 }
15683 }
15684
15685 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
15686 TEST_REQUIRES_X86_AVX;
15687 for (size_t channels = 1; channels <= 80; channels += 15) {
15688 DWConvMicrokernelTester()
15689 .cr(16)
15690 .kr(25)
15691 .channels(channels)
15692 .width(3)
15693 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015694 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015695 }
15696 }
15697
15698 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
15699 TEST_REQUIRES_X86_AVX;
15700 for (size_t channels = 1; channels <= 80; channels += 15) {
15701 DWConvMicrokernelTester()
15702 .cr(16)
15703 .kr(25)
15704 .channels(channels)
15705 .width(3)
15706 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015707 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015708 }
15709 }
15710
15711 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, input_offset) {
15712 TEST_REQUIRES_X86_AVX;
15713 for (uint32_t channels = 32; channels < 256; channels += 48) {
15714 DWConvMicrokernelTester()
15715 .cr(16)
15716 .kr(25)
15717 .channels(channels)
15718 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015719 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015720 }
15721 }
15722
15723 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, zero) {
15724 TEST_REQUIRES_X86_AVX;
15725 for (uint32_t mz = 0; mz < 25; mz++) {
15726 for (uint32_t channels = 32; channels < 256; channels += 48) {
15727 DWConvMicrokernelTester()
15728 .cr(16)
15729 .kr(25)
15730 .channels(channels)
15731 .input_offset(304)
15732 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015733 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015734 }
15735 }
15736 }
15737#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15738
15739
15740#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15741 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_eq_8) {
15742 TEST_REQUIRES_X86_XOP;
15743 DWConvMicrokernelTester()
15744 .cr(8)
15745 .kr(25)
15746 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015747 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015748 }
15749
15750 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8) {
15751 TEST_REQUIRES_X86_XOP;
15752 for (uint32_t channels = 16; channels < 128; channels += 24) {
15753 DWConvMicrokernelTester()
15754 .cr(8)
15755 .kr(25)
15756 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015757 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015758 }
15759 }
15760
15761 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmin) {
15762 TEST_REQUIRES_X86_XOP;
15763 for (uint32_t channels = 16; channels < 128; channels += 24) {
15764 DWConvMicrokernelTester()
15765 .cr(8)
15766 .kr(25)
15767 .channels(channels)
15768 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015770 }
15771 }
15772
15773 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmax) {
15774 TEST_REQUIRES_X86_XOP;
15775 for (uint32_t channels = 16; channels < 128; channels += 24) {
15776 DWConvMicrokernelTester()
15777 .cr(8)
15778 .kr(25)
15779 .channels(channels)
15780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015781 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015782 }
15783 }
15784
15785 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_lt_8) {
15786 TEST_REQUIRES_X86_XOP;
15787 for (uint32_t channels = 1; channels < 8; channels++) {
15788 DWConvMicrokernelTester()
15789 .cr(8)
15790 .kr(25)
15791 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015792 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015793 }
15794 }
15795
15796 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8) {
15797 TEST_REQUIRES_X86_XOP;
15798 for (uint32_t channels = 9; channels < 16; channels++) {
15799 DWConvMicrokernelTester()
15800 .cr(8)
15801 .kr(25)
15802 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015803 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015804 }
15805 }
15806
15807 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
15808 TEST_REQUIRES_X86_XOP;
15809 for (uint32_t channels = 9; channels < 16; channels++) {
15810 DWConvMicrokernelTester()
15811 .cr(8)
15812 .kr(25)
15813 .channels(channels)
15814 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015815 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015816 }
15817 }
15818
15819 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
15820 TEST_REQUIRES_X86_XOP;
15821 for (uint32_t channels = 9; channels < 16; channels++) {
15822 DWConvMicrokernelTester()
15823 .cr(8)
15824 .kr(25)
15825 .channels(channels)
15826 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015828 }
15829 }
15830
15831 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel) {
15832 TEST_REQUIRES_X86_XOP;
15833 for (size_t channels = 1; channels <= 40; channels += 7) {
15834 DWConvMicrokernelTester()
15835 .cr(8)
15836 .kr(25)
15837 .channels(channels)
15838 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015839 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015840 }
15841 }
15842
15843 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_step) {
15844 TEST_REQUIRES_X86_XOP;
15845 for (size_t channels = 1; channels <= 40; channels += 7) {
15846 for (size_t step = 2; step <= 25; step++) {
15847 DWConvMicrokernelTester()
15848 .cr(8)
15849 .kr(25)
15850 .channels(channels)
15851 .width(3)
15852 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015853 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015854 }
15855 }
15856 }
15857
15858 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
15859 TEST_REQUIRES_X86_XOP;
15860 for (size_t channels = 1; channels <= 40; channels += 7) {
15861 DWConvMicrokernelTester()
15862 .cr(8)
15863 .kr(25)
15864 .channels(8)
15865 .width(5)
15866 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015867 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015868 }
15869 }
15870
15871 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
15872 TEST_REQUIRES_X86_XOP;
15873 for (size_t channels = 1; channels <= 40; channels += 7) {
15874 DWConvMicrokernelTester()
15875 .cr(8)
15876 .kr(25)
15877 .channels(channels)
15878 .width(3)
15879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015880 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015881 }
15882 }
15883
15884 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
15885 TEST_REQUIRES_X86_XOP;
15886 for (size_t channels = 1; channels <= 40; channels += 7) {
15887 DWConvMicrokernelTester()
15888 .cr(8)
15889 .kr(25)
15890 .channels(channels)
15891 .width(3)
15892 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015893 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015894 }
15895 }
15896
15897 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, input_offset) {
15898 TEST_REQUIRES_X86_XOP;
15899 for (uint32_t channels = 16; channels < 128; channels += 24) {
15900 DWConvMicrokernelTester()
15901 .cr(8)
15902 .kr(25)
15903 .channels(channels)
15904 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080015905 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015906 }
15907 }
15908
15909 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, zero) {
15910 TEST_REQUIRES_X86_XOP;
15911 for (uint32_t mz = 0; mz < 25; mz++) {
15912 for (uint32_t channels = 16; channels < 128; channels += 24) {
15913 DWConvMicrokernelTester()
15914 .cr(8)
15915 .kr(25)
15916 .channels(channels)
15917 .input_offset(176)
15918 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015919 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015920 }
15921 }
15922 }
15923#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15924
15925
15926#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15927 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_eq_16) {
15928 TEST_REQUIRES_X86_XOP;
15929 DWConvMicrokernelTester()
15930 .cr(16)
15931 .kr(25)
15932 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015933 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015934 }
15935
15936 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16) {
15937 TEST_REQUIRES_X86_XOP;
15938 for (uint32_t channels = 32; channels < 256; channels += 48) {
15939 DWConvMicrokernelTester()
15940 .cr(16)
15941 .kr(25)
15942 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015943 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015944 }
15945 }
15946
15947 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmin) {
15948 TEST_REQUIRES_X86_XOP;
15949 for (uint32_t channels = 32; channels < 256; channels += 48) {
15950 DWConvMicrokernelTester()
15951 .cr(16)
15952 .kr(25)
15953 .channels(channels)
15954 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015955 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015956 }
15957 }
15958
15959 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmax) {
15960 TEST_REQUIRES_X86_XOP;
15961 for (uint32_t channels = 32; channels < 256; channels += 48) {
15962 DWConvMicrokernelTester()
15963 .cr(16)
15964 .kr(25)
15965 .channels(channels)
15966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015967 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015968 }
15969 }
15970
15971 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_lt_16) {
15972 TEST_REQUIRES_X86_XOP;
15973 for (uint32_t channels = 1; channels < 16; channels++) {
15974 DWConvMicrokernelTester()
15975 .cr(16)
15976 .kr(25)
15977 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015978 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015979 }
15980 }
15981
15982 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16) {
15983 TEST_REQUIRES_X86_XOP;
15984 for (uint32_t channels = 17; channels < 32; channels++) {
15985 DWConvMicrokernelTester()
15986 .cr(16)
15987 .kr(25)
15988 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070015990 }
15991 }
15992
15993 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
15994 TEST_REQUIRES_X86_XOP;
15995 for (uint32_t channels = 17; channels < 32; channels++) {
15996 DWConvMicrokernelTester()
15997 .cr(16)
15998 .kr(25)
15999 .channels(channels)
16000 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016001 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016002 }
16003 }
16004
16005 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
16006 TEST_REQUIRES_X86_XOP;
16007 for (uint32_t channels = 17; channels < 32; channels++) {
16008 DWConvMicrokernelTester()
16009 .cr(16)
16010 .kr(25)
16011 .channels(channels)
16012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016014 }
16015 }
16016
16017 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel) {
16018 TEST_REQUIRES_X86_XOP;
16019 for (size_t channels = 1; channels <= 80; channels += 15) {
16020 DWConvMicrokernelTester()
16021 .cr(16)
16022 .kr(25)
16023 .channels(channels)
16024 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016025 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016026 }
16027 }
16028
16029 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_step) {
16030 TEST_REQUIRES_X86_XOP;
16031 for (size_t channels = 1; channels <= 80; channels += 15) {
16032 for (size_t step = 2; step <= 25; step++) {
16033 DWConvMicrokernelTester()
16034 .cr(16)
16035 .kr(25)
16036 .channels(channels)
16037 .width(3)
16038 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016039 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016040 }
16041 }
16042 }
16043
16044 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
16045 TEST_REQUIRES_X86_XOP;
16046 for (size_t channels = 1; channels <= 80; channels += 15) {
16047 DWConvMicrokernelTester()
16048 .cr(16)
16049 .kr(25)
16050 .channels(16)
16051 .width(5)
16052 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016053 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016054 }
16055 }
16056
16057 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
16058 TEST_REQUIRES_X86_XOP;
16059 for (size_t channels = 1; channels <= 80; channels += 15) {
16060 DWConvMicrokernelTester()
16061 .cr(16)
16062 .kr(25)
16063 .channels(channels)
16064 .width(3)
16065 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016066 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016067 }
16068 }
16069
16070 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
16071 TEST_REQUIRES_X86_XOP;
16072 for (size_t channels = 1; channels <= 80; channels += 15) {
16073 DWConvMicrokernelTester()
16074 .cr(16)
16075 .kr(25)
16076 .channels(channels)
16077 .width(3)
16078 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016079 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016080 }
16081 }
16082
16083 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, input_offset) {
16084 TEST_REQUIRES_X86_XOP;
16085 for (uint32_t channels = 32; channels < 256; channels += 48) {
16086 DWConvMicrokernelTester()
16087 .cr(16)
16088 .kr(25)
16089 .channels(channels)
16090 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080016091 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016092 }
16093 }
16094
16095 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, zero) {
16096 TEST_REQUIRES_X86_XOP;
16097 for (uint32_t mz = 0; mz < 25; mz++) {
16098 for (uint32_t channels = 32; channels < 256; channels += 48) {
16099 DWConvMicrokernelTester()
16100 .cr(16)
16101 .kr(25)
16102 .channels(channels)
16103 .input_offset(304)
16104 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070016106 }
16107 }
16108 }
16109#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16110
16111
16112#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -070016113 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_eq_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016114 TEST_REQUIRES_X86_AVX2;
16115 DWConvMicrokernelTester()
16116 .cr(16)
16117 .kr(25)
16118 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016119 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016120 }
16121
Marat Dukhan881ab022021-07-28 13:49:26 -070016122 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016123 TEST_REQUIRES_X86_AVX2;
16124 for (uint32_t channels = 32; channels < 256; channels += 48) {
16125 DWConvMicrokernelTester()
16126 .cr(16)
16127 .kr(25)
16128 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016130 }
16131 }
16132
Marat Dukhan881ab022021-07-28 13:49:26 -070016133 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016134 TEST_REQUIRES_X86_AVX2;
16135 for (uint32_t channels = 32; channels < 256; channels += 48) {
16136 DWConvMicrokernelTester()
16137 .cr(16)
16138 .kr(25)
16139 .channels(channels)
16140 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016141 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016142 }
16143 }
16144
Marat Dukhan881ab022021-07-28 13:49:26 -070016145 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016146 TEST_REQUIRES_X86_AVX2;
16147 for (uint32_t channels = 32; channels < 256; channels += 48) {
16148 DWConvMicrokernelTester()
16149 .cr(16)
16150 .kr(25)
16151 .channels(channels)
16152 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016153 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016154 }
16155 }
16156
Marat Dukhan881ab022021-07-28 13:49:26 -070016157 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_lt_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016158 TEST_REQUIRES_X86_AVX2;
16159 for (uint32_t channels = 1; channels < 16; channels++) {
16160 DWConvMicrokernelTester()
16161 .cr(16)
16162 .kr(25)
16163 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016164 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016165 }
16166 }
16167
Marat Dukhan881ab022021-07-28 13:49:26 -070016168 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016169 TEST_REQUIRES_X86_AVX2;
16170 for (uint32_t channels = 17; channels < 32; channels++) {
16171 DWConvMicrokernelTester()
16172 .cr(16)
16173 .kr(25)
16174 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016175 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016176 }
16177 }
16178
Marat Dukhan881ab022021-07-28 13:49:26 -070016179 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016180 TEST_REQUIRES_X86_AVX2;
16181 for (uint32_t channels = 17; channels < 32; channels++) {
16182 DWConvMicrokernelTester()
16183 .cr(16)
16184 .kr(25)
16185 .channels(channels)
16186 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016187 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016188 }
16189 }
16190
Marat Dukhan881ab022021-07-28 13:49:26 -070016191 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016192 TEST_REQUIRES_X86_AVX2;
16193 for (uint32_t channels = 17; channels < 32; channels++) {
16194 DWConvMicrokernelTester()
16195 .cr(16)
16196 .kr(25)
16197 .channels(channels)
16198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016200 }
16201 }
16202
Marat Dukhan881ab022021-07-28 13:49:26 -070016203 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016204 TEST_REQUIRES_X86_AVX2;
16205 for (size_t channels = 1; channels <= 80; channels += 15) {
16206 DWConvMicrokernelTester()
16207 .cr(16)
16208 .kr(25)
16209 .channels(channels)
16210 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016211 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016212 }
16213 }
16214
Marat Dukhan881ab022021-07-28 13:49:26 -070016215 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016216 TEST_REQUIRES_X86_AVX2;
16217 for (size_t channels = 1; channels <= 80; channels += 15) {
16218 for (size_t step = 2; step <= 25; step++) {
16219 DWConvMicrokernelTester()
16220 .cr(16)
16221 .kr(25)
16222 .channels(channels)
16223 .width(3)
16224 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016226 }
16227 }
16228 }
16229
Marat Dukhan881ab022021-07-28 13:49:26 -070016230 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016231 TEST_REQUIRES_X86_AVX2;
16232 for (size_t channels = 1; channels <= 80; channels += 15) {
16233 DWConvMicrokernelTester()
16234 .cr(16)
16235 .kr(25)
16236 .channels(16)
16237 .width(5)
16238 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016239 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016240 }
16241 }
16242
Marat Dukhan881ab022021-07-28 13:49:26 -070016243 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016244 TEST_REQUIRES_X86_AVX2;
16245 for (size_t channels = 1; channels <= 80; channels += 15) {
16246 DWConvMicrokernelTester()
16247 .cr(16)
16248 .kr(25)
16249 .channels(channels)
16250 .width(3)
16251 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016252 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016253 }
16254 }
16255
Marat Dukhan881ab022021-07-28 13:49:26 -070016256 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016257 TEST_REQUIRES_X86_AVX2;
16258 for (size_t channels = 1; channels <= 80; channels += 15) {
16259 DWConvMicrokernelTester()
16260 .cr(16)
16261 .kr(25)
16262 .channels(channels)
16263 .width(3)
16264 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016265 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016266 }
16267 }
16268
Marat Dukhan881ab022021-07-28 13:49:26 -070016269 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016270 TEST_REQUIRES_X86_AVX2;
16271 for (uint32_t channels = 32; channels < 256; channels += 48) {
16272 DWConvMicrokernelTester()
16273 .cr(16)
16274 .kr(25)
16275 .channels(channels)
16276 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080016277 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016278 }
16279 }
16280
Marat Dukhan881ab022021-07-28 13:49:26 -070016281 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016282 TEST_REQUIRES_X86_AVX2;
16283 for (uint32_t mz = 0; mz < 25; mz++) {
16284 for (uint32_t channels = 32; channels < 256; channels += 48) {
16285 DWConvMicrokernelTester()
16286 .cr(16)
16287 .kr(25)
16288 .channels(channels)
16289 .input_offset(304)
16290 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016291 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016292 }
16293 }
16294 }
16295#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16296
16297
16298#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -070016299 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_eq_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016300 TEST_REQUIRES_X86_AVX2;
16301 DWConvMicrokernelTester()
16302 .cr(32)
16303 .kr(25)
16304 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080016305 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016306 }
16307
Marat Dukhan881ab022021-07-28 13:49:26 -070016308 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016309 TEST_REQUIRES_X86_AVX2;
16310 for (uint32_t channels = 64; channels < 512; channels += 96) {
16311 DWConvMicrokernelTester()
16312 .cr(32)
16313 .kr(25)
16314 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016315 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016316 }
16317 }
16318
Marat Dukhan881ab022021-07-28 13:49:26 -070016319 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016320 TEST_REQUIRES_X86_AVX2;
16321 for (uint32_t channels = 64; channels < 512; channels += 96) {
16322 DWConvMicrokernelTester()
16323 .cr(32)
16324 .kr(25)
16325 .channels(channels)
16326 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016327 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016328 }
16329 }
16330
Marat Dukhan881ab022021-07-28 13:49:26 -070016331 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016332 TEST_REQUIRES_X86_AVX2;
16333 for (uint32_t channels = 64; channels < 512; channels += 96) {
16334 DWConvMicrokernelTester()
16335 .cr(32)
16336 .kr(25)
16337 .channels(channels)
16338 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016339 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016340 }
16341 }
16342
Marat Dukhan881ab022021-07-28 13:49:26 -070016343 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_lt_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016344 TEST_REQUIRES_X86_AVX2;
16345 for (uint32_t channels = 1; channels < 32; channels++) {
16346 DWConvMicrokernelTester()
16347 .cr(32)
16348 .kr(25)
16349 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016350 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016351 }
16352 }
16353
Marat Dukhan881ab022021-07-28 13:49:26 -070016354 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016355 TEST_REQUIRES_X86_AVX2;
16356 for (uint32_t channels = 33; channels < 64; channels++) {
16357 DWConvMicrokernelTester()
16358 .cr(32)
16359 .kr(25)
16360 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016361 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016362 }
16363 }
16364
Marat Dukhan881ab022021-07-28 13:49:26 -070016365 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016366 TEST_REQUIRES_X86_AVX2;
16367 for (uint32_t channels = 33; channels < 64; channels++) {
16368 DWConvMicrokernelTester()
16369 .cr(32)
16370 .kr(25)
16371 .channels(channels)
16372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016373 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016374 }
16375 }
16376
Marat Dukhan881ab022021-07-28 13:49:26 -070016377 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016378 TEST_REQUIRES_X86_AVX2;
16379 for (uint32_t channels = 33; channels < 64; channels++) {
16380 DWConvMicrokernelTester()
16381 .cr(32)
16382 .kr(25)
16383 .channels(channels)
16384 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016385 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016386 }
16387 }
16388
Marat Dukhan881ab022021-07-28 13:49:26 -070016389 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016390 TEST_REQUIRES_X86_AVX2;
16391 for (size_t channels = 1; channels <= 160; channels += 31) {
16392 DWConvMicrokernelTester()
16393 .cr(32)
16394 .kr(25)
16395 .channels(channels)
16396 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016397 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016398 }
16399 }
16400
Marat Dukhan881ab022021-07-28 13:49:26 -070016401 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016402 TEST_REQUIRES_X86_AVX2;
16403 for (size_t channels = 1; channels <= 160; channels += 31) {
16404 for (size_t step = 2; step <= 25; step++) {
16405 DWConvMicrokernelTester()
16406 .cr(32)
16407 .kr(25)
16408 .channels(channels)
16409 .width(3)
16410 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016411 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016412 }
16413 }
16414 }
16415
Marat Dukhan881ab022021-07-28 13:49:26 -070016416 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016417 TEST_REQUIRES_X86_AVX2;
16418 for (size_t channels = 1; channels <= 160; channels += 31) {
16419 DWConvMicrokernelTester()
16420 .cr(32)
16421 .kr(25)
16422 .channels(32)
16423 .width(5)
16424 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080016425 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016426 }
16427 }
16428
Marat Dukhan881ab022021-07-28 13:49:26 -070016429 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016430 TEST_REQUIRES_X86_AVX2;
16431 for (size_t channels = 1; channels <= 160; channels += 31) {
16432 DWConvMicrokernelTester()
16433 .cr(32)
16434 .kr(25)
16435 .channels(channels)
16436 .width(3)
16437 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016438 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016439 }
16440 }
16441
Marat Dukhan881ab022021-07-28 13:49:26 -070016442 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016443 TEST_REQUIRES_X86_AVX2;
16444 for (size_t channels = 1; channels <= 160; channels += 31) {
16445 DWConvMicrokernelTester()
16446 .cr(32)
16447 .kr(25)
16448 .channels(channels)
16449 .width(3)
16450 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016451 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016452 }
16453 }
16454
Marat Dukhan881ab022021-07-28 13:49:26 -070016455 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016456 TEST_REQUIRES_X86_AVX2;
16457 for (uint32_t channels = 64; channels < 512; channels += 96) {
16458 DWConvMicrokernelTester()
16459 .cr(32)
16460 .kr(25)
16461 .channels(channels)
16462 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080016463 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016464 }
16465 }
16466
Marat Dukhan881ab022021-07-28 13:49:26 -070016467 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016468 TEST_REQUIRES_X86_AVX2;
16469 for (uint32_t mz = 0; mz < 25; mz++) {
16470 for (uint32_t channels = 64; channels < 512; channels += 96) {
16471 DWConvMicrokernelTester()
16472 .cr(32)
16473 .kr(25)
16474 .channels(channels)
16475 .input_offset(592)
16476 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016477 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016478 }
16479 }
16480 }
16481#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16482
16483
16484#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16485 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_eq_16) {
16486 TEST_REQUIRES_X86_AVX2;
16487 DWConvMicrokernelTester()
16488 .cr(16)
16489 .kr(25)
16490 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016491 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016492 }
16493
16494 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16) {
16495 TEST_REQUIRES_X86_AVX2;
16496 for (uint32_t channels = 32; channels < 256; channels += 48) {
16497 DWConvMicrokernelTester()
16498 .cr(16)
16499 .kr(25)
16500 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016501 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016502 }
16503 }
16504
16505 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
16506 TEST_REQUIRES_X86_AVX2;
16507 for (uint32_t channels = 32; channels < 256; channels += 48) {
16508 DWConvMicrokernelTester()
16509 .cr(16)
16510 .kr(25)
16511 .channels(channels)
16512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016513 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016514 }
16515 }
16516
16517 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
16518 TEST_REQUIRES_X86_AVX2;
16519 for (uint32_t channels = 32; channels < 256; channels += 48) {
16520 DWConvMicrokernelTester()
16521 .cr(16)
16522 .kr(25)
16523 .channels(channels)
16524 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016525 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016526 }
16527 }
16528
16529 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_lt_16) {
16530 TEST_REQUIRES_X86_AVX2;
16531 for (uint32_t channels = 1; channels < 16; channels++) {
16532 DWConvMicrokernelTester()
16533 .cr(16)
16534 .kr(25)
16535 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016536 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016537 }
16538 }
16539
16540 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16) {
16541 TEST_REQUIRES_X86_AVX2;
16542 for (uint32_t channels = 17; channels < 32; channels++) {
16543 DWConvMicrokernelTester()
16544 .cr(16)
16545 .kr(25)
16546 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016547 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016548 }
16549 }
16550
16551 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
16552 TEST_REQUIRES_X86_AVX2;
16553 for (uint32_t channels = 17; channels < 32; channels++) {
16554 DWConvMicrokernelTester()
16555 .cr(16)
16556 .kr(25)
16557 .channels(channels)
16558 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016559 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016560 }
16561 }
16562
16563 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
16564 TEST_REQUIRES_X86_AVX2;
16565 for (uint32_t channels = 17; channels < 32; channels++) {
16566 DWConvMicrokernelTester()
16567 .cr(16)
16568 .kr(25)
16569 .channels(channels)
16570 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016571 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016572 }
16573 }
16574
16575 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel) {
16576 TEST_REQUIRES_X86_AVX2;
16577 for (size_t channels = 1; channels <= 80; channels += 15) {
16578 DWConvMicrokernelTester()
16579 .cr(16)
16580 .kr(25)
16581 .channels(channels)
16582 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016583 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016584 }
16585 }
16586
16587 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
16588 TEST_REQUIRES_X86_AVX2;
16589 for (size_t channels = 1; channels <= 80; channels += 15) {
16590 for (size_t step = 2; step <= 25; step++) {
16591 DWConvMicrokernelTester()
16592 .cr(16)
16593 .kr(25)
16594 .channels(channels)
16595 .width(3)
16596 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016597 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016598 }
16599 }
16600 }
16601
16602 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
16603 TEST_REQUIRES_X86_AVX2;
16604 for (size_t channels = 1; channels <= 80; channels += 15) {
16605 DWConvMicrokernelTester()
16606 .cr(16)
16607 .kr(25)
16608 .channels(16)
16609 .width(5)
16610 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016611 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016612 }
16613 }
16614
16615 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
16616 TEST_REQUIRES_X86_AVX2;
16617 for (size_t channels = 1; channels <= 80; channels += 15) {
16618 DWConvMicrokernelTester()
16619 .cr(16)
16620 .kr(25)
16621 .channels(channels)
16622 .width(3)
16623 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016624 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016625 }
16626 }
16627
16628 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
16629 TEST_REQUIRES_X86_AVX2;
16630 for (size_t channels = 1; channels <= 80; channels += 15) {
16631 DWConvMicrokernelTester()
16632 .cr(16)
16633 .kr(25)
16634 .channels(channels)
16635 .width(3)
16636 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016637 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016638 }
16639 }
16640
16641 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, input_offset) {
16642 TEST_REQUIRES_X86_AVX2;
16643 for (uint32_t channels = 32; channels < 256; channels += 48) {
16644 DWConvMicrokernelTester()
16645 .cr(16)
16646 .kr(25)
16647 .channels(channels)
16648 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080016649 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016650 }
16651 }
16652
16653 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, zero) {
16654 TEST_REQUIRES_X86_AVX2;
16655 for (uint32_t mz = 0; mz < 25; mz++) {
16656 for (uint32_t channels = 32; channels < 256; channels += 48) {
16657 DWConvMicrokernelTester()
16658 .cr(16)
16659 .kr(25)
16660 .channels(channels)
16661 .input_offset(304)
16662 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016663 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016664 }
16665 }
16666 }
16667#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16668
16669
16670#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16671 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_eq_32) {
16672 TEST_REQUIRES_X86_AVX2;
16673 DWConvMicrokernelTester()
16674 .cr(32)
16675 .kr(25)
16676 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080016677 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016678 }
16679
16680 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32) {
16681 TEST_REQUIRES_X86_AVX2;
16682 for (uint32_t channels = 64; channels < 512; channels += 96) {
16683 DWConvMicrokernelTester()
16684 .cr(32)
16685 .kr(25)
16686 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016688 }
16689 }
16690
16691 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
16692 TEST_REQUIRES_X86_AVX2;
16693 for (uint32_t channels = 64; channels < 512; channels += 96) {
16694 DWConvMicrokernelTester()
16695 .cr(32)
16696 .kr(25)
16697 .channels(channels)
16698 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016699 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016700 }
16701 }
16702
16703 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
16704 TEST_REQUIRES_X86_AVX2;
16705 for (uint32_t channels = 64; channels < 512; channels += 96) {
16706 DWConvMicrokernelTester()
16707 .cr(32)
16708 .kr(25)
16709 .channels(channels)
16710 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016711 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016712 }
16713 }
16714
16715 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_lt_32) {
16716 TEST_REQUIRES_X86_AVX2;
16717 for (uint32_t channels = 1; channels < 32; channels++) {
16718 DWConvMicrokernelTester()
16719 .cr(32)
16720 .kr(25)
16721 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016722 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016723 }
16724 }
16725
16726 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32) {
16727 TEST_REQUIRES_X86_AVX2;
16728 for (uint32_t channels = 33; channels < 64; channels++) {
16729 DWConvMicrokernelTester()
16730 .cr(32)
16731 .kr(25)
16732 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016733 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016734 }
16735 }
16736
16737 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
16738 TEST_REQUIRES_X86_AVX2;
16739 for (uint32_t channels = 33; channels < 64; channels++) {
16740 DWConvMicrokernelTester()
16741 .cr(32)
16742 .kr(25)
16743 .channels(channels)
16744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016745 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016746 }
16747 }
16748
16749 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
16750 TEST_REQUIRES_X86_AVX2;
16751 for (uint32_t channels = 33; channels < 64; channels++) {
16752 DWConvMicrokernelTester()
16753 .cr(32)
16754 .kr(25)
16755 .channels(channels)
16756 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016757 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016758 }
16759 }
16760
16761 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel) {
16762 TEST_REQUIRES_X86_AVX2;
16763 for (size_t channels = 1; channels <= 160; channels += 31) {
16764 DWConvMicrokernelTester()
16765 .cr(32)
16766 .kr(25)
16767 .channels(channels)
16768 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016769 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016770 }
16771 }
16772
16773 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
16774 TEST_REQUIRES_X86_AVX2;
16775 for (size_t channels = 1; channels <= 160; channels += 31) {
16776 for (size_t step = 2; step <= 25; step++) {
16777 DWConvMicrokernelTester()
16778 .cr(32)
16779 .kr(25)
16780 .channels(channels)
16781 .width(3)
16782 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016783 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016784 }
16785 }
16786 }
16787
16788 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
16789 TEST_REQUIRES_X86_AVX2;
16790 for (size_t channels = 1; channels <= 160; channels += 31) {
16791 DWConvMicrokernelTester()
16792 .cr(32)
16793 .kr(25)
16794 .channels(32)
16795 .width(5)
16796 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080016797 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016798 }
16799 }
16800
16801 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
16802 TEST_REQUIRES_X86_AVX2;
16803 for (size_t channels = 1; channels <= 160; channels += 31) {
16804 DWConvMicrokernelTester()
16805 .cr(32)
16806 .kr(25)
16807 .channels(channels)
16808 .width(3)
16809 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016810 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016811 }
16812 }
16813
16814 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
16815 TEST_REQUIRES_X86_AVX2;
16816 for (size_t channels = 1; channels <= 160; channels += 31) {
16817 DWConvMicrokernelTester()
16818 .cr(32)
16819 .kr(25)
16820 .channels(channels)
16821 .width(3)
16822 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016823 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016824 }
16825 }
16826
16827 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, input_offset) {
16828 TEST_REQUIRES_X86_AVX2;
16829 for (uint32_t channels = 64; channels < 512; channels += 96) {
16830 DWConvMicrokernelTester()
16831 .cr(32)
16832 .kr(25)
16833 .channels(channels)
16834 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080016835 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070016836 }
16837 }
16838
16839 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, zero) {
16840 TEST_REQUIRES_X86_AVX2;
16841 for (uint32_t mz = 0; mz < 25; mz++) {
16842 for (uint32_t channels = 64; channels < 512; channels += 96) {
16843 DWConvMicrokernelTester()
16844 .cr(32)
16845 .kr(25)
16846 .channels(channels)
16847 .input_offset(592)
16848 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016849 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070016850 }
16851 }
16852 }
16853#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16854
16855
16856#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016857 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
16858 TEST_REQUIRES_X86_AVX2;
16859 DWConvMicrokernelTester()
16860 .cr(16)
16861 .kr(25)
16862 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016863 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016864 }
16865
16866 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
16867 TEST_REQUIRES_X86_AVX2;
16868 for (uint32_t channels = 32; channels < 256; channels += 48) {
16869 DWConvMicrokernelTester()
16870 .cr(16)
16871 .kr(25)
16872 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016873 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016874 }
16875 }
16876
16877 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
16878 TEST_REQUIRES_X86_AVX2;
16879 for (uint32_t channels = 32; channels < 256; channels += 48) {
16880 DWConvMicrokernelTester()
16881 .cr(16)
16882 .kr(25)
16883 .channels(channels)
16884 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016885 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016886 }
16887 }
16888
16889 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
16890 TEST_REQUIRES_X86_AVX2;
16891 for (uint32_t channels = 32; channels < 256; channels += 48) {
16892 DWConvMicrokernelTester()
16893 .cr(16)
16894 .kr(25)
16895 .channels(channels)
16896 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016897 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016898 }
16899 }
16900
16901 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
16902 TEST_REQUIRES_X86_AVX2;
16903 for (uint32_t channels = 1; channels < 16; channels++) {
16904 DWConvMicrokernelTester()
16905 .cr(16)
16906 .kr(25)
16907 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016908 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016909 }
16910 }
16911
16912 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
16913 TEST_REQUIRES_X86_AVX2;
16914 for (uint32_t channels = 17; channels < 32; channels++) {
16915 DWConvMicrokernelTester()
16916 .cr(16)
16917 .kr(25)
16918 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016919 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016920 }
16921 }
16922
16923 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
16924 TEST_REQUIRES_X86_AVX2;
16925 for (uint32_t channels = 17; channels < 32; channels++) {
16926 DWConvMicrokernelTester()
16927 .cr(16)
16928 .kr(25)
16929 .channels(channels)
16930 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016931 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016932 }
16933 }
16934
16935 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
16936 TEST_REQUIRES_X86_AVX2;
16937 for (uint32_t channels = 17; channels < 32; channels++) {
16938 DWConvMicrokernelTester()
16939 .cr(16)
16940 .kr(25)
16941 .channels(channels)
16942 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016943 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016944 }
16945 }
16946
16947 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
16948 TEST_REQUIRES_X86_AVX2;
16949 for (size_t channels = 1; channels <= 80; channels += 15) {
16950 DWConvMicrokernelTester()
16951 .cr(16)
16952 .kr(25)
16953 .channels(channels)
16954 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016955 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016956 }
16957 }
16958
16959 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
16960 TEST_REQUIRES_X86_AVX2;
16961 for (size_t channels = 1; channels <= 80; channels += 15) {
16962 for (size_t step = 2; step <= 25; step++) {
16963 DWConvMicrokernelTester()
16964 .cr(16)
16965 .kr(25)
16966 .channels(channels)
16967 .width(3)
16968 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016969 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016970 }
16971 }
16972 }
16973
16974 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
16975 TEST_REQUIRES_X86_AVX2;
16976 for (size_t channels = 1; channels <= 80; channels += 15) {
16977 DWConvMicrokernelTester()
16978 .cr(16)
16979 .kr(25)
16980 .channels(16)
16981 .width(5)
16982 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016983 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016984 }
16985 }
16986
16987 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
16988 TEST_REQUIRES_X86_AVX2;
16989 for (size_t channels = 1; channels <= 80; channels += 15) {
16990 DWConvMicrokernelTester()
16991 .cr(16)
16992 .kr(25)
16993 .channels(channels)
16994 .width(3)
16995 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016996 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070016997 }
16998 }
16999
17000 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
17001 TEST_REQUIRES_X86_AVX2;
17002 for (size_t channels = 1; channels <= 80; channels += 15) {
17003 DWConvMicrokernelTester()
17004 .cr(16)
17005 .kr(25)
17006 .channels(channels)
17007 .width(3)
17008 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017009 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017010 }
17011 }
17012
17013 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
17014 TEST_REQUIRES_X86_AVX2;
17015 for (uint32_t channels = 32; channels < 256; channels += 48) {
17016 DWConvMicrokernelTester()
17017 .cr(16)
17018 .kr(25)
17019 .channels(channels)
17020 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080017021 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017022 }
17023 }
17024
17025 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
17026 TEST_REQUIRES_X86_AVX2;
17027 for (uint32_t mz = 0; mz < 25; mz++) {
17028 for (uint32_t channels = 32; channels < 256; channels += 48) {
17029 DWConvMicrokernelTester()
17030 .cr(16)
17031 .kr(25)
17032 .channels(channels)
17033 .input_offset(304)
17034 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017035 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017036 }
17037 }
17038 }
17039#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17040
17041
17042#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17043 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
17044 TEST_REQUIRES_X86_AVX2;
17045 DWConvMicrokernelTester()
17046 .cr(32)
17047 .kr(25)
17048 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080017049 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017050 }
17051
17052 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
17053 TEST_REQUIRES_X86_AVX2;
17054 for (uint32_t channels = 64; channels < 512; channels += 96) {
17055 DWConvMicrokernelTester()
17056 .cr(32)
17057 .kr(25)
17058 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017059 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017060 }
17061 }
17062
17063 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
17064 TEST_REQUIRES_X86_AVX2;
17065 for (uint32_t channels = 64; channels < 512; channels += 96) {
17066 DWConvMicrokernelTester()
17067 .cr(32)
17068 .kr(25)
17069 .channels(channels)
17070 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017071 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017072 }
17073 }
17074
17075 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
17076 TEST_REQUIRES_X86_AVX2;
17077 for (uint32_t channels = 64; channels < 512; channels += 96) {
17078 DWConvMicrokernelTester()
17079 .cr(32)
17080 .kr(25)
17081 .channels(channels)
17082 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017083 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017084 }
17085 }
17086
17087 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
17088 TEST_REQUIRES_X86_AVX2;
17089 for (uint32_t channels = 1; channels < 32; channels++) {
17090 DWConvMicrokernelTester()
17091 .cr(32)
17092 .kr(25)
17093 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017094 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017095 }
17096 }
17097
17098 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
17099 TEST_REQUIRES_X86_AVX2;
17100 for (uint32_t channels = 33; channels < 64; channels++) {
17101 DWConvMicrokernelTester()
17102 .cr(32)
17103 .kr(25)
17104 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017105 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017106 }
17107 }
17108
17109 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
17110 TEST_REQUIRES_X86_AVX2;
17111 for (uint32_t channels = 33; channels < 64; channels++) {
17112 DWConvMicrokernelTester()
17113 .cr(32)
17114 .kr(25)
17115 .channels(channels)
17116 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017117 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017118 }
17119 }
17120
17121 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
17122 TEST_REQUIRES_X86_AVX2;
17123 for (uint32_t channels = 33; channels < 64; channels++) {
17124 DWConvMicrokernelTester()
17125 .cr(32)
17126 .kr(25)
17127 .channels(channels)
17128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017130 }
17131 }
17132
17133 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
17134 TEST_REQUIRES_X86_AVX2;
17135 for (size_t channels = 1; channels <= 160; channels += 31) {
17136 DWConvMicrokernelTester()
17137 .cr(32)
17138 .kr(25)
17139 .channels(channels)
17140 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017141 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017142 }
17143 }
17144
17145 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
17146 TEST_REQUIRES_X86_AVX2;
17147 for (size_t channels = 1; channels <= 160; channels += 31) {
17148 for (size_t step = 2; step <= 25; step++) {
17149 DWConvMicrokernelTester()
17150 .cr(32)
17151 .kr(25)
17152 .channels(channels)
17153 .width(3)
17154 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017155 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017156 }
17157 }
17158 }
17159
17160 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
17161 TEST_REQUIRES_X86_AVX2;
17162 for (size_t channels = 1; channels <= 160; channels += 31) {
17163 DWConvMicrokernelTester()
17164 .cr(32)
17165 .kr(25)
17166 .channels(32)
17167 .width(5)
17168 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017169 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017170 }
17171 }
17172
17173 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
17174 TEST_REQUIRES_X86_AVX2;
17175 for (size_t channels = 1; channels <= 160; channels += 31) {
17176 DWConvMicrokernelTester()
17177 .cr(32)
17178 .kr(25)
17179 .channels(channels)
17180 .width(3)
17181 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017183 }
17184 }
17185
17186 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
17187 TEST_REQUIRES_X86_AVX2;
17188 for (size_t channels = 1; channels <= 160; channels += 31) {
17189 DWConvMicrokernelTester()
17190 .cr(32)
17191 .kr(25)
17192 .channels(channels)
17193 .width(3)
17194 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017195 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017196 }
17197 }
17198
17199 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
17200 TEST_REQUIRES_X86_AVX2;
17201 for (uint32_t channels = 64; channels < 512; channels += 96) {
17202 DWConvMicrokernelTester()
17203 .cr(32)
17204 .kr(25)
17205 .channels(channels)
17206 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080017207 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017208 }
17209 }
17210
17211 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
17212 TEST_REQUIRES_X86_AVX2;
17213 for (uint32_t mz = 0; mz < 25; mz++) {
17214 for (uint32_t channels = 64; channels < 512; channels += 96) {
17215 DWConvMicrokernelTester()
17216 .cr(32)
17217 .kr(25)
17218 .channels(channels)
17219 .input_offset(592)
17220 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017221 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070017222 }
17223 }
17224 }
17225#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17226
17227
17228#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancaf48312021-06-01 20:20:58 -070017229 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
17230 TEST_REQUIRES_X86_SSE41;
17231 DWConvMicrokernelTester()
17232 .cr(8)
17233 .kr(25)
17234 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017235 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017236 }
17237
17238 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
17239 TEST_REQUIRES_X86_SSE41;
17240 for (uint32_t channels = 16; channels < 128; channels += 24) {
17241 DWConvMicrokernelTester()
17242 .cr(8)
17243 .kr(25)
17244 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017245 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017246 }
17247 }
17248
17249 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
17250 TEST_REQUIRES_X86_SSE41;
17251 for (uint32_t channels = 16; channels < 128; channels += 24) {
17252 DWConvMicrokernelTester()
17253 .cr(8)
17254 .kr(25)
17255 .channels(channels)
17256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017257 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017258 }
17259 }
17260
17261 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
17262 TEST_REQUIRES_X86_SSE41;
17263 for (uint32_t channels = 16; channels < 128; channels += 24) {
17264 DWConvMicrokernelTester()
17265 .cr(8)
17266 .kr(25)
17267 .channels(channels)
17268 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017269 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017270 }
17271 }
17272
17273 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
17274 TEST_REQUIRES_X86_SSE41;
17275 for (uint32_t channels = 1; channels < 8; channels++) {
17276 DWConvMicrokernelTester()
17277 .cr(8)
17278 .kr(25)
17279 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017280 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017281 }
17282 }
17283
17284 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
17285 TEST_REQUIRES_X86_SSE41;
17286 for (uint32_t channels = 9; channels < 16; channels++) {
17287 DWConvMicrokernelTester()
17288 .cr(8)
17289 .kr(25)
17290 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017291 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017292 }
17293 }
17294
17295 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
17296 TEST_REQUIRES_X86_SSE41;
17297 for (uint32_t channels = 9; channels < 16; channels++) {
17298 DWConvMicrokernelTester()
17299 .cr(8)
17300 .kr(25)
17301 .channels(channels)
17302 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017303 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017304 }
17305 }
17306
17307 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
17308 TEST_REQUIRES_X86_SSE41;
17309 for (uint32_t channels = 9; channels < 16; channels++) {
17310 DWConvMicrokernelTester()
17311 .cr(8)
17312 .kr(25)
17313 .channels(channels)
17314 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017315 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017316 }
17317 }
17318
17319 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
17320 TEST_REQUIRES_X86_SSE41;
17321 for (size_t channels = 1; channels <= 40; channels += 7) {
17322 DWConvMicrokernelTester()
17323 .cr(8)
17324 .kr(25)
17325 .channels(channels)
17326 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017327 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017328 }
17329 }
17330
17331 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
17332 TEST_REQUIRES_X86_SSE41;
17333 for (size_t channels = 1; channels <= 40; channels += 7) {
17334 for (size_t step = 2; step <= 25; step++) {
17335 DWConvMicrokernelTester()
17336 .cr(8)
17337 .kr(25)
17338 .channels(channels)
17339 .width(3)
17340 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017341 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017342 }
17343 }
17344 }
17345
17346 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
17347 TEST_REQUIRES_X86_SSE41;
17348 for (size_t channels = 1; channels <= 40; channels += 7) {
17349 DWConvMicrokernelTester()
17350 .cr(8)
17351 .kr(25)
17352 .channels(8)
17353 .width(5)
17354 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017355 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017356 }
17357 }
17358
17359 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
17360 TEST_REQUIRES_X86_SSE41;
17361 for (size_t channels = 1; channels <= 40; channels += 7) {
17362 DWConvMicrokernelTester()
17363 .cr(8)
17364 .kr(25)
17365 .channels(channels)
17366 .width(3)
17367 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017368 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017369 }
17370 }
17371
17372 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
17373 TEST_REQUIRES_X86_SSE41;
17374 for (size_t channels = 1; channels <= 40; channels += 7) {
17375 DWConvMicrokernelTester()
17376 .cr(8)
17377 .kr(25)
17378 .channels(channels)
17379 .width(3)
17380 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017381 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017382 }
17383 }
17384
17385 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
17386 TEST_REQUIRES_X86_SSE41;
17387 for (uint32_t channels = 16; channels < 128; channels += 24) {
17388 DWConvMicrokernelTester()
17389 .cr(8)
17390 .kr(25)
17391 .channels(channels)
17392 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080017393 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017394 }
17395 }
17396
17397 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
17398 TEST_REQUIRES_X86_SSE41;
17399 for (uint32_t mz = 0; mz < 25; mz++) {
17400 for (uint32_t channels = 16; channels < 128; channels += 24) {
17401 DWConvMicrokernelTester()
17402 .cr(8)
17403 .kr(25)
17404 .channels(channels)
17405 .input_offset(176)
17406 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017407 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017408 }
17409 }
17410 }
17411#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17412
17413
17414#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17415 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
17416 TEST_REQUIRES_X86_SSE41;
17417 DWConvMicrokernelTester()
17418 .cr(16)
17419 .kr(25)
17420 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080017421 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017422 }
17423
17424 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
17425 TEST_REQUIRES_X86_SSE41;
17426 for (uint32_t channels = 32; channels < 256; channels += 48) {
17427 DWConvMicrokernelTester()
17428 .cr(16)
17429 .kr(25)
17430 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017431 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017432 }
17433 }
17434
17435 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
17436 TEST_REQUIRES_X86_SSE41;
17437 for (uint32_t channels = 32; channels < 256; channels += 48) {
17438 DWConvMicrokernelTester()
17439 .cr(16)
17440 .kr(25)
17441 .channels(channels)
17442 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017443 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017444 }
17445 }
17446
17447 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
17448 TEST_REQUIRES_X86_SSE41;
17449 for (uint32_t channels = 32; channels < 256; channels += 48) {
17450 DWConvMicrokernelTester()
17451 .cr(16)
17452 .kr(25)
17453 .channels(channels)
17454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017455 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017456 }
17457 }
17458
17459 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
17460 TEST_REQUIRES_X86_SSE41;
17461 for (uint32_t channels = 1; channels < 16; channels++) {
17462 DWConvMicrokernelTester()
17463 .cr(16)
17464 .kr(25)
17465 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017466 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017467 }
17468 }
17469
17470 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
17471 TEST_REQUIRES_X86_SSE41;
17472 for (uint32_t channels = 17; channels < 32; channels++) {
17473 DWConvMicrokernelTester()
17474 .cr(16)
17475 .kr(25)
17476 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017477 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017478 }
17479 }
17480
17481 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
17482 TEST_REQUIRES_X86_SSE41;
17483 for (uint32_t channels = 17; channels < 32; channels++) {
17484 DWConvMicrokernelTester()
17485 .cr(16)
17486 .kr(25)
17487 .channels(channels)
17488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017489 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017490 }
17491 }
17492
17493 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
17494 TEST_REQUIRES_X86_SSE41;
17495 for (uint32_t channels = 17; channels < 32; channels++) {
17496 DWConvMicrokernelTester()
17497 .cr(16)
17498 .kr(25)
17499 .channels(channels)
17500 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017501 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017502 }
17503 }
17504
17505 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
17506 TEST_REQUIRES_X86_SSE41;
17507 for (size_t channels = 1; channels <= 80; channels += 15) {
17508 DWConvMicrokernelTester()
17509 .cr(16)
17510 .kr(25)
17511 .channels(channels)
17512 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017513 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017514 }
17515 }
17516
17517 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
17518 TEST_REQUIRES_X86_SSE41;
17519 for (size_t channels = 1; channels <= 80; channels += 15) {
17520 for (size_t step = 2; step <= 25; step++) {
17521 DWConvMicrokernelTester()
17522 .cr(16)
17523 .kr(25)
17524 .channels(channels)
17525 .width(3)
17526 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017527 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017528 }
17529 }
17530 }
17531
17532 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
17533 TEST_REQUIRES_X86_SSE41;
17534 for (size_t channels = 1; channels <= 80; channels += 15) {
17535 DWConvMicrokernelTester()
17536 .cr(16)
17537 .kr(25)
17538 .channels(16)
17539 .width(5)
17540 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017541 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017542 }
17543 }
17544
17545 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
17546 TEST_REQUIRES_X86_SSE41;
17547 for (size_t channels = 1; channels <= 80; channels += 15) {
17548 DWConvMicrokernelTester()
17549 .cr(16)
17550 .kr(25)
17551 .channels(channels)
17552 .width(3)
17553 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017554 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017555 }
17556 }
17557
17558 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
17559 TEST_REQUIRES_X86_SSE41;
17560 for (size_t channels = 1; channels <= 80; channels += 15) {
17561 DWConvMicrokernelTester()
17562 .cr(16)
17563 .kr(25)
17564 .channels(channels)
17565 .width(3)
17566 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017567 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017568 }
17569 }
17570
17571 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
17572 TEST_REQUIRES_X86_SSE41;
17573 for (uint32_t channels = 32; channels < 256; channels += 48) {
17574 DWConvMicrokernelTester()
17575 .cr(16)
17576 .kr(25)
17577 .channels(channels)
17578 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080017579 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017580 }
17581 }
17582
17583 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
17584 TEST_REQUIRES_X86_SSE41;
17585 for (uint32_t mz = 0; mz < 25; mz++) {
17586 for (uint32_t channels = 32; channels < 256; channels += 48) {
17587 DWConvMicrokernelTester()
17588 .cr(16)
17589 .kr(25)
17590 .channels(channels)
17591 .input_offset(304)
17592 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017593 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017594 }
17595 }
17596 }
17597#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17598
17599
17600#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17601 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_eq_24) {
17602 TEST_REQUIRES_X86_SSE41;
17603 DWConvMicrokernelTester()
17604 .cr(24)
17605 .kr(25)
17606 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080017607 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017608 }
17609
17610 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24) {
17611 TEST_REQUIRES_X86_SSE41;
17612 for (uint32_t channels = 48; channels < 384; channels += 72) {
17613 DWConvMicrokernelTester()
17614 .cr(24)
17615 .kr(25)
17616 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017617 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017618 }
17619 }
17620
17621 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
17622 TEST_REQUIRES_X86_SSE41;
17623 for (uint32_t channels = 48; channels < 384; channels += 72) {
17624 DWConvMicrokernelTester()
17625 .cr(24)
17626 .kr(25)
17627 .channels(channels)
17628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017630 }
17631 }
17632
17633 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
17634 TEST_REQUIRES_X86_SSE41;
17635 for (uint32_t channels = 48; channels < 384; channels += 72) {
17636 DWConvMicrokernelTester()
17637 .cr(24)
17638 .kr(25)
17639 .channels(channels)
17640 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017641 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017642 }
17643 }
17644
17645 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_lt_24) {
17646 TEST_REQUIRES_X86_SSE41;
17647 for (uint32_t channels = 1; channels < 24; channels++) {
17648 DWConvMicrokernelTester()
17649 .cr(24)
17650 .kr(25)
17651 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017652 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017653 }
17654 }
17655
17656 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24) {
17657 TEST_REQUIRES_X86_SSE41;
17658 for (uint32_t channels = 25; channels < 48; channels++) {
17659 DWConvMicrokernelTester()
17660 .cr(24)
17661 .kr(25)
17662 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017663 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017664 }
17665 }
17666
17667 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
17668 TEST_REQUIRES_X86_SSE41;
17669 for (uint32_t channels = 25; channels < 48; channels++) {
17670 DWConvMicrokernelTester()
17671 .cr(24)
17672 .kr(25)
17673 .channels(channels)
17674 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017675 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017676 }
17677 }
17678
17679 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
17680 TEST_REQUIRES_X86_SSE41;
17681 for (uint32_t channels = 25; channels < 48; channels++) {
17682 DWConvMicrokernelTester()
17683 .cr(24)
17684 .kr(25)
17685 .channels(channels)
17686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017688 }
17689 }
17690
17691 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel) {
17692 TEST_REQUIRES_X86_SSE41;
17693 for (size_t channels = 1; channels <= 120; channels += 23) {
17694 DWConvMicrokernelTester()
17695 .cr(24)
17696 .kr(25)
17697 .channels(channels)
17698 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017699 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017700 }
17701 }
17702
17703 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_step) {
17704 TEST_REQUIRES_X86_SSE41;
17705 for (size_t channels = 1; channels <= 120; channels += 23) {
17706 for (size_t step = 2; step <= 25; step++) {
17707 DWConvMicrokernelTester()
17708 .cr(24)
17709 .kr(25)
17710 .channels(channels)
17711 .width(3)
17712 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017713 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017714 }
17715 }
17716 }
17717
17718 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
17719 TEST_REQUIRES_X86_SSE41;
17720 for (size_t channels = 1; channels <= 120; channels += 23) {
17721 DWConvMicrokernelTester()
17722 .cr(24)
17723 .kr(25)
17724 .channels(24)
17725 .width(5)
17726 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080017727 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017728 }
17729 }
17730
17731 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
17732 TEST_REQUIRES_X86_SSE41;
17733 for (size_t channels = 1; channels <= 120; channels += 23) {
17734 DWConvMicrokernelTester()
17735 .cr(24)
17736 .kr(25)
17737 .channels(channels)
17738 .width(3)
17739 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017740 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017741 }
17742 }
17743
17744 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
17745 TEST_REQUIRES_X86_SSE41;
17746 for (size_t channels = 1; channels <= 120; channels += 23) {
17747 DWConvMicrokernelTester()
17748 .cr(24)
17749 .kr(25)
17750 .channels(channels)
17751 .width(3)
17752 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017753 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017754 }
17755 }
17756
17757 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, input_offset) {
17758 TEST_REQUIRES_X86_SSE41;
17759 for (uint32_t channels = 48; channels < 384; channels += 72) {
17760 DWConvMicrokernelTester()
17761 .cr(24)
17762 .kr(25)
17763 .channels(channels)
17764 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080017765 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017766 }
17767 }
17768
17769 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, zero) {
17770 TEST_REQUIRES_X86_SSE41;
17771 for (uint32_t mz = 0; mz < 25; mz++) {
17772 for (uint32_t channels = 48; channels < 384; channels += 72) {
17773 DWConvMicrokernelTester()
17774 .cr(24)
17775 .kr(25)
17776 .channels(channels)
17777 .input_offset(464)
17778 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017779 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017780 }
17781 }
17782 }
17783#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17784
17785
17786#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17787 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
17788 TEST_REQUIRES_X86_AVX;
17789 DWConvMicrokernelTester()
17790 .cr(8)
17791 .kr(25)
17792 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017793 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017794 }
17795
17796 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
17797 TEST_REQUIRES_X86_AVX;
17798 for (uint32_t channels = 16; channels < 128; channels += 24) {
17799 DWConvMicrokernelTester()
17800 .cr(8)
17801 .kr(25)
17802 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017803 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017804 }
17805 }
17806
17807 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
17808 TEST_REQUIRES_X86_AVX;
17809 for (uint32_t channels = 16; channels < 128; channels += 24) {
17810 DWConvMicrokernelTester()
17811 .cr(8)
17812 .kr(25)
17813 .channels(channels)
17814 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017815 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017816 }
17817 }
17818
17819 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
17820 TEST_REQUIRES_X86_AVX;
17821 for (uint32_t channels = 16; channels < 128; channels += 24) {
17822 DWConvMicrokernelTester()
17823 .cr(8)
17824 .kr(25)
17825 .channels(channels)
17826 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017827 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017828 }
17829 }
17830
17831 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
17832 TEST_REQUIRES_X86_AVX;
17833 for (uint32_t channels = 1; channels < 8; channels++) {
17834 DWConvMicrokernelTester()
17835 .cr(8)
17836 .kr(25)
17837 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017838 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017839 }
17840 }
17841
17842 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
17843 TEST_REQUIRES_X86_AVX;
17844 for (uint32_t channels = 9; channels < 16; channels++) {
17845 DWConvMicrokernelTester()
17846 .cr(8)
17847 .kr(25)
17848 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017849 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017850 }
17851 }
17852
17853 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
17854 TEST_REQUIRES_X86_AVX;
17855 for (uint32_t channels = 9; channels < 16; channels++) {
17856 DWConvMicrokernelTester()
17857 .cr(8)
17858 .kr(25)
17859 .channels(channels)
17860 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017861 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017862 }
17863 }
17864
17865 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
17866 TEST_REQUIRES_X86_AVX;
17867 for (uint32_t channels = 9; channels < 16; channels++) {
17868 DWConvMicrokernelTester()
17869 .cr(8)
17870 .kr(25)
17871 .channels(channels)
17872 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017873 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017874 }
17875 }
17876
17877 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
17878 TEST_REQUIRES_X86_AVX;
17879 for (size_t channels = 1; channels <= 40; channels += 7) {
17880 DWConvMicrokernelTester()
17881 .cr(8)
17882 .kr(25)
17883 .channels(channels)
17884 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017885 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017886 }
17887 }
17888
17889 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
17890 TEST_REQUIRES_X86_AVX;
17891 for (size_t channels = 1; channels <= 40; channels += 7) {
17892 for (size_t step = 2; step <= 25; step++) {
17893 DWConvMicrokernelTester()
17894 .cr(8)
17895 .kr(25)
17896 .channels(channels)
17897 .width(3)
17898 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017899 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017900 }
17901 }
17902 }
17903
17904 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
17905 TEST_REQUIRES_X86_AVX;
17906 for (size_t channels = 1; channels <= 40; channels += 7) {
17907 DWConvMicrokernelTester()
17908 .cr(8)
17909 .kr(25)
17910 .channels(8)
17911 .width(5)
17912 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017913 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017914 }
17915 }
17916
17917 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
17918 TEST_REQUIRES_X86_AVX;
17919 for (size_t channels = 1; channels <= 40; channels += 7) {
17920 DWConvMicrokernelTester()
17921 .cr(8)
17922 .kr(25)
17923 .channels(channels)
17924 .width(3)
17925 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017926 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017927 }
17928 }
17929
17930 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
17931 TEST_REQUIRES_X86_AVX;
17932 for (size_t channels = 1; channels <= 40; channels += 7) {
17933 DWConvMicrokernelTester()
17934 .cr(8)
17935 .kr(25)
17936 .channels(channels)
17937 .width(3)
17938 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017939 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017940 }
17941 }
17942
17943 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
17944 TEST_REQUIRES_X86_AVX;
17945 for (uint32_t channels = 16; channels < 128; channels += 24) {
17946 DWConvMicrokernelTester()
17947 .cr(8)
17948 .kr(25)
17949 .channels(channels)
17950 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080017951 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017952 }
17953 }
17954
17955 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
17956 TEST_REQUIRES_X86_AVX;
17957 for (uint32_t mz = 0; mz < 25; mz++) {
17958 for (uint32_t channels = 16; channels < 128; channels += 24) {
17959 DWConvMicrokernelTester()
17960 .cr(8)
17961 .kr(25)
17962 .channels(channels)
17963 .input_offset(176)
17964 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017965 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017966 }
17967 }
17968 }
17969#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17970
17971
17972#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17973 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
17974 TEST_REQUIRES_X86_AVX;
17975 DWConvMicrokernelTester()
17976 .cr(16)
17977 .kr(25)
17978 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080017979 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017980 }
17981
17982 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
17983 TEST_REQUIRES_X86_AVX;
17984 for (uint32_t channels = 32; channels < 256; channels += 48) {
17985 DWConvMicrokernelTester()
17986 .cr(16)
17987 .kr(25)
17988 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070017990 }
17991 }
17992
17993 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
17994 TEST_REQUIRES_X86_AVX;
17995 for (uint32_t channels = 32; channels < 256; channels += 48) {
17996 DWConvMicrokernelTester()
17997 .cr(16)
17998 .kr(25)
17999 .channels(channels)
18000 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018001 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018002 }
18003 }
18004
18005 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
18006 TEST_REQUIRES_X86_AVX;
18007 for (uint32_t channels = 32; channels < 256; channels += 48) {
18008 DWConvMicrokernelTester()
18009 .cr(16)
18010 .kr(25)
18011 .channels(channels)
18012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018014 }
18015 }
18016
18017 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
18018 TEST_REQUIRES_X86_AVX;
18019 for (uint32_t channels = 1; channels < 16; channels++) {
18020 DWConvMicrokernelTester()
18021 .cr(16)
18022 .kr(25)
18023 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018025 }
18026 }
18027
18028 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
18029 TEST_REQUIRES_X86_AVX;
18030 for (uint32_t channels = 17; channels < 32; channels++) {
18031 DWConvMicrokernelTester()
18032 .cr(16)
18033 .kr(25)
18034 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018035 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018036 }
18037 }
18038
18039 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
18040 TEST_REQUIRES_X86_AVX;
18041 for (uint32_t channels = 17; channels < 32; channels++) {
18042 DWConvMicrokernelTester()
18043 .cr(16)
18044 .kr(25)
18045 .channels(channels)
18046 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018047 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018048 }
18049 }
18050
18051 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
18052 TEST_REQUIRES_X86_AVX;
18053 for (uint32_t channels = 17; channels < 32; channels++) {
18054 DWConvMicrokernelTester()
18055 .cr(16)
18056 .kr(25)
18057 .channels(channels)
18058 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018059 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018060 }
18061 }
18062
18063 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
18064 TEST_REQUIRES_X86_AVX;
18065 for (size_t channels = 1; channels <= 80; channels += 15) {
18066 DWConvMicrokernelTester()
18067 .cr(16)
18068 .kr(25)
18069 .channels(channels)
18070 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018071 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018072 }
18073 }
18074
18075 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
18076 TEST_REQUIRES_X86_AVX;
18077 for (size_t channels = 1; channels <= 80; channels += 15) {
18078 for (size_t step = 2; step <= 25; step++) {
18079 DWConvMicrokernelTester()
18080 .cr(16)
18081 .kr(25)
18082 .channels(channels)
18083 .width(3)
18084 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018085 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018086 }
18087 }
18088 }
18089
18090 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
18091 TEST_REQUIRES_X86_AVX;
18092 for (size_t channels = 1; channels <= 80; channels += 15) {
18093 DWConvMicrokernelTester()
18094 .cr(16)
18095 .kr(25)
18096 .channels(16)
18097 .width(5)
18098 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018099 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018100 }
18101 }
18102
18103 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
18104 TEST_REQUIRES_X86_AVX;
18105 for (size_t channels = 1; channels <= 80; channels += 15) {
18106 DWConvMicrokernelTester()
18107 .cr(16)
18108 .kr(25)
18109 .channels(channels)
18110 .width(3)
18111 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018112 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018113 }
18114 }
18115
18116 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
18117 TEST_REQUIRES_X86_AVX;
18118 for (size_t channels = 1; channels <= 80; channels += 15) {
18119 DWConvMicrokernelTester()
18120 .cr(16)
18121 .kr(25)
18122 .channels(channels)
18123 .width(3)
18124 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018125 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018126 }
18127 }
18128
18129 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
18130 TEST_REQUIRES_X86_AVX;
18131 for (uint32_t channels = 32; channels < 256; channels += 48) {
18132 DWConvMicrokernelTester()
18133 .cr(16)
18134 .kr(25)
18135 .channels(channels)
18136 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080018137 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018138 }
18139 }
18140
18141 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
18142 TEST_REQUIRES_X86_AVX;
18143 for (uint32_t mz = 0; mz < 25; mz++) {
18144 for (uint32_t channels = 32; channels < 256; channels += 48) {
18145 DWConvMicrokernelTester()
18146 .cr(16)
18147 .kr(25)
18148 .channels(channels)
18149 .input_offset(304)
18150 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018151 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018152 }
18153 }
18154 }
18155#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18156
18157
18158#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18159 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_eq_24) {
18160 TEST_REQUIRES_X86_AVX;
18161 DWConvMicrokernelTester()
18162 .cr(24)
18163 .kr(25)
18164 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080018165 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018166 }
18167
18168 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24) {
18169 TEST_REQUIRES_X86_AVX;
18170 for (uint32_t channels = 48; channels < 384; channels += 72) {
18171 DWConvMicrokernelTester()
18172 .cr(24)
18173 .kr(25)
18174 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018175 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018176 }
18177 }
18178
18179 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
18180 TEST_REQUIRES_X86_AVX;
18181 for (uint32_t channels = 48; channels < 384; channels += 72) {
18182 DWConvMicrokernelTester()
18183 .cr(24)
18184 .kr(25)
18185 .channels(channels)
18186 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018187 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018188 }
18189 }
18190
18191 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
18192 TEST_REQUIRES_X86_AVX;
18193 for (uint32_t channels = 48; channels < 384; channels += 72) {
18194 DWConvMicrokernelTester()
18195 .cr(24)
18196 .kr(25)
18197 .channels(channels)
18198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018200 }
18201 }
18202
18203 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_lt_24) {
18204 TEST_REQUIRES_X86_AVX;
18205 for (uint32_t channels = 1; channels < 24; channels++) {
18206 DWConvMicrokernelTester()
18207 .cr(24)
18208 .kr(25)
18209 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018210 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018211 }
18212 }
18213
18214 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24) {
18215 TEST_REQUIRES_X86_AVX;
18216 for (uint32_t channels = 25; channels < 48; channels++) {
18217 DWConvMicrokernelTester()
18218 .cr(24)
18219 .kr(25)
18220 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018221 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018222 }
18223 }
18224
18225 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
18226 TEST_REQUIRES_X86_AVX;
18227 for (uint32_t channels = 25; channels < 48; channels++) {
18228 DWConvMicrokernelTester()
18229 .cr(24)
18230 .kr(25)
18231 .channels(channels)
18232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018233 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018234 }
18235 }
18236
18237 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
18238 TEST_REQUIRES_X86_AVX;
18239 for (uint32_t channels = 25; channels < 48; channels++) {
18240 DWConvMicrokernelTester()
18241 .cr(24)
18242 .kr(25)
18243 .channels(channels)
18244 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018245 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018246 }
18247 }
18248
18249 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel) {
18250 TEST_REQUIRES_X86_AVX;
18251 for (size_t channels = 1; channels <= 120; channels += 23) {
18252 DWConvMicrokernelTester()
18253 .cr(24)
18254 .kr(25)
18255 .channels(channels)
18256 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018257 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018258 }
18259 }
18260
18261 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_step) {
18262 TEST_REQUIRES_X86_AVX;
18263 for (size_t channels = 1; channels <= 120; channels += 23) {
18264 for (size_t step = 2; step <= 25; step++) {
18265 DWConvMicrokernelTester()
18266 .cr(24)
18267 .kr(25)
18268 .channels(channels)
18269 .width(3)
18270 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018271 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018272 }
18273 }
18274 }
18275
18276 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
18277 TEST_REQUIRES_X86_AVX;
18278 for (size_t channels = 1; channels <= 120; channels += 23) {
18279 DWConvMicrokernelTester()
18280 .cr(24)
18281 .kr(25)
18282 .channels(24)
18283 .width(5)
18284 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080018285 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018286 }
18287 }
18288
18289 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmin) {
18290 TEST_REQUIRES_X86_AVX;
18291 for (size_t channels = 1; channels <= 120; channels += 23) {
18292 DWConvMicrokernelTester()
18293 .cr(24)
18294 .kr(25)
18295 .channels(channels)
18296 .width(3)
18297 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018298 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018299 }
18300 }
18301
18302 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmax) {
18303 TEST_REQUIRES_X86_AVX;
18304 for (size_t channels = 1; channels <= 120; channels += 23) {
18305 DWConvMicrokernelTester()
18306 .cr(24)
18307 .kr(25)
18308 .channels(channels)
18309 .width(3)
18310 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018311 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018312 }
18313 }
18314
18315 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, input_offset) {
18316 TEST_REQUIRES_X86_AVX;
18317 for (uint32_t channels = 48; channels < 384; channels += 72) {
18318 DWConvMicrokernelTester()
18319 .cr(24)
18320 .kr(25)
18321 .channels(channels)
18322 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080018323 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018324 }
18325 }
18326
18327 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, zero) {
18328 TEST_REQUIRES_X86_AVX;
18329 for (uint32_t mz = 0; mz < 25; mz++) {
18330 for (uint32_t channels = 48; channels < 384; channels += 72) {
18331 DWConvMicrokernelTester()
18332 .cr(24)
18333 .kr(25)
18334 .channels(channels)
18335 .input_offset(464)
18336 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018337 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018338 }
18339 }
18340 }
18341#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18342
18343
18344#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18345 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
18346 TEST_REQUIRES_X86_XOP;
18347 DWConvMicrokernelTester()
18348 .cr(8)
18349 .kr(25)
18350 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018351 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018352 }
18353
18354 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
18355 TEST_REQUIRES_X86_XOP;
18356 for (uint32_t channels = 16; channels < 128; channels += 24) {
18357 DWConvMicrokernelTester()
18358 .cr(8)
18359 .kr(25)
18360 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018361 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018362 }
18363 }
18364
18365 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
18366 TEST_REQUIRES_X86_XOP;
18367 for (uint32_t channels = 16; channels < 128; channels += 24) {
18368 DWConvMicrokernelTester()
18369 .cr(8)
18370 .kr(25)
18371 .channels(channels)
18372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018373 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018374 }
18375 }
18376
18377 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
18378 TEST_REQUIRES_X86_XOP;
18379 for (uint32_t channels = 16; channels < 128; channels += 24) {
18380 DWConvMicrokernelTester()
18381 .cr(8)
18382 .kr(25)
18383 .channels(channels)
18384 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018386 }
18387 }
18388
18389 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
18390 TEST_REQUIRES_X86_XOP;
18391 for (uint32_t channels = 1; channels < 8; channels++) {
18392 DWConvMicrokernelTester()
18393 .cr(8)
18394 .kr(25)
18395 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018396 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018397 }
18398 }
18399
18400 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
18401 TEST_REQUIRES_X86_XOP;
18402 for (uint32_t channels = 9; channels < 16; channels++) {
18403 DWConvMicrokernelTester()
18404 .cr(8)
18405 .kr(25)
18406 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018407 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018408 }
18409 }
18410
18411 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
18412 TEST_REQUIRES_X86_XOP;
18413 for (uint32_t channels = 9; channels < 16; channels++) {
18414 DWConvMicrokernelTester()
18415 .cr(8)
18416 .kr(25)
18417 .channels(channels)
18418 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018419 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018420 }
18421 }
18422
18423 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
18424 TEST_REQUIRES_X86_XOP;
18425 for (uint32_t channels = 9; channels < 16; channels++) {
18426 DWConvMicrokernelTester()
18427 .cr(8)
18428 .kr(25)
18429 .channels(channels)
18430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018431 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018432 }
18433 }
18434
18435 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
18436 TEST_REQUIRES_X86_XOP;
18437 for (size_t channels = 1; channels <= 40; channels += 7) {
18438 DWConvMicrokernelTester()
18439 .cr(8)
18440 .kr(25)
18441 .channels(channels)
18442 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018443 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018444 }
18445 }
18446
18447 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
18448 TEST_REQUIRES_X86_XOP;
18449 for (size_t channels = 1; channels <= 40; channels += 7) {
18450 for (size_t step = 2; step <= 25; step++) {
18451 DWConvMicrokernelTester()
18452 .cr(8)
18453 .kr(25)
18454 .channels(channels)
18455 .width(3)
18456 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018457 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018458 }
18459 }
18460 }
18461
18462 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
18463 TEST_REQUIRES_X86_XOP;
18464 for (size_t channels = 1; channels <= 40; channels += 7) {
18465 DWConvMicrokernelTester()
18466 .cr(8)
18467 .kr(25)
18468 .channels(8)
18469 .width(5)
18470 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018471 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018472 }
18473 }
18474
18475 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
18476 TEST_REQUIRES_X86_XOP;
18477 for (size_t channels = 1; channels <= 40; channels += 7) {
18478 DWConvMicrokernelTester()
18479 .cr(8)
18480 .kr(25)
18481 .channels(channels)
18482 .width(3)
18483 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018484 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018485 }
18486 }
18487
18488 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
18489 TEST_REQUIRES_X86_XOP;
18490 for (size_t channels = 1; channels <= 40; channels += 7) {
18491 DWConvMicrokernelTester()
18492 .cr(8)
18493 .kr(25)
18494 .channels(channels)
18495 .width(3)
18496 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018497 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018498 }
18499 }
18500
18501 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
18502 TEST_REQUIRES_X86_XOP;
18503 for (uint32_t channels = 16; channels < 128; channels += 24) {
18504 DWConvMicrokernelTester()
18505 .cr(8)
18506 .kr(25)
18507 .channels(channels)
18508 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080018509 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018510 }
18511 }
18512
18513 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
18514 TEST_REQUIRES_X86_XOP;
18515 for (uint32_t mz = 0; mz < 25; mz++) {
18516 for (uint32_t channels = 16; channels < 128; channels += 24) {
18517 DWConvMicrokernelTester()
18518 .cr(8)
18519 .kr(25)
18520 .channels(channels)
18521 .input_offset(176)
18522 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018523 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018524 }
18525 }
18526 }
18527#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18528
18529
18530#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18531 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
18532 TEST_REQUIRES_X86_XOP;
18533 DWConvMicrokernelTester()
18534 .cr(16)
18535 .kr(25)
18536 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080018537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018538 }
18539
18540 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
18541 TEST_REQUIRES_X86_XOP;
18542 for (uint32_t channels = 32; channels < 256; channels += 48) {
18543 DWConvMicrokernelTester()
18544 .cr(16)
18545 .kr(25)
18546 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018547 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018548 }
18549 }
18550
18551 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
18552 TEST_REQUIRES_X86_XOP;
18553 for (uint32_t channels = 32; channels < 256; channels += 48) {
18554 DWConvMicrokernelTester()
18555 .cr(16)
18556 .kr(25)
18557 .channels(channels)
18558 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018559 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018560 }
18561 }
18562
18563 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
18564 TEST_REQUIRES_X86_XOP;
18565 for (uint32_t channels = 32; channels < 256; channels += 48) {
18566 DWConvMicrokernelTester()
18567 .cr(16)
18568 .kr(25)
18569 .channels(channels)
18570 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018571 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018572 }
18573 }
18574
18575 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
18576 TEST_REQUIRES_X86_XOP;
18577 for (uint32_t channels = 1; channels < 16; channels++) {
18578 DWConvMicrokernelTester()
18579 .cr(16)
18580 .kr(25)
18581 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018582 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018583 }
18584 }
18585
18586 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
18587 TEST_REQUIRES_X86_XOP;
18588 for (uint32_t channels = 17; channels < 32; channels++) {
18589 DWConvMicrokernelTester()
18590 .cr(16)
18591 .kr(25)
18592 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018593 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018594 }
18595 }
18596
18597 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
18598 TEST_REQUIRES_X86_XOP;
18599 for (uint32_t channels = 17; channels < 32; channels++) {
18600 DWConvMicrokernelTester()
18601 .cr(16)
18602 .kr(25)
18603 .channels(channels)
18604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018605 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018606 }
18607 }
18608
18609 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
18610 TEST_REQUIRES_X86_XOP;
18611 for (uint32_t channels = 17; channels < 32; channels++) {
18612 DWConvMicrokernelTester()
18613 .cr(16)
18614 .kr(25)
18615 .channels(channels)
18616 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018617 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018618 }
18619 }
18620
18621 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
18622 TEST_REQUIRES_X86_XOP;
18623 for (size_t channels = 1; channels <= 80; channels += 15) {
18624 DWConvMicrokernelTester()
18625 .cr(16)
18626 .kr(25)
18627 .channels(channels)
18628 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018629 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018630 }
18631 }
18632
18633 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
18634 TEST_REQUIRES_X86_XOP;
18635 for (size_t channels = 1; channels <= 80; channels += 15) {
18636 for (size_t step = 2; step <= 25; step++) {
18637 DWConvMicrokernelTester()
18638 .cr(16)
18639 .kr(25)
18640 .channels(channels)
18641 .width(3)
18642 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018643 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018644 }
18645 }
18646 }
18647
18648 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
18649 TEST_REQUIRES_X86_XOP;
18650 for (size_t channels = 1; channels <= 80; channels += 15) {
18651 DWConvMicrokernelTester()
18652 .cr(16)
18653 .kr(25)
18654 .channels(16)
18655 .width(5)
18656 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018657 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018658 }
18659 }
18660
18661 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
18662 TEST_REQUIRES_X86_XOP;
18663 for (size_t channels = 1; channels <= 80; channels += 15) {
18664 DWConvMicrokernelTester()
18665 .cr(16)
18666 .kr(25)
18667 .channels(channels)
18668 .width(3)
18669 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018670 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018671 }
18672 }
18673
18674 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
18675 TEST_REQUIRES_X86_XOP;
18676 for (size_t channels = 1; channels <= 80; channels += 15) {
18677 DWConvMicrokernelTester()
18678 .cr(16)
18679 .kr(25)
18680 .channels(channels)
18681 .width(3)
18682 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018683 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018684 }
18685 }
18686
18687 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
18688 TEST_REQUIRES_X86_XOP;
18689 for (uint32_t channels = 32; channels < 256; channels += 48) {
18690 DWConvMicrokernelTester()
18691 .cr(16)
18692 .kr(25)
18693 .channels(channels)
18694 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080018695 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018696 }
18697 }
18698
18699 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
18700 TEST_REQUIRES_X86_XOP;
18701 for (uint32_t mz = 0; mz < 25; mz++) {
18702 for (uint32_t channels = 32; channels < 256; channels += 48) {
18703 DWConvMicrokernelTester()
18704 .cr(16)
18705 .kr(25)
18706 .channels(channels)
18707 .input_offset(304)
18708 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018709 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018710 }
18711 }
18712 }
18713#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18714
18715
18716#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18717 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_eq_24) {
18718 TEST_REQUIRES_X86_XOP;
18719 DWConvMicrokernelTester()
18720 .cr(24)
18721 .kr(25)
18722 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080018723 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018724 }
18725
18726 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24) {
18727 TEST_REQUIRES_X86_XOP;
18728 for (uint32_t channels = 48; channels < 384; channels += 72) {
18729 DWConvMicrokernelTester()
18730 .cr(24)
18731 .kr(25)
18732 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018733 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018734 }
18735 }
18736
18737 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
18738 TEST_REQUIRES_X86_XOP;
18739 for (uint32_t channels = 48; channels < 384; channels += 72) {
18740 DWConvMicrokernelTester()
18741 .cr(24)
18742 .kr(25)
18743 .channels(channels)
18744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018745 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018746 }
18747 }
18748
18749 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
18750 TEST_REQUIRES_X86_XOP;
18751 for (uint32_t channels = 48; channels < 384; channels += 72) {
18752 DWConvMicrokernelTester()
18753 .cr(24)
18754 .kr(25)
18755 .channels(channels)
18756 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018757 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018758 }
18759 }
18760
18761 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_lt_24) {
18762 TEST_REQUIRES_X86_XOP;
18763 for (uint32_t channels = 1; channels < 24; channels++) {
18764 DWConvMicrokernelTester()
18765 .cr(24)
18766 .kr(25)
18767 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018768 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018769 }
18770 }
18771
18772 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24) {
18773 TEST_REQUIRES_X86_XOP;
18774 for (uint32_t channels = 25; channels < 48; channels++) {
18775 DWConvMicrokernelTester()
18776 .cr(24)
18777 .kr(25)
18778 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018779 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018780 }
18781 }
18782
18783 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
18784 TEST_REQUIRES_X86_XOP;
18785 for (uint32_t channels = 25; channels < 48; channels++) {
18786 DWConvMicrokernelTester()
18787 .cr(24)
18788 .kr(25)
18789 .channels(channels)
18790 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018791 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018792 }
18793 }
18794
18795 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
18796 TEST_REQUIRES_X86_XOP;
18797 for (uint32_t channels = 25; channels < 48; channels++) {
18798 DWConvMicrokernelTester()
18799 .cr(24)
18800 .kr(25)
18801 .channels(channels)
18802 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018803 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018804 }
18805 }
18806
18807 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel) {
18808 TEST_REQUIRES_X86_XOP;
18809 for (size_t channels = 1; channels <= 120; channels += 23) {
18810 DWConvMicrokernelTester()
18811 .cr(24)
18812 .kr(25)
18813 .channels(channels)
18814 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018815 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018816 }
18817 }
18818
18819 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_step) {
18820 TEST_REQUIRES_X86_XOP;
18821 for (size_t channels = 1; channels <= 120; channels += 23) {
18822 for (size_t step = 2; step <= 25; step++) {
18823 DWConvMicrokernelTester()
18824 .cr(24)
18825 .kr(25)
18826 .channels(channels)
18827 .width(3)
18828 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018829 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018830 }
18831 }
18832 }
18833
18834 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
18835 TEST_REQUIRES_X86_XOP;
18836 for (size_t channels = 1; channels <= 120; channels += 23) {
18837 DWConvMicrokernelTester()
18838 .cr(24)
18839 .kr(25)
18840 .channels(24)
18841 .width(5)
18842 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080018843 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018844 }
18845 }
18846
18847 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmin) {
18848 TEST_REQUIRES_X86_XOP;
18849 for (size_t channels = 1; channels <= 120; channels += 23) {
18850 DWConvMicrokernelTester()
18851 .cr(24)
18852 .kr(25)
18853 .channels(channels)
18854 .width(3)
18855 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018856 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018857 }
18858 }
18859
18860 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmax) {
18861 TEST_REQUIRES_X86_XOP;
18862 for (size_t channels = 1; channels <= 120; channels += 23) {
18863 DWConvMicrokernelTester()
18864 .cr(24)
18865 .kr(25)
18866 .channels(channels)
18867 .width(3)
18868 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018869 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018870 }
18871 }
18872
18873 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, input_offset) {
18874 TEST_REQUIRES_X86_XOP;
18875 for (uint32_t channels = 48; channels < 384; channels += 72) {
18876 DWConvMicrokernelTester()
18877 .cr(24)
18878 .kr(25)
18879 .channels(channels)
18880 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080018881 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018882 }
18883 }
18884
18885 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, zero) {
18886 TEST_REQUIRES_X86_XOP;
18887 for (uint32_t mz = 0; mz < 25; mz++) {
18888 for (uint32_t channels = 48; channels < 384; channels += 72) {
18889 DWConvMicrokernelTester()
18890 .cr(24)
18891 .kr(25)
18892 .channels(channels)
18893 .input_offset(464)
18894 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018895 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhancaf48312021-06-01 20:20:58 -070018896 }
18897 }
18898 }
18899#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18900
18901
18902#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018903 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
18904 TEST_REQUIRES_X86_AVX2;
18905 DWConvMicrokernelTester()
18906 .cr(8)
18907 .kr(25)
18908 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018909 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018910 }
18911
18912 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
18913 TEST_REQUIRES_X86_AVX2;
18914 for (uint32_t channels = 16; channels < 128; channels += 24) {
18915 DWConvMicrokernelTester()
18916 .cr(8)
18917 .kr(25)
18918 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018919 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018920 }
18921 }
18922
18923 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
18924 TEST_REQUIRES_X86_AVX2;
18925 for (uint32_t channels = 16; channels < 128; channels += 24) {
18926 DWConvMicrokernelTester()
18927 .cr(8)
18928 .kr(25)
18929 .channels(channels)
18930 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018931 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018932 }
18933 }
18934
18935 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
18936 TEST_REQUIRES_X86_AVX2;
18937 for (uint32_t channels = 16; channels < 128; channels += 24) {
18938 DWConvMicrokernelTester()
18939 .cr(8)
18940 .kr(25)
18941 .channels(channels)
18942 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018944 }
18945 }
18946
18947 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
18948 TEST_REQUIRES_X86_AVX2;
18949 for (uint32_t channels = 1; channels < 8; channels++) {
18950 DWConvMicrokernelTester()
18951 .cr(8)
18952 .kr(25)
18953 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018954 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018955 }
18956 }
18957
18958 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
18959 TEST_REQUIRES_X86_AVX2;
18960 for (uint32_t channels = 9; channels < 16; channels++) {
18961 DWConvMicrokernelTester()
18962 .cr(8)
18963 .kr(25)
18964 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018965 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018966 }
18967 }
18968
18969 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
18970 TEST_REQUIRES_X86_AVX2;
18971 for (uint32_t channels = 9; channels < 16; channels++) {
18972 DWConvMicrokernelTester()
18973 .cr(8)
18974 .kr(25)
18975 .channels(channels)
18976 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018977 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018978 }
18979 }
18980
18981 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
18982 TEST_REQUIRES_X86_AVX2;
18983 for (uint32_t channels = 9; channels < 16; channels++) {
18984 DWConvMicrokernelTester()
18985 .cr(8)
18986 .kr(25)
18987 .channels(channels)
18988 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070018990 }
18991 }
18992
18993 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
18994 TEST_REQUIRES_X86_AVX2;
18995 for (size_t channels = 1; channels <= 40; channels += 7) {
18996 DWConvMicrokernelTester()
18997 .cr(8)
18998 .kr(25)
18999 .channels(channels)
19000 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019001 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019002 }
19003 }
19004
19005 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
19006 TEST_REQUIRES_X86_AVX2;
19007 for (size_t channels = 1; channels <= 40; channels += 7) {
19008 for (size_t step = 2; step <= 25; step++) {
19009 DWConvMicrokernelTester()
19010 .cr(8)
19011 .kr(25)
19012 .channels(channels)
19013 .width(3)
19014 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019015 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019016 }
19017 }
19018 }
19019
19020 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
19021 TEST_REQUIRES_X86_AVX2;
19022 for (size_t channels = 1; channels <= 40; channels += 7) {
19023 DWConvMicrokernelTester()
19024 .cr(8)
19025 .kr(25)
19026 .channels(8)
19027 .width(5)
19028 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019029 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019030 }
19031 }
19032
19033 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
19034 TEST_REQUIRES_X86_AVX2;
19035 for (size_t channels = 1; channels <= 40; channels += 7) {
19036 DWConvMicrokernelTester()
19037 .cr(8)
19038 .kr(25)
19039 .channels(channels)
19040 .width(3)
19041 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019042 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019043 }
19044 }
19045
19046 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
19047 TEST_REQUIRES_X86_AVX2;
19048 for (size_t channels = 1; channels <= 40; channels += 7) {
19049 DWConvMicrokernelTester()
19050 .cr(8)
19051 .kr(25)
19052 .channels(channels)
19053 .width(3)
19054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019055 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019056 }
19057 }
19058
19059 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
19060 TEST_REQUIRES_X86_AVX2;
19061 for (uint32_t channels = 16; channels < 128; channels += 24) {
19062 DWConvMicrokernelTester()
19063 .cr(8)
19064 .kr(25)
19065 .channels(channels)
19066 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080019067 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019068 }
19069 }
19070
19071 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
19072 TEST_REQUIRES_X86_AVX2;
19073 for (uint32_t mz = 0; mz < 25; mz++) {
19074 for (uint32_t channels = 16; channels < 128; channels += 24) {
19075 DWConvMicrokernelTester()
19076 .cr(8)
19077 .kr(25)
19078 .channels(channels)
19079 .input_offset(176)
19080 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019081 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019082 }
19083 }
19084 }
19085#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19086
19087
19088#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19089 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
19090 TEST_REQUIRES_X86_AVX2;
19091 DWConvMicrokernelTester()
19092 .cr(16)
19093 .kr(25)
19094 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080019095 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019096 }
19097
19098 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
19099 TEST_REQUIRES_X86_AVX2;
19100 for (uint32_t channels = 32; channels < 256; channels += 48) {
19101 DWConvMicrokernelTester()
19102 .cr(16)
19103 .kr(25)
19104 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019105 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019106 }
19107 }
19108
19109 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
19110 TEST_REQUIRES_X86_AVX2;
19111 for (uint32_t channels = 32; channels < 256; channels += 48) {
19112 DWConvMicrokernelTester()
19113 .cr(16)
19114 .kr(25)
19115 .channels(channels)
19116 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019117 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019118 }
19119 }
19120
19121 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
19122 TEST_REQUIRES_X86_AVX2;
19123 for (uint32_t channels = 32; channels < 256; channels += 48) {
19124 DWConvMicrokernelTester()
19125 .cr(16)
19126 .kr(25)
19127 .channels(channels)
19128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019129 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019130 }
19131 }
19132
19133 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
19134 TEST_REQUIRES_X86_AVX2;
19135 for (uint32_t channels = 1; channels < 16; channels++) {
19136 DWConvMicrokernelTester()
19137 .cr(16)
19138 .kr(25)
19139 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019140 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019141 }
19142 }
19143
19144 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
19145 TEST_REQUIRES_X86_AVX2;
19146 for (uint32_t channels = 17; channels < 32; channels++) {
19147 DWConvMicrokernelTester()
19148 .cr(16)
19149 .kr(25)
19150 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019151 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019152 }
19153 }
19154
19155 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
19156 TEST_REQUIRES_X86_AVX2;
19157 for (uint32_t channels = 17; channels < 32; channels++) {
19158 DWConvMicrokernelTester()
19159 .cr(16)
19160 .kr(25)
19161 .channels(channels)
19162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019163 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019164 }
19165 }
19166
19167 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
19168 TEST_REQUIRES_X86_AVX2;
19169 for (uint32_t channels = 17; channels < 32; channels++) {
19170 DWConvMicrokernelTester()
19171 .cr(16)
19172 .kr(25)
19173 .channels(channels)
19174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019175 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019176 }
19177 }
19178
19179 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
19180 TEST_REQUIRES_X86_AVX2;
19181 for (size_t channels = 1; channels <= 80; channels += 15) {
19182 DWConvMicrokernelTester()
19183 .cr(16)
19184 .kr(25)
19185 .channels(channels)
19186 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019187 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019188 }
19189 }
19190
19191 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
19192 TEST_REQUIRES_X86_AVX2;
19193 for (size_t channels = 1; channels <= 80; channels += 15) {
19194 for (size_t step = 2; step <= 25; step++) {
19195 DWConvMicrokernelTester()
19196 .cr(16)
19197 .kr(25)
19198 .channels(channels)
19199 .width(3)
19200 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019201 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019202 }
19203 }
19204 }
19205
19206 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
19207 TEST_REQUIRES_X86_AVX2;
19208 for (size_t channels = 1; channels <= 80; channels += 15) {
19209 DWConvMicrokernelTester()
19210 .cr(16)
19211 .kr(25)
19212 .channels(16)
19213 .width(5)
19214 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019215 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019216 }
19217 }
19218
19219 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
19220 TEST_REQUIRES_X86_AVX2;
19221 for (size_t channels = 1; channels <= 80; channels += 15) {
19222 DWConvMicrokernelTester()
19223 .cr(16)
19224 .kr(25)
19225 .channels(channels)
19226 .width(3)
19227 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019228 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019229 }
19230 }
19231
19232 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
19233 TEST_REQUIRES_X86_AVX2;
19234 for (size_t channels = 1; channels <= 80; channels += 15) {
19235 DWConvMicrokernelTester()
19236 .cr(16)
19237 .kr(25)
19238 .channels(channels)
19239 .width(3)
19240 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019241 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019242 }
19243 }
19244
19245 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
19246 TEST_REQUIRES_X86_AVX2;
19247 for (uint32_t channels = 32; channels < 256; channels += 48) {
19248 DWConvMicrokernelTester()
19249 .cr(16)
19250 .kr(25)
19251 .channels(channels)
19252 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080019253 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019254 }
19255 }
19256
19257 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
19258 TEST_REQUIRES_X86_AVX2;
19259 for (uint32_t mz = 0; mz < 25; mz++) {
19260 for (uint32_t channels = 32; channels < 256; channels += 48) {
19261 DWConvMicrokernelTester()
19262 .cr(16)
19263 .kr(25)
19264 .channels(channels)
19265 .input_offset(304)
19266 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019267 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019268 }
19269 }
19270 }
19271#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19272
19273
19274#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19275 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
19276 TEST_REQUIRES_X86_AVX2;
19277 DWConvMicrokernelTester()
19278 .cr(24)
19279 .kr(25)
19280 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080019281 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019282 }
19283
19284 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
19285 TEST_REQUIRES_X86_AVX2;
19286 for (uint32_t channels = 48; channels < 384; channels += 72) {
19287 DWConvMicrokernelTester()
19288 .cr(24)
19289 .kr(25)
19290 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019291 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019292 }
19293 }
19294
19295 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
19296 TEST_REQUIRES_X86_AVX2;
19297 for (uint32_t channels = 48; channels < 384; channels += 72) {
19298 DWConvMicrokernelTester()
19299 .cr(24)
19300 .kr(25)
19301 .channels(channels)
19302 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019303 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019304 }
19305 }
19306
19307 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
19308 TEST_REQUIRES_X86_AVX2;
19309 for (uint32_t channels = 48; channels < 384; channels += 72) {
19310 DWConvMicrokernelTester()
19311 .cr(24)
19312 .kr(25)
19313 .channels(channels)
19314 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019315 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019316 }
19317 }
19318
19319 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
19320 TEST_REQUIRES_X86_AVX2;
19321 for (uint32_t channels = 1; channels < 24; channels++) {
19322 DWConvMicrokernelTester()
19323 .cr(24)
19324 .kr(25)
19325 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019326 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019327 }
19328 }
19329
19330 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
19331 TEST_REQUIRES_X86_AVX2;
19332 for (uint32_t channels = 25; channels < 48; channels++) {
19333 DWConvMicrokernelTester()
19334 .cr(24)
19335 .kr(25)
19336 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019337 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019338 }
19339 }
19340
19341 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
19342 TEST_REQUIRES_X86_AVX2;
19343 for (uint32_t channels = 25; channels < 48; channels++) {
19344 DWConvMicrokernelTester()
19345 .cr(24)
19346 .kr(25)
19347 .channels(channels)
19348 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019349 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019350 }
19351 }
19352
19353 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
19354 TEST_REQUIRES_X86_AVX2;
19355 for (uint32_t channels = 25; channels < 48; channels++) {
19356 DWConvMicrokernelTester()
19357 .cr(24)
19358 .kr(25)
19359 .channels(channels)
19360 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019361 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019362 }
19363 }
19364
19365 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
19366 TEST_REQUIRES_X86_AVX2;
19367 for (size_t channels = 1; channels <= 120; channels += 23) {
19368 DWConvMicrokernelTester()
19369 .cr(24)
19370 .kr(25)
19371 .channels(channels)
19372 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019373 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019374 }
19375 }
19376
19377 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
19378 TEST_REQUIRES_X86_AVX2;
19379 for (size_t channels = 1; channels <= 120; channels += 23) {
19380 for (size_t step = 2; step <= 25; step++) {
19381 DWConvMicrokernelTester()
19382 .cr(24)
19383 .kr(25)
19384 .channels(channels)
19385 .width(3)
19386 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019387 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019388 }
19389 }
19390 }
19391
19392 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
19393 TEST_REQUIRES_X86_AVX2;
19394 for (size_t channels = 1; channels <= 120; channels += 23) {
19395 DWConvMicrokernelTester()
19396 .cr(24)
19397 .kr(25)
19398 .channels(24)
19399 .width(5)
19400 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080019401 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019402 }
19403 }
19404
19405 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
19406 TEST_REQUIRES_X86_AVX2;
19407 for (size_t channels = 1; channels <= 120; channels += 23) {
19408 DWConvMicrokernelTester()
19409 .cr(24)
19410 .kr(25)
19411 .channels(channels)
19412 .width(3)
19413 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019414 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019415 }
19416 }
19417
19418 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
19419 TEST_REQUIRES_X86_AVX2;
19420 for (size_t channels = 1; channels <= 120; channels += 23) {
19421 DWConvMicrokernelTester()
19422 .cr(24)
19423 .kr(25)
19424 .channels(channels)
19425 .width(3)
19426 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019427 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019428 }
19429 }
19430
19431 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
19432 TEST_REQUIRES_X86_AVX2;
19433 for (uint32_t channels = 48; channels < 384; channels += 72) {
19434 DWConvMicrokernelTester()
19435 .cr(24)
19436 .kr(25)
19437 .channels(channels)
19438 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080019439 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019440 }
19441 }
19442
19443 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
19444 TEST_REQUIRES_X86_AVX2;
19445 for (uint32_t mz = 0; mz < 25; mz++) {
19446 for (uint32_t channels = 48; channels < 384; channels += 72) {
19447 DWConvMicrokernelTester()
19448 .cr(24)
19449 .kr(25)
19450 .channels(channels)
19451 .input_offset(464)
19452 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019453 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019454 }
19455 }
19456 }
19457#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19458
19459
19460#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19461 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
19462 TEST_REQUIRES_X86_AVX2;
19463 DWConvMicrokernelTester()
19464 .cr(32)
19465 .kr(25)
19466 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080019467 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019468 }
19469
19470 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
19471 TEST_REQUIRES_X86_AVX2;
19472 for (uint32_t channels = 64; channels < 512; channels += 96) {
19473 DWConvMicrokernelTester()
19474 .cr(32)
19475 .kr(25)
19476 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019477 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019478 }
19479 }
19480
19481 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
19482 TEST_REQUIRES_X86_AVX2;
19483 for (uint32_t channels = 64; channels < 512; channels += 96) {
19484 DWConvMicrokernelTester()
19485 .cr(32)
19486 .kr(25)
19487 .channels(channels)
19488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019489 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019490 }
19491 }
19492
19493 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
19494 TEST_REQUIRES_X86_AVX2;
19495 for (uint32_t channels = 64; channels < 512; channels += 96) {
19496 DWConvMicrokernelTester()
19497 .cr(32)
19498 .kr(25)
19499 .channels(channels)
19500 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019501 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019502 }
19503 }
19504
19505 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
19506 TEST_REQUIRES_X86_AVX2;
19507 for (uint32_t channels = 1; channels < 32; channels++) {
19508 DWConvMicrokernelTester()
19509 .cr(32)
19510 .kr(25)
19511 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019512 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019513 }
19514 }
19515
19516 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
19517 TEST_REQUIRES_X86_AVX2;
19518 for (uint32_t channels = 33; channels < 64; channels++) {
19519 DWConvMicrokernelTester()
19520 .cr(32)
19521 .kr(25)
19522 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019523 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019524 }
19525 }
19526
19527 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
19528 TEST_REQUIRES_X86_AVX2;
19529 for (uint32_t channels = 33; channels < 64; channels++) {
19530 DWConvMicrokernelTester()
19531 .cr(32)
19532 .kr(25)
19533 .channels(channels)
19534 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019535 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019536 }
19537 }
19538
19539 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
19540 TEST_REQUIRES_X86_AVX2;
19541 for (uint32_t channels = 33; channels < 64; channels++) {
19542 DWConvMicrokernelTester()
19543 .cr(32)
19544 .kr(25)
19545 .channels(channels)
19546 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019547 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019548 }
19549 }
19550
19551 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
19552 TEST_REQUIRES_X86_AVX2;
19553 for (size_t channels = 1; channels <= 160; channels += 31) {
19554 DWConvMicrokernelTester()
19555 .cr(32)
19556 .kr(25)
19557 .channels(channels)
19558 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019559 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019560 }
19561 }
19562
19563 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
19564 TEST_REQUIRES_X86_AVX2;
19565 for (size_t channels = 1; channels <= 160; channels += 31) {
19566 for (size_t step = 2; step <= 25; step++) {
19567 DWConvMicrokernelTester()
19568 .cr(32)
19569 .kr(25)
19570 .channels(channels)
19571 .width(3)
19572 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019573 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019574 }
19575 }
19576 }
19577
19578 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
19579 TEST_REQUIRES_X86_AVX2;
19580 for (size_t channels = 1; channels <= 160; channels += 31) {
19581 DWConvMicrokernelTester()
19582 .cr(32)
19583 .kr(25)
19584 .channels(32)
19585 .width(5)
19586 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080019587 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019588 }
19589 }
19590
19591 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
19592 TEST_REQUIRES_X86_AVX2;
19593 for (size_t channels = 1; channels <= 160; channels += 31) {
19594 DWConvMicrokernelTester()
19595 .cr(32)
19596 .kr(25)
19597 .channels(channels)
19598 .width(3)
19599 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019600 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019601 }
19602 }
19603
19604 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
19605 TEST_REQUIRES_X86_AVX2;
19606 for (size_t channels = 1; channels <= 160; channels += 31) {
19607 DWConvMicrokernelTester()
19608 .cr(32)
19609 .kr(25)
19610 .channels(channels)
19611 .width(3)
19612 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019613 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019614 }
19615 }
19616
19617 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
19618 TEST_REQUIRES_X86_AVX2;
19619 for (uint32_t channels = 64; channels < 512; channels += 96) {
19620 DWConvMicrokernelTester()
19621 .cr(32)
19622 .kr(25)
19623 .channels(channels)
19624 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080019625 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019626 }
19627 }
19628
19629 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
19630 TEST_REQUIRES_X86_AVX2;
19631 for (uint32_t mz = 0; mz < 25; mz++) {
19632 for (uint32_t channels = 64; channels < 512; channels += 96) {
19633 DWConvMicrokernelTester()
19634 .cr(32)
19635 .kr(25)
19636 .channels(channels)
19637 .input_offset(592)
19638 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019639 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070019640 }
19641 }
19642 }
19643#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -070019644
19645
19646#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19647 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
19648 TEST_REQUIRES_X86_AVX512SKX;
19649 DWConvMicrokernelTester()
19650 .cr(16)
19651 .kr(25)
19652 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080019653 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019654 }
19655
19656 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
19657 TEST_REQUIRES_X86_AVX512SKX;
19658 for (uint32_t channels = 32; channels < 256; channels += 48) {
19659 DWConvMicrokernelTester()
19660 .cr(16)
19661 .kr(25)
19662 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019663 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019664 }
19665 }
19666
19667 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
19668 TEST_REQUIRES_X86_AVX512SKX;
19669 for (uint32_t channels = 32; channels < 256; channels += 48) {
19670 DWConvMicrokernelTester()
19671 .cr(16)
19672 .kr(25)
19673 .channels(channels)
19674 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019675 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019676 }
19677 }
19678
19679 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
19680 TEST_REQUIRES_X86_AVX512SKX;
19681 for (uint32_t channels = 32; channels < 256; channels += 48) {
19682 DWConvMicrokernelTester()
19683 .cr(16)
19684 .kr(25)
19685 .channels(channels)
19686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019688 }
19689 }
19690
19691 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
19692 TEST_REQUIRES_X86_AVX512SKX;
19693 for (uint32_t channels = 1; channels < 16; channels++) {
19694 DWConvMicrokernelTester()
19695 .cr(16)
19696 .kr(25)
19697 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019698 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019699 }
19700 }
19701
19702 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
19703 TEST_REQUIRES_X86_AVX512SKX;
19704 for (uint32_t channels = 17; channels < 32; channels++) {
19705 DWConvMicrokernelTester()
19706 .cr(16)
19707 .kr(25)
19708 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019709 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019710 }
19711 }
19712
19713 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
19714 TEST_REQUIRES_X86_AVX512SKX;
19715 for (uint32_t channels = 17; channels < 32; channels++) {
19716 DWConvMicrokernelTester()
19717 .cr(16)
19718 .kr(25)
19719 .channels(channels)
19720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019721 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019722 }
19723 }
19724
19725 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
19726 TEST_REQUIRES_X86_AVX512SKX;
19727 for (uint32_t channels = 17; channels < 32; channels++) {
19728 DWConvMicrokernelTester()
19729 .cr(16)
19730 .kr(25)
19731 .channels(channels)
19732 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019733 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019734 }
19735 }
19736
19737 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
19738 TEST_REQUIRES_X86_AVX512SKX;
19739 for (size_t channels = 1; channels <= 80; channels += 15) {
19740 DWConvMicrokernelTester()
19741 .cr(16)
19742 .kr(25)
19743 .channels(channels)
19744 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019745 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019746 }
19747 }
19748
19749 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
19750 TEST_REQUIRES_X86_AVX512SKX;
19751 for (size_t channels = 1; channels <= 80; channels += 15) {
19752 for (size_t step = 2; step <= 25; step++) {
19753 DWConvMicrokernelTester()
19754 .cr(16)
19755 .kr(25)
19756 .channels(channels)
19757 .width(3)
19758 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019759 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019760 }
19761 }
19762 }
19763
19764 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
19765 TEST_REQUIRES_X86_AVX512SKX;
19766 for (size_t channels = 1; channels <= 80; channels += 15) {
19767 DWConvMicrokernelTester()
19768 .cr(16)
19769 .kr(25)
19770 .channels(16)
19771 .width(5)
19772 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019773 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019774 }
19775 }
19776
19777 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
19778 TEST_REQUIRES_X86_AVX512SKX;
19779 for (size_t channels = 1; channels <= 80; channels += 15) {
19780 DWConvMicrokernelTester()
19781 .cr(16)
19782 .kr(25)
19783 .channels(channels)
19784 .width(3)
19785 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019786 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019787 }
19788 }
19789
19790 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
19791 TEST_REQUIRES_X86_AVX512SKX;
19792 for (size_t channels = 1; channels <= 80; channels += 15) {
19793 DWConvMicrokernelTester()
19794 .cr(16)
19795 .kr(25)
19796 .channels(channels)
19797 .width(3)
19798 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019799 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019800 }
19801 }
19802
19803 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
19804 TEST_REQUIRES_X86_AVX512SKX;
19805 for (uint32_t channels = 32; channels < 256; channels += 48) {
19806 DWConvMicrokernelTester()
19807 .cr(16)
19808 .kr(25)
19809 .channels(channels)
19810 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080019811 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019812 }
19813 }
19814
19815 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
19816 TEST_REQUIRES_X86_AVX512SKX;
19817 for (uint32_t mz = 0; mz < 25; mz++) {
19818 for (uint32_t channels = 32; channels < 256; channels += 48) {
19819 DWConvMicrokernelTester()
19820 .cr(16)
19821 .kr(25)
19822 .channels(channels)
19823 .input_offset(304)
19824 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019825 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019826 }
19827 }
19828 }
19829#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19830
19831
19832#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19833 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
19834 TEST_REQUIRES_X86_AVX512SKX;
19835 DWConvMicrokernelTester()
19836 .cr(32)
19837 .kr(25)
19838 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080019839 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019840 }
19841
19842 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
19843 TEST_REQUIRES_X86_AVX512SKX;
19844 for (uint32_t channels = 64; channels < 512; channels += 96) {
19845 DWConvMicrokernelTester()
19846 .cr(32)
19847 .kr(25)
19848 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019849 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019850 }
19851 }
19852
19853 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
19854 TEST_REQUIRES_X86_AVX512SKX;
19855 for (uint32_t channels = 64; channels < 512; channels += 96) {
19856 DWConvMicrokernelTester()
19857 .cr(32)
19858 .kr(25)
19859 .channels(channels)
19860 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019861 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019862 }
19863 }
19864
19865 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
19866 TEST_REQUIRES_X86_AVX512SKX;
19867 for (uint32_t channels = 64; channels < 512; channels += 96) {
19868 DWConvMicrokernelTester()
19869 .cr(32)
19870 .kr(25)
19871 .channels(channels)
19872 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019873 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019874 }
19875 }
19876
19877 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
19878 TEST_REQUIRES_X86_AVX512SKX;
19879 for (uint32_t channels = 1; channels < 32; channels++) {
19880 DWConvMicrokernelTester()
19881 .cr(32)
19882 .kr(25)
19883 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019884 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019885 }
19886 }
19887
19888 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
19889 TEST_REQUIRES_X86_AVX512SKX;
19890 for (uint32_t channels = 33; channels < 64; channels++) {
19891 DWConvMicrokernelTester()
19892 .cr(32)
19893 .kr(25)
19894 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019895 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019896 }
19897 }
19898
19899 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
19900 TEST_REQUIRES_X86_AVX512SKX;
19901 for (uint32_t channels = 33; channels < 64; channels++) {
19902 DWConvMicrokernelTester()
19903 .cr(32)
19904 .kr(25)
19905 .channels(channels)
19906 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019907 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019908 }
19909 }
19910
19911 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
19912 TEST_REQUIRES_X86_AVX512SKX;
19913 for (uint32_t channels = 33; channels < 64; channels++) {
19914 DWConvMicrokernelTester()
19915 .cr(32)
19916 .kr(25)
19917 .channels(channels)
19918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019919 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019920 }
19921 }
19922
19923 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
19924 TEST_REQUIRES_X86_AVX512SKX;
19925 for (size_t channels = 1; channels <= 160; channels += 31) {
19926 DWConvMicrokernelTester()
19927 .cr(32)
19928 .kr(25)
19929 .channels(channels)
19930 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019931 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019932 }
19933 }
19934
19935 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
19936 TEST_REQUIRES_X86_AVX512SKX;
19937 for (size_t channels = 1; channels <= 160; channels += 31) {
19938 for (size_t step = 2; step <= 25; step++) {
19939 DWConvMicrokernelTester()
19940 .cr(32)
19941 .kr(25)
19942 .channels(channels)
19943 .width(3)
19944 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019945 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019946 }
19947 }
19948 }
19949
19950 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
19951 TEST_REQUIRES_X86_AVX512SKX;
19952 for (size_t channels = 1; channels <= 160; channels += 31) {
19953 DWConvMicrokernelTester()
19954 .cr(32)
19955 .kr(25)
19956 .channels(32)
19957 .width(5)
19958 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080019959 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019960 }
19961 }
19962
19963 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
19964 TEST_REQUIRES_X86_AVX512SKX;
19965 for (size_t channels = 1; channels <= 160; channels += 31) {
19966 DWConvMicrokernelTester()
19967 .cr(32)
19968 .kr(25)
19969 .channels(channels)
19970 .width(3)
19971 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019972 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019973 }
19974 }
19975
19976 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
19977 TEST_REQUIRES_X86_AVX512SKX;
19978 for (size_t channels = 1; channels <= 160; channels += 31) {
19979 DWConvMicrokernelTester()
19980 .cr(32)
19981 .kr(25)
19982 .channels(channels)
19983 .width(3)
19984 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019985 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019986 }
19987 }
19988
19989 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
19990 TEST_REQUIRES_X86_AVX512SKX;
19991 for (uint32_t channels = 64; channels < 512; channels += 96) {
19992 DWConvMicrokernelTester()
19993 .cr(32)
19994 .kr(25)
19995 .channels(channels)
19996 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080019997 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070019998 }
19999 }
20000
20001 TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
20002 TEST_REQUIRES_X86_AVX512SKX;
20003 for (uint32_t mz = 0; mz < 25; mz++) {
20004 for (uint32_t channels = 64; channels < 512; channels += 96) {
20005 DWConvMicrokernelTester()
20006 .cr(32)
20007 .kr(25)
20008 .channels(channels)
20009 .input_offset(592)
20010 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020011 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070020012 }
20013 }
20014 }
20015#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan85d772b2021-06-30 11:02:42 -070020016
20017
Marat Dukhan4c617792021-12-21 15:47:58 -080020018#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020019 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
20020 DWConvMicrokernelTester()
20021 .cr(8)
20022 .kr(25)
20023 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020025 }
20026
20027 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
20028 for (uint32_t channels = 16; channels < 128; channels += 24) {
20029 DWConvMicrokernelTester()
20030 .cr(8)
20031 .kr(25)
20032 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020033 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020034 }
20035 }
20036
20037 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
20038 for (uint32_t channels = 16; channels < 128; channels += 24) {
20039 DWConvMicrokernelTester()
20040 .cr(8)
20041 .kr(25)
20042 .channels(channels)
20043 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020044 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020045 }
20046 }
20047
20048 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
20049 for (uint32_t channels = 16; channels < 128; channels += 24) {
20050 DWConvMicrokernelTester()
20051 .cr(8)
20052 .kr(25)
20053 .channels(channels)
20054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020055 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020056 }
20057 }
20058
20059 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
20060 for (uint32_t channels = 1; channels < 8; channels++) {
20061 DWConvMicrokernelTester()
20062 .cr(8)
20063 .kr(25)
20064 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020065 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020066 }
20067 }
20068
20069 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
20070 for (uint32_t channels = 9; channels < 16; channels++) {
20071 DWConvMicrokernelTester()
20072 .cr(8)
20073 .kr(25)
20074 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020075 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020076 }
20077 }
20078
20079 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
20080 for (uint32_t channels = 9; channels < 16; channels++) {
20081 DWConvMicrokernelTester()
20082 .cr(8)
20083 .kr(25)
20084 .channels(channels)
20085 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020086 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020087 }
20088 }
20089
20090 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
20091 for (uint32_t channels = 9; channels < 16; channels++) {
20092 DWConvMicrokernelTester()
20093 .cr(8)
20094 .kr(25)
20095 .channels(channels)
20096 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020097 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020098 }
20099 }
20100
20101 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
20102 for (size_t channels = 1; channels <= 40; channels += 7) {
20103 DWConvMicrokernelTester()
20104 .cr(8)
20105 .kr(25)
20106 .channels(channels)
20107 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020109 }
20110 }
20111
20112 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
20113 for (size_t channels = 1; channels <= 40; channels += 7) {
20114 for (size_t step = 2; step <= 25; step++) {
20115 DWConvMicrokernelTester()
20116 .cr(8)
20117 .kr(25)
20118 .channels(channels)
20119 .width(3)
20120 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020121 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020122 }
20123 }
20124 }
20125
20126 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
20127 for (size_t channels = 1; channels <= 40; channels += 7) {
20128 DWConvMicrokernelTester()
20129 .cr(8)
20130 .kr(25)
20131 .channels(8)
20132 .width(5)
20133 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020134 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020135 }
20136 }
20137
20138 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
20139 for (size_t channels = 1; channels <= 40; channels += 7) {
20140 DWConvMicrokernelTester()
20141 .cr(8)
20142 .kr(25)
20143 .channels(channels)
20144 .width(3)
20145 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020146 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020147 }
20148 }
20149
20150 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
20151 for (size_t channels = 1; channels <= 40; channels += 7) {
20152 DWConvMicrokernelTester()
20153 .cr(8)
20154 .kr(25)
20155 .channels(channels)
20156 .width(3)
20157 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020158 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020159 }
20160 }
20161
20162 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
20163 for (uint32_t channels = 16; channels < 128; channels += 24) {
20164 DWConvMicrokernelTester()
20165 .cr(8)
20166 .kr(25)
20167 .channels(channels)
20168 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080020169 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020170 }
20171 }
20172
20173 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
20174 for (uint32_t mz = 0; mz < 25; mz++) {
20175 for (uint32_t channels = 16; channels < 128; channels += 24) {
20176 DWConvMicrokernelTester()
20177 .cr(8)
20178 .kr(25)
20179 .channels(channels)
20180 .input_offset(176)
20181 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020182 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020183 }
20184 }
20185 }
Marat Dukhan4c617792021-12-21 15:47:58 -080020186#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020187
20188
Marat Dukhan4c617792021-12-21 15:47:58 -080020189#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020190 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
20191 DWConvMicrokernelTester()
20192 .cr(16)
20193 .kr(25)
20194 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020195 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020196 }
20197
20198 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
20199 for (uint32_t channels = 32; channels < 256; channels += 48) {
20200 DWConvMicrokernelTester()
20201 .cr(16)
20202 .kr(25)
20203 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020204 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020205 }
20206 }
20207
20208 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
20209 for (uint32_t channels = 32; channels < 256; channels += 48) {
20210 DWConvMicrokernelTester()
20211 .cr(16)
20212 .kr(25)
20213 .channels(channels)
20214 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020215 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020216 }
20217 }
20218
20219 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
20220 for (uint32_t channels = 32; channels < 256; channels += 48) {
20221 DWConvMicrokernelTester()
20222 .cr(16)
20223 .kr(25)
20224 .channels(channels)
20225 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020226 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020227 }
20228 }
20229
20230 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
20231 for (uint32_t channels = 1; channels < 16; channels++) {
20232 DWConvMicrokernelTester()
20233 .cr(16)
20234 .kr(25)
20235 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020236 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020237 }
20238 }
20239
20240 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
20241 for (uint32_t channels = 17; channels < 32; channels++) {
20242 DWConvMicrokernelTester()
20243 .cr(16)
20244 .kr(25)
20245 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020246 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020247 }
20248 }
20249
20250 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
20251 for (uint32_t channels = 17; channels < 32; channels++) {
20252 DWConvMicrokernelTester()
20253 .cr(16)
20254 .kr(25)
20255 .channels(channels)
20256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020257 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020258 }
20259 }
20260
20261 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
20262 for (uint32_t channels = 17; channels < 32; channels++) {
20263 DWConvMicrokernelTester()
20264 .cr(16)
20265 .kr(25)
20266 .channels(channels)
20267 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020268 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020269 }
20270 }
20271
20272 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
20273 for (size_t channels = 1; channels <= 80; channels += 15) {
20274 DWConvMicrokernelTester()
20275 .cr(16)
20276 .kr(25)
20277 .channels(channels)
20278 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020279 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020280 }
20281 }
20282
20283 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
20284 for (size_t channels = 1; channels <= 80; channels += 15) {
20285 for (size_t step = 2; step <= 25; step++) {
20286 DWConvMicrokernelTester()
20287 .cr(16)
20288 .kr(25)
20289 .channels(channels)
20290 .width(3)
20291 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020292 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020293 }
20294 }
20295 }
20296
20297 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
20298 for (size_t channels = 1; channels <= 80; channels += 15) {
20299 DWConvMicrokernelTester()
20300 .cr(16)
20301 .kr(25)
20302 .channels(16)
20303 .width(5)
20304 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020305 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020306 }
20307 }
20308
20309 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
20310 for (size_t channels = 1; channels <= 80; channels += 15) {
20311 DWConvMicrokernelTester()
20312 .cr(16)
20313 .kr(25)
20314 .channels(channels)
20315 .width(3)
20316 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020317 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020318 }
20319 }
20320
20321 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
20322 for (size_t channels = 1; channels <= 80; channels += 15) {
20323 DWConvMicrokernelTester()
20324 .cr(16)
20325 .kr(25)
20326 .channels(channels)
20327 .width(3)
20328 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020329 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020330 }
20331 }
20332
20333 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
20334 for (uint32_t channels = 32; channels < 256; channels += 48) {
20335 DWConvMicrokernelTester()
20336 .cr(16)
20337 .kr(25)
20338 .channels(channels)
20339 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080020340 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020341 }
20342 }
20343
20344 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
20345 for (uint32_t mz = 0; mz < 25; mz++) {
20346 for (uint32_t channels = 32; channels < 256; channels += 48) {
20347 DWConvMicrokernelTester()
20348 .cr(16)
20349 .kr(25)
20350 .channels(channels)
20351 .input_offset(304)
20352 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020353 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020354 }
20355 }
20356 }
Marat Dukhan4c617792021-12-21 15:47:58 -080020357#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020358
20359
Marat Dukhan4c617792021-12-21 15:47:58 -080020360#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020361 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
20362 DWConvMicrokernelTester()
20363 .cr(24)
20364 .kr(25)
20365 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080020366 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020367 }
20368
20369 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
20370 for (uint32_t channels = 48; channels < 384; channels += 72) {
20371 DWConvMicrokernelTester()
20372 .cr(24)
20373 .kr(25)
20374 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020375 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020376 }
20377 }
20378
20379 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
20380 for (uint32_t channels = 48; channels < 384; channels += 72) {
20381 DWConvMicrokernelTester()
20382 .cr(24)
20383 .kr(25)
20384 .channels(channels)
20385 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020386 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020387 }
20388 }
20389
20390 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
20391 for (uint32_t channels = 48; channels < 384; channels += 72) {
20392 DWConvMicrokernelTester()
20393 .cr(24)
20394 .kr(25)
20395 .channels(channels)
20396 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020397 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020398 }
20399 }
20400
20401 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
20402 for (uint32_t channels = 1; channels < 24; channels++) {
20403 DWConvMicrokernelTester()
20404 .cr(24)
20405 .kr(25)
20406 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020407 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020408 }
20409 }
20410
20411 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
20412 for (uint32_t channels = 25; channels < 48; channels++) {
20413 DWConvMicrokernelTester()
20414 .cr(24)
20415 .kr(25)
20416 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020417 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020418 }
20419 }
20420
20421 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
20422 for (uint32_t channels = 25; channels < 48; channels++) {
20423 DWConvMicrokernelTester()
20424 .cr(24)
20425 .kr(25)
20426 .channels(channels)
20427 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020428 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020429 }
20430 }
20431
20432 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
20433 for (uint32_t channels = 25; channels < 48; channels++) {
20434 DWConvMicrokernelTester()
20435 .cr(24)
20436 .kr(25)
20437 .channels(channels)
20438 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020439 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020440 }
20441 }
20442
20443 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
20444 for (size_t channels = 1; channels <= 120; channels += 23) {
20445 DWConvMicrokernelTester()
20446 .cr(24)
20447 .kr(25)
20448 .channels(channels)
20449 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020450 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020451 }
20452 }
20453
20454 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
20455 for (size_t channels = 1; channels <= 120; channels += 23) {
20456 for (size_t step = 2; step <= 25; step++) {
20457 DWConvMicrokernelTester()
20458 .cr(24)
20459 .kr(25)
20460 .channels(channels)
20461 .width(3)
20462 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020463 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020464 }
20465 }
20466 }
20467
20468 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
20469 for (size_t channels = 1; channels <= 120; channels += 23) {
20470 DWConvMicrokernelTester()
20471 .cr(24)
20472 .kr(25)
20473 .channels(24)
20474 .width(5)
20475 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080020476 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020477 }
20478 }
20479
20480 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
20481 for (size_t channels = 1; channels <= 120; channels += 23) {
20482 DWConvMicrokernelTester()
20483 .cr(24)
20484 .kr(25)
20485 .channels(channels)
20486 .width(3)
20487 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020488 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020489 }
20490 }
20491
20492 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
20493 for (size_t channels = 1; channels <= 120; channels += 23) {
20494 DWConvMicrokernelTester()
20495 .cr(24)
20496 .kr(25)
20497 .channels(channels)
20498 .width(3)
20499 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020500 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020501 }
20502 }
20503
20504 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
20505 for (uint32_t channels = 48; channels < 384; channels += 72) {
20506 DWConvMicrokernelTester()
20507 .cr(24)
20508 .kr(25)
20509 .channels(channels)
20510 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080020511 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020512 }
20513 }
20514
20515 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
20516 for (uint32_t mz = 0; mz < 25; mz++) {
20517 for (uint32_t channels = 48; channels < 384; channels += 72) {
20518 DWConvMicrokernelTester()
20519 .cr(24)
20520 .kr(25)
20521 .channels(channels)
20522 .input_offset(464)
20523 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020524 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan69aa6232021-06-30 14:17:26 -070020525 }
20526 }
20527 }
Marat Dukhan4c617792021-12-21 15:47:58 -080020528#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan69aa6232021-06-30 14:17:26 -070020529
20530
Marat Dukhan4c617792021-12-21 15:47:58 -080020531#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070020532 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_eq_8) {
20533 DWConvMicrokernelTester()
20534 .cr(8)
20535 .kr(25)
20536 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020537 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020538 }
20539
20540 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8) {
20541 for (uint32_t channels = 16; channels < 128; channels += 24) {
20542 DWConvMicrokernelTester()
20543 .cr(8)
20544 .kr(25)
20545 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020546 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020547 }
20548 }
20549
20550 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
20551 for (uint32_t channels = 16; channels < 128; channels += 24) {
20552 DWConvMicrokernelTester()
20553 .cr(8)
20554 .kr(25)
20555 .channels(channels)
20556 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020557 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020558 }
20559 }
20560
20561 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
20562 for (uint32_t channels = 16; channels < 128; channels += 24) {
20563 DWConvMicrokernelTester()
20564 .cr(8)
20565 .kr(25)
20566 .channels(channels)
20567 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020568 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020569 }
20570 }
20571
20572 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_lt_8) {
20573 for (uint32_t channels = 1; channels < 8; channels++) {
20574 DWConvMicrokernelTester()
20575 .cr(8)
20576 .kr(25)
20577 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020578 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020579 }
20580 }
20581
20582 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8) {
20583 for (uint32_t channels = 9; channels < 16; channels++) {
20584 DWConvMicrokernelTester()
20585 .cr(8)
20586 .kr(25)
20587 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020588 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020589 }
20590 }
20591
20592 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
20593 for (uint32_t channels = 9; channels < 16; channels++) {
20594 DWConvMicrokernelTester()
20595 .cr(8)
20596 .kr(25)
20597 .channels(channels)
20598 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020599 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020600 }
20601 }
20602
20603 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
20604 for (uint32_t channels = 9; channels < 16; channels++) {
20605 DWConvMicrokernelTester()
20606 .cr(8)
20607 .kr(25)
20608 .channels(channels)
20609 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020610 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020611 }
20612 }
20613
20614 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel) {
20615 for (size_t channels = 1; channels <= 40; channels += 7) {
20616 DWConvMicrokernelTester()
20617 .cr(8)
20618 .kr(25)
20619 .channels(channels)
20620 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020621 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020622 }
20623 }
20624
20625 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
20626 for (size_t channels = 1; channels <= 40; channels += 7) {
20627 for (size_t step = 2; step <= 25; step++) {
20628 DWConvMicrokernelTester()
20629 .cr(8)
20630 .kr(25)
20631 .channels(channels)
20632 .width(3)
20633 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020634 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020635 }
20636 }
20637 }
20638
20639 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
20640 for (size_t channels = 1; channels <= 40; channels += 7) {
20641 DWConvMicrokernelTester()
20642 .cr(8)
20643 .kr(25)
20644 .channels(8)
20645 .width(5)
20646 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020647 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020648 }
20649 }
20650
20651 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
20652 for (size_t channels = 1; channels <= 40; channels += 7) {
20653 DWConvMicrokernelTester()
20654 .cr(8)
20655 .kr(25)
20656 .channels(channels)
20657 .width(3)
20658 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020659 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020660 }
20661 }
20662
20663 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
20664 for (size_t channels = 1; channels <= 40; channels += 7) {
20665 DWConvMicrokernelTester()
20666 .cr(8)
20667 .kr(25)
20668 .channels(channels)
20669 .width(3)
20670 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020671 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020672 }
20673 }
20674
20675 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, input_offset) {
20676 for (uint32_t channels = 16; channels < 128; channels += 24) {
20677 DWConvMicrokernelTester()
20678 .cr(8)
20679 .kr(25)
20680 .channels(channels)
20681 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080020682 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020683 }
20684 }
20685
20686 TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, zero) {
20687 for (uint32_t mz = 0; mz < 25; mz++) {
20688 for (uint32_t channels = 16; channels < 128; channels += 24) {
20689 DWConvMicrokernelTester()
20690 .cr(8)
20691 .kr(25)
20692 .channels(channels)
20693 .input_offset(176)
20694 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020695 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020696 }
20697 }
20698 }
Marat Dukhan4c617792021-12-21 15:47:58 -080020699#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070020700
20701
Marat Dukhan4c617792021-12-21 15:47:58 -080020702#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070020703 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_eq_16) {
20704 DWConvMicrokernelTester()
20705 .cr(16)
20706 .kr(25)
20707 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020708 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020709 }
20710
20711 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16) {
20712 for (uint32_t channels = 32; channels < 256; channels += 48) {
20713 DWConvMicrokernelTester()
20714 .cr(16)
20715 .kr(25)
20716 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020717 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020718 }
20719 }
20720
20721 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
20722 for (uint32_t channels = 32; channels < 256; channels += 48) {
20723 DWConvMicrokernelTester()
20724 .cr(16)
20725 .kr(25)
20726 .channels(channels)
20727 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020728 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020729 }
20730 }
20731
20732 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
20733 for (uint32_t channels = 32; channels < 256; channels += 48) {
20734 DWConvMicrokernelTester()
20735 .cr(16)
20736 .kr(25)
20737 .channels(channels)
20738 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020739 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020740 }
20741 }
20742
20743 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_lt_16) {
20744 for (uint32_t channels = 1; channels < 16; channels++) {
20745 DWConvMicrokernelTester()
20746 .cr(16)
20747 .kr(25)
20748 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020749 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020750 }
20751 }
20752
20753 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16) {
20754 for (uint32_t channels = 17; channels < 32; channels++) {
20755 DWConvMicrokernelTester()
20756 .cr(16)
20757 .kr(25)
20758 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020759 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020760 }
20761 }
20762
20763 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
20764 for (uint32_t channels = 17; channels < 32; channels++) {
20765 DWConvMicrokernelTester()
20766 .cr(16)
20767 .kr(25)
20768 .channels(channels)
20769 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020770 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020771 }
20772 }
20773
20774 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
20775 for (uint32_t channels = 17; channels < 32; channels++) {
20776 DWConvMicrokernelTester()
20777 .cr(16)
20778 .kr(25)
20779 .channels(channels)
20780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020781 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020782 }
20783 }
20784
20785 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel) {
20786 for (size_t channels = 1; channels <= 80; channels += 15) {
20787 DWConvMicrokernelTester()
20788 .cr(16)
20789 .kr(25)
20790 .channels(channels)
20791 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020792 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020793 }
20794 }
20795
20796 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
20797 for (size_t channels = 1; channels <= 80; channels += 15) {
20798 for (size_t step = 2; step <= 25; step++) {
20799 DWConvMicrokernelTester()
20800 .cr(16)
20801 .kr(25)
20802 .channels(channels)
20803 .width(3)
20804 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020805 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020806 }
20807 }
20808 }
20809
20810 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
20811 for (size_t channels = 1; channels <= 80; channels += 15) {
20812 DWConvMicrokernelTester()
20813 .cr(16)
20814 .kr(25)
20815 .channels(16)
20816 .width(5)
20817 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020818 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020819 }
20820 }
20821
20822 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
20823 for (size_t channels = 1; channels <= 80; channels += 15) {
20824 DWConvMicrokernelTester()
20825 .cr(16)
20826 .kr(25)
20827 .channels(channels)
20828 .width(3)
20829 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020830 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020831 }
20832 }
20833
20834 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
20835 for (size_t channels = 1; channels <= 80; channels += 15) {
20836 DWConvMicrokernelTester()
20837 .cr(16)
20838 .kr(25)
20839 .channels(channels)
20840 .width(3)
20841 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020842 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020843 }
20844 }
20845
20846 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, input_offset) {
20847 for (uint32_t channels = 32; channels < 256; channels += 48) {
20848 DWConvMicrokernelTester()
20849 .cr(16)
20850 .kr(25)
20851 .channels(channels)
20852 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080020853 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020854 }
20855 }
20856
20857 TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, zero) {
20858 for (uint32_t mz = 0; mz < 25; mz++) {
20859 for (uint32_t channels = 32; channels < 256; channels += 48) {
20860 DWConvMicrokernelTester()
20861 .cr(16)
20862 .kr(25)
20863 .channels(channels)
20864 .input_offset(304)
20865 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020866 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020867 }
20868 }
20869 }
Marat Dukhan4c617792021-12-21 15:47:58 -080020870#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070020871
20872
Marat Dukhan4c617792021-12-21 15:47:58 -080020873#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070020874 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_eq_24) {
20875 DWConvMicrokernelTester()
20876 .cr(24)
20877 .kr(25)
20878 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080020879 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020880 }
20881
20882 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24) {
20883 for (uint32_t channels = 48; channels < 384; channels += 72) {
20884 DWConvMicrokernelTester()
20885 .cr(24)
20886 .kr(25)
20887 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020888 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020889 }
20890 }
20891
20892 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
20893 for (uint32_t channels = 48; channels < 384; channels += 72) {
20894 DWConvMicrokernelTester()
20895 .cr(24)
20896 .kr(25)
20897 .channels(channels)
20898 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020899 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020900 }
20901 }
20902
20903 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
20904 for (uint32_t channels = 48; channels < 384; channels += 72) {
20905 DWConvMicrokernelTester()
20906 .cr(24)
20907 .kr(25)
20908 .channels(channels)
20909 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020910 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020911 }
20912 }
20913
20914 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_lt_24) {
20915 for (uint32_t channels = 1; channels < 24; channels++) {
20916 DWConvMicrokernelTester()
20917 .cr(24)
20918 .kr(25)
20919 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020920 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020921 }
20922 }
20923
20924 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24) {
20925 for (uint32_t channels = 25; channels < 48; channels++) {
20926 DWConvMicrokernelTester()
20927 .cr(24)
20928 .kr(25)
20929 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020930 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020931 }
20932 }
20933
20934 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
20935 for (uint32_t channels = 25; channels < 48; channels++) {
20936 DWConvMicrokernelTester()
20937 .cr(24)
20938 .kr(25)
20939 .channels(channels)
20940 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020941 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020942 }
20943 }
20944
20945 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
20946 for (uint32_t channels = 25; channels < 48; channels++) {
20947 DWConvMicrokernelTester()
20948 .cr(24)
20949 .kr(25)
20950 .channels(channels)
20951 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020952 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020953 }
20954 }
20955
20956 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel) {
20957 for (size_t channels = 1; channels <= 120; channels += 23) {
20958 DWConvMicrokernelTester()
20959 .cr(24)
20960 .kr(25)
20961 .channels(channels)
20962 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020963 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020964 }
20965 }
20966
20967 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
20968 for (size_t channels = 1; channels <= 120; channels += 23) {
20969 for (size_t step = 2; step <= 25; step++) {
20970 DWConvMicrokernelTester()
20971 .cr(24)
20972 .kr(25)
20973 .channels(channels)
20974 .width(3)
20975 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020976 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020977 }
20978 }
20979 }
20980
20981 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
20982 for (size_t channels = 1; channels <= 120; channels += 23) {
20983 DWConvMicrokernelTester()
20984 .cr(24)
20985 .kr(25)
20986 .channels(24)
20987 .width(5)
20988 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080020989 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070020990 }
20991 }
20992
20993 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
20994 for (size_t channels = 1; channels <= 120; channels += 23) {
20995 DWConvMicrokernelTester()
20996 .cr(24)
20997 .kr(25)
20998 .channels(channels)
20999 .width(3)
21000 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021001 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070021002 }
21003 }
21004
21005 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
21006 for (size_t channels = 1; channels <= 120; channels += 23) {
21007 DWConvMicrokernelTester()
21008 .cr(24)
21009 .kr(25)
21010 .channels(channels)
21011 .width(3)
21012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070021014 }
21015 }
21016
21017 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, input_offset) {
21018 for (uint32_t channels = 48; channels < 384; channels += 72) {
21019 DWConvMicrokernelTester()
21020 .cr(24)
21021 .kr(25)
21022 .channels(channels)
21023 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080021024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070021025 }
21026 }
21027
21028 TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, zero) {
21029 for (uint32_t mz = 0; mz < 25; mz++) {
21030 for (uint32_t channels = 48; channels < 384; channels += 72) {
21031 DWConvMicrokernelTester()
21032 .cr(24)
21033 .kr(25)
21034 .channels(channels)
21035 .input_offset(464)
21036 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070021038 }
21039 }
21040 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021041#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070021042
21043
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021044#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21045 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
21046 DWConvMicrokernelTester()
21047 .cr(1)
21048 .kr(25)
21049 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021050 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021051 }
21052
21053 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
21054 for (uint32_t channels = 2; channels < 10; channels++) {
21055 DWConvMicrokernelTester()
21056 .cr(1)
21057 .kr(25)
21058 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021059 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021060 }
21061 }
21062
21063 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
21064 for (uint32_t channels = 2; channels < 10; channels++) {
21065 DWConvMicrokernelTester()
21066 .cr(1)
21067 .kr(25)
21068 .channels(channels)
21069 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021070 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021071 }
21072 }
21073
21074 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
21075 for (uint32_t channels = 2; channels < 10; channels++) {
21076 DWConvMicrokernelTester()
21077 .cr(1)
21078 .kr(25)
21079 .channels(channels)
21080 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021081 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021082 }
21083 }
21084
21085 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
21086 for (size_t channels = 1; channels <= 5; channels += 1) {
21087 DWConvMicrokernelTester()
21088 .cr(1)
21089 .kr(25)
21090 .channels(channels)
21091 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021092 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021093 }
21094 }
21095
21096 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
21097 for (size_t channels = 1; channels <= 5; channels += 1) {
21098 for (size_t step = 2; step <= 25; step++) {
21099 DWConvMicrokernelTester()
21100 .cr(1)
21101 .kr(25)
21102 .channels(channels)
21103 .width(3)
21104 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021105 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021106 }
21107 }
21108 }
21109
21110 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
21111 for (size_t channels = 1; channels <= 5; channels += 1) {
21112 DWConvMicrokernelTester()
21113 .cr(1)
21114 .kr(25)
21115 .channels(1)
21116 .width(5)
21117 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021118 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021119 }
21120 }
21121
21122 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
21123 for (size_t channels = 1; channels <= 5; channels += 1) {
21124 DWConvMicrokernelTester()
21125 .cr(1)
21126 .kr(25)
21127 .channels(channels)
21128 .width(3)
21129 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021130 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021131 }
21132 }
21133
21134 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
21135 for (size_t channels = 1; channels <= 5; channels += 1) {
21136 DWConvMicrokernelTester()
21137 .cr(1)
21138 .kr(25)
21139 .channels(channels)
21140 .width(3)
21141 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021142 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021143 }
21144 }
21145
21146 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
21147 for (uint32_t channels = 2; channels < 16; channels += 3) {
21148 DWConvMicrokernelTester()
21149 .cr(1)
21150 .kr(25)
21151 .channels(channels)
21152 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080021153 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021154 }
21155 }
21156
21157 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
21158 for (uint32_t mz = 0; mz < 25; mz++) {
21159 for (uint32_t channels = 2; channels < 16; channels += 3) {
21160 DWConvMicrokernelTester()
21161 .cr(1)
21162 .kr(25)
21163 .channels(channels)
21164 .input_offset(48)
21165 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021166 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021167 }
21168 }
21169 }
21170#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21171
21172
21173#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21174 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
21175 DWConvMicrokernelTester()
21176 .cr(2)
21177 .kr(25)
21178 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080021179 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021180 }
21181
21182 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
21183 for (uint32_t channels = 4; channels < 32; channels += 6) {
21184 DWConvMicrokernelTester()
21185 .cr(2)
21186 .kr(25)
21187 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021188 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021189 }
21190 }
21191
21192 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
21193 for (uint32_t channels = 4; channels < 32; channels += 6) {
21194 DWConvMicrokernelTester()
21195 .cr(2)
21196 .kr(25)
21197 .channels(channels)
21198 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021199 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021200 }
21201 }
21202
21203 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
21204 for (uint32_t channels = 4; channels < 32; channels += 6) {
21205 DWConvMicrokernelTester()
21206 .cr(2)
21207 .kr(25)
21208 .channels(channels)
21209 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021210 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021211 }
21212 }
21213
21214 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
21215 for (uint32_t channels = 1; channels < 2; channels++) {
21216 DWConvMicrokernelTester()
21217 .cr(2)
21218 .kr(25)
21219 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021220 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021221 }
21222 }
21223
21224 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
21225 for (uint32_t channels = 3; channels < 4; channels++) {
21226 DWConvMicrokernelTester()
21227 .cr(2)
21228 .kr(25)
21229 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021230 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021231 }
21232 }
21233
21234 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
21235 for (uint32_t channels = 3; channels < 4; channels++) {
21236 DWConvMicrokernelTester()
21237 .cr(2)
21238 .kr(25)
21239 .channels(channels)
21240 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021241 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021242 }
21243 }
21244
21245 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
21246 for (uint32_t channels = 3; channels < 4; channels++) {
21247 DWConvMicrokernelTester()
21248 .cr(2)
21249 .kr(25)
21250 .channels(channels)
21251 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021252 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021253 }
21254 }
21255
21256 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
21257 for (size_t channels = 1; channels <= 10; channels += 1) {
21258 DWConvMicrokernelTester()
21259 .cr(2)
21260 .kr(25)
21261 .channels(channels)
21262 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021263 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021264 }
21265 }
21266
21267 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
21268 for (size_t channels = 1; channels <= 10; channels += 1) {
21269 for (size_t step = 2; step <= 25; step++) {
21270 DWConvMicrokernelTester()
21271 .cr(2)
21272 .kr(25)
21273 .channels(channels)
21274 .width(3)
21275 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021276 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021277 }
21278 }
21279 }
21280
21281 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
21282 for (size_t channels = 1; channels <= 10; channels += 1) {
21283 DWConvMicrokernelTester()
21284 .cr(2)
21285 .kr(25)
21286 .channels(2)
21287 .width(5)
21288 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080021289 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021290 }
21291 }
21292
21293 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
21294 for (size_t channels = 1; channels <= 10; channels += 1) {
21295 DWConvMicrokernelTester()
21296 .cr(2)
21297 .kr(25)
21298 .channels(channels)
21299 .width(3)
21300 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021301 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021302 }
21303 }
21304
21305 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
21306 for (size_t channels = 1; channels <= 10; channels += 1) {
21307 DWConvMicrokernelTester()
21308 .cr(2)
21309 .kr(25)
21310 .channels(channels)
21311 .width(3)
21312 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021313 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021314 }
21315 }
21316
21317 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
21318 for (uint32_t channels = 4; channels < 32; channels += 6) {
21319 DWConvMicrokernelTester()
21320 .cr(2)
21321 .kr(25)
21322 .channels(channels)
21323 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080021324 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021325 }
21326 }
21327
21328 TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
21329 for (uint32_t mz = 0; mz < 25; mz++) {
21330 for (uint32_t channels = 4; channels < 32; channels += 6) {
21331 DWConvMicrokernelTester()
21332 .cr(2)
21333 .kr(25)
21334 .channels(channels)
21335 .input_offset(80)
21336 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021337 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021338 }
21339 }
21340 }
21341#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21342
21343
21344#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21345 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
21346 DWConvMicrokernelTester()
21347 .cr(4)
21348 .kr(25)
21349 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080021350 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021351 }
21352
21353 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
21354 for (uint32_t channels = 8; channels < 64; channels += 12) {
21355 DWConvMicrokernelTester()
21356 .cr(4)
21357 .kr(25)
21358 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021359 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021360 }
21361 }
21362
21363 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
21364 for (uint32_t channels = 8; channels < 64; channels += 12) {
21365 DWConvMicrokernelTester()
21366 .cr(4)
21367 .kr(25)
21368 .channels(channels)
21369 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021370 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021371 }
21372 }
21373
21374 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
21375 for (uint32_t channels = 8; channels < 64; channels += 12) {
21376 DWConvMicrokernelTester()
21377 .cr(4)
21378 .kr(25)
21379 .channels(channels)
21380 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021381 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021382 }
21383 }
21384
21385 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
21386 for (uint32_t channels = 1; channels < 4; channels++) {
21387 DWConvMicrokernelTester()
21388 .cr(4)
21389 .kr(25)
21390 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021391 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021392 }
21393 }
21394
21395 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
21396 for (uint32_t channels = 5; channels < 8; channels++) {
21397 DWConvMicrokernelTester()
21398 .cr(4)
21399 .kr(25)
21400 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021401 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021402 }
21403 }
21404
21405 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
21406 for (uint32_t channels = 5; channels < 8; channels++) {
21407 DWConvMicrokernelTester()
21408 .cr(4)
21409 .kr(25)
21410 .channels(channels)
21411 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021412 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021413 }
21414 }
21415
21416 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
21417 for (uint32_t channels = 5; channels < 8; channels++) {
21418 DWConvMicrokernelTester()
21419 .cr(4)
21420 .kr(25)
21421 .channels(channels)
21422 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021423 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021424 }
21425 }
21426
21427 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
21428 for (size_t channels = 1; channels <= 20; channels += 3) {
21429 DWConvMicrokernelTester()
21430 .cr(4)
21431 .kr(25)
21432 .channels(channels)
21433 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021435 }
21436 }
21437
21438 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
21439 for (size_t channels = 1; channels <= 20; channels += 3) {
21440 for (size_t step = 2; step <= 25; step++) {
21441 DWConvMicrokernelTester()
21442 .cr(4)
21443 .kr(25)
21444 .channels(channels)
21445 .width(3)
21446 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021447 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021448 }
21449 }
21450 }
21451
21452 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
21453 for (size_t channels = 1; channels <= 20; channels += 3) {
21454 DWConvMicrokernelTester()
21455 .cr(4)
21456 .kr(25)
21457 .channels(4)
21458 .width(5)
21459 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080021460 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021461 }
21462 }
21463
21464 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
21465 for (size_t channels = 1; channels <= 20; channels += 3) {
21466 DWConvMicrokernelTester()
21467 .cr(4)
21468 .kr(25)
21469 .channels(channels)
21470 .width(3)
21471 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021472 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021473 }
21474 }
21475
21476 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
21477 for (size_t channels = 1; channels <= 20; channels += 3) {
21478 DWConvMicrokernelTester()
21479 .cr(4)
21480 .kr(25)
21481 .channels(channels)
21482 .width(3)
21483 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021484 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021485 }
21486 }
21487
21488 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
21489 for (uint32_t channels = 8; channels < 64; channels += 12) {
21490 DWConvMicrokernelTester()
21491 .cr(4)
21492 .kr(25)
21493 .channels(channels)
21494 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080021495 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021496 }
21497 }
21498
21499 TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
21500 for (uint32_t mz = 0; mz < 25; mz++) {
21501 for (uint32_t channels = 8; channels < 64; channels += 12) {
21502 DWConvMicrokernelTester()
21503 .cr(4)
21504 .kr(25)
21505 .channels(channels)
21506 .input_offset(112)
21507 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021508 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080021509 }
21510 }
21511 }
21512#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21513
21514
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021515TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021516 DWConvMicrokernelTester()
21517 .cr(1)
21518 .kr(25)
21519 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021520 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021521}
21522
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021523TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021524 for (uint32_t channels = 2; channels < 10; channels++) {
21525 DWConvMicrokernelTester()
21526 .cr(1)
21527 .kr(25)
21528 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021529 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021530 }
21531}
21532
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021533TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021534 for (uint32_t channels = 2; channels < 10; channels++) {
21535 DWConvMicrokernelTester()
21536 .cr(1)
21537 .kr(25)
21538 .channels(channels)
21539 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021540 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021541 }
21542}
21543
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021544TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021545 for (uint32_t channels = 2; channels < 10; channels++) {
21546 DWConvMicrokernelTester()
21547 .cr(1)
21548 .kr(25)
21549 .channels(channels)
21550 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021551 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021552 }
21553}
21554
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021555TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021556 for (size_t channels = 1; channels <= 5; channels += 1) {
21557 DWConvMicrokernelTester()
21558 .cr(1)
21559 .kr(25)
21560 .channels(channels)
21561 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021562 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021563 }
21564}
21565
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021566TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021567 for (size_t channels = 1; channels <= 5; channels += 1) {
21568 for (size_t step = 2; step <= 25; step++) {
21569 DWConvMicrokernelTester()
21570 .cr(1)
21571 .kr(25)
21572 .channels(channels)
21573 .width(3)
21574 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021575 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021576 }
21577 }
21578}
21579
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021580TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021581 for (size_t channels = 1; channels <= 5; channels += 1) {
21582 DWConvMicrokernelTester()
21583 .cr(1)
21584 .kr(25)
21585 .channels(1)
21586 .width(5)
21587 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021588 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021589 }
21590}
21591
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021592TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021593 for (size_t channels = 1; channels <= 5; channels += 1) {
21594 DWConvMicrokernelTester()
21595 .cr(1)
21596 .kr(25)
21597 .channels(channels)
21598 .width(3)
21599 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021600 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021601 }
21602}
21603
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021604TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021605 for (size_t channels = 1; channels <= 5; channels += 1) {
21606 DWConvMicrokernelTester()
21607 .cr(1)
21608 .kr(25)
21609 .channels(channels)
21610 .width(3)
21611 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021612 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021613 }
21614}
21615
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021616TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021617 for (uint32_t channels = 2; channels < 16; channels += 3) {
21618 DWConvMicrokernelTester()
21619 .cr(1)
21620 .kr(25)
21621 .channels(channels)
21622 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080021623 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021624 }
21625}
21626
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021627TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021628 for (uint32_t mz = 0; mz < 25; mz++) {
21629 for (uint32_t channels = 2; channels < 16; channels += 3) {
21630 DWConvMicrokernelTester()
21631 .cr(1)
21632 .kr(25)
21633 .channels(channels)
21634 .input_offset(48)
21635 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021636 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021637 }
21638 }
21639}
21640
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021641TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021642 DWConvMicrokernelTester()
21643 .cr(2)
21644 .kr(25)
21645 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080021646 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021647}
21648
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021649TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021650 for (uint32_t channels = 4; channels < 32; channels += 6) {
21651 DWConvMicrokernelTester()
21652 .cr(2)
21653 .kr(25)
21654 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021655 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021656 }
21657}
21658
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021659TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021660 for (uint32_t channels = 4; channels < 32; channels += 6) {
21661 DWConvMicrokernelTester()
21662 .cr(2)
21663 .kr(25)
21664 .channels(channels)
21665 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021666 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021667 }
21668}
21669
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021670TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021671 for (uint32_t channels = 4; channels < 32; channels += 6) {
21672 DWConvMicrokernelTester()
21673 .cr(2)
21674 .kr(25)
21675 .channels(channels)
21676 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021677 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021678 }
21679}
21680
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021681TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021682 for (uint32_t channels = 1; channels < 2; channels++) {
21683 DWConvMicrokernelTester()
21684 .cr(2)
21685 .kr(25)
21686 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021687 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021688 }
21689}
21690
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021691TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021692 for (uint32_t channels = 3; channels < 4; channels++) {
21693 DWConvMicrokernelTester()
21694 .cr(2)
21695 .kr(25)
21696 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021697 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021698 }
21699}
21700
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021701TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021702 for (uint32_t channels = 3; channels < 4; channels++) {
21703 DWConvMicrokernelTester()
21704 .cr(2)
21705 .kr(25)
21706 .channels(channels)
21707 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021708 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021709 }
21710}
21711
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021712TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021713 for (uint32_t channels = 3; channels < 4; channels++) {
21714 DWConvMicrokernelTester()
21715 .cr(2)
21716 .kr(25)
21717 .channels(channels)
21718 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021719 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021720 }
21721}
21722
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021723TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021724 for (size_t channels = 1; channels <= 10; channels += 1) {
21725 DWConvMicrokernelTester()
21726 .cr(2)
21727 .kr(25)
21728 .channels(channels)
21729 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021730 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021731 }
21732}
21733
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021734TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021735 for (size_t channels = 1; channels <= 10; channels += 1) {
21736 for (size_t step = 2; step <= 25; step++) {
21737 DWConvMicrokernelTester()
21738 .cr(2)
21739 .kr(25)
21740 .channels(channels)
21741 .width(3)
21742 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021743 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021744 }
21745 }
21746}
21747
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021748TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021749 for (size_t channels = 1; channels <= 10; channels += 1) {
21750 DWConvMicrokernelTester()
21751 .cr(2)
21752 .kr(25)
21753 .channels(2)
21754 .width(5)
21755 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080021756 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021757 }
21758}
21759
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021760TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021761 for (size_t channels = 1; channels <= 10; channels += 1) {
21762 DWConvMicrokernelTester()
21763 .cr(2)
21764 .kr(25)
21765 .channels(channels)
21766 .width(3)
21767 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021768 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021769 }
21770}
21771
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021772TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021773 for (size_t channels = 1; channels <= 10; channels += 1) {
21774 DWConvMicrokernelTester()
21775 .cr(2)
21776 .kr(25)
21777 .channels(channels)
21778 .width(3)
21779 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021780 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021781 }
21782}
21783
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021784TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021785 for (uint32_t channels = 4; channels < 32; channels += 6) {
21786 DWConvMicrokernelTester()
21787 .cr(2)
21788 .kr(25)
21789 .channels(channels)
21790 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080021791 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021792 }
21793}
21794
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021795TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021796 for (uint32_t mz = 0; mz < 25; mz++) {
21797 for (uint32_t channels = 4; channels < 32; channels += 6) {
21798 DWConvMicrokernelTester()
21799 .cr(2)
21800 .kr(25)
21801 .channels(channels)
21802 .input_offset(80)
21803 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021804 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021805 }
21806 }
21807}
21808
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021809TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021810 DWConvMicrokernelTester()
21811 .cr(4)
21812 .kr(25)
21813 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080021814 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021815}
21816
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021817TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021818 for (uint32_t channels = 8; channels < 64; channels += 12) {
21819 DWConvMicrokernelTester()
21820 .cr(4)
21821 .kr(25)
21822 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021823 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021824 }
21825}
21826
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021827TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021828 for (uint32_t channels = 8; channels < 64; channels += 12) {
21829 DWConvMicrokernelTester()
21830 .cr(4)
21831 .kr(25)
21832 .channels(channels)
21833 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021834 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021835 }
21836}
21837
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021838TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021839 for (uint32_t channels = 8; channels < 64; channels += 12) {
21840 DWConvMicrokernelTester()
21841 .cr(4)
21842 .kr(25)
21843 .channels(channels)
21844 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021845 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021846 }
21847}
21848
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021849TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021850 for (uint32_t channels = 1; channels < 4; channels++) {
21851 DWConvMicrokernelTester()
21852 .cr(4)
21853 .kr(25)
21854 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021855 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021856 }
21857}
21858
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021859TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021860 for (uint32_t channels = 5; channels < 8; channels++) {
21861 DWConvMicrokernelTester()
21862 .cr(4)
21863 .kr(25)
21864 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021865 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021866 }
21867}
21868
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021869TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021870 for (uint32_t channels = 5; channels < 8; channels++) {
21871 DWConvMicrokernelTester()
21872 .cr(4)
21873 .kr(25)
21874 .channels(channels)
21875 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021876 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021877 }
21878}
21879
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021880TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021881 for (uint32_t channels = 5; channels < 8; channels++) {
21882 DWConvMicrokernelTester()
21883 .cr(4)
21884 .kr(25)
21885 .channels(channels)
21886 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021887 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021888 }
21889}
21890
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021891TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021892 for (size_t channels = 1; channels <= 20; channels += 3) {
21893 DWConvMicrokernelTester()
21894 .cr(4)
21895 .kr(25)
21896 .channels(channels)
21897 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021898 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021899 }
21900}
21901
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021902TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021903 for (size_t channels = 1; channels <= 20; channels += 3) {
21904 for (size_t step = 2; step <= 25; step++) {
21905 DWConvMicrokernelTester()
21906 .cr(4)
21907 .kr(25)
21908 .channels(channels)
21909 .width(3)
21910 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021911 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021912 }
21913 }
21914}
21915
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021916TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021917 for (size_t channels = 1; channels <= 20; channels += 3) {
21918 DWConvMicrokernelTester()
21919 .cr(4)
21920 .kr(25)
21921 .channels(4)
21922 .width(5)
21923 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080021924 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021925 }
21926}
21927
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021928TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021929 for (size_t channels = 1; channels <= 20; channels += 3) {
21930 DWConvMicrokernelTester()
21931 .cr(4)
21932 .kr(25)
21933 .channels(channels)
21934 .width(3)
21935 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021936 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021937 }
21938}
21939
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021940TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021941 for (size_t channels = 1; channels <= 20; channels += 3) {
21942 DWConvMicrokernelTester()
21943 .cr(4)
21944 .kr(25)
21945 .channels(channels)
21946 .width(3)
21947 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021948 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021949 }
21950}
21951
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021952TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021953 for (uint32_t channels = 8; channels < 64; channels += 12) {
21954 DWConvMicrokernelTester()
21955 .cr(4)
21956 .kr(25)
21957 .channels(channels)
21958 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080021959 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021960 }
21961}
21962
Marat Dukhan2ac722e2022-01-04 01:54:20 -080021963TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
Marat Dukhan85d772b2021-06-30 11:02:42 -070021964 for (uint32_t mz = 0; mz < 25; mz++) {
21965 for (uint32_t channels = 8; channels < 64; channels += 12) {
21966 DWConvMicrokernelTester()
21967 .cr(4)
21968 .kr(25)
21969 .channels(channels)
21970 .input_offset(112)
21971 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021972 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan85d772b2021-06-30 11:02:42 -070021973 }
21974 }
Marat Dukhan272d4d92022-01-04 15:07:14 -080021975}
21976
21977TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
21978 DWConvMicrokernelTester()
21979 .cr(1)
21980 .kr(25)
21981 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021982 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080021983}
21984
21985TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
21986 for (uint32_t channels = 2; channels < 10; channels++) {
21987 DWConvMicrokernelTester()
21988 .cr(1)
21989 .kr(25)
21990 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021991 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080021992 }
21993}
21994
21995TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
21996 for (uint32_t channels = 2; channels < 10; channels++) {
21997 DWConvMicrokernelTester()
21998 .cr(1)
21999 .kr(25)
22000 .channels(channels)
22001 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022002 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022003 }
22004}
22005
22006TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
22007 for (uint32_t channels = 2; channels < 10; channels++) {
22008 DWConvMicrokernelTester()
22009 .cr(1)
22010 .kr(25)
22011 .channels(channels)
22012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022013 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022014 }
22015}
22016
22017TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
22018 for (size_t channels = 1; channels <= 5; channels += 1) {
22019 DWConvMicrokernelTester()
22020 .cr(1)
22021 .kr(25)
22022 .channels(channels)
22023 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022024 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022025 }
22026}
22027
22028TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
22029 for (size_t channels = 1; channels <= 5; channels += 1) {
22030 for (size_t step = 2; step <= 25; step++) {
22031 DWConvMicrokernelTester()
22032 .cr(1)
22033 .kr(25)
22034 .channels(channels)
22035 .width(3)
22036 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022037 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022038 }
22039 }
22040}
22041
22042TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
22043 for (size_t channels = 1; channels <= 5; channels += 1) {
22044 DWConvMicrokernelTester()
22045 .cr(1)
22046 .kr(25)
22047 .channels(1)
22048 .width(5)
22049 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022050 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022051 }
22052}
22053
22054TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
22055 for (size_t channels = 1; channels <= 5; channels += 1) {
22056 DWConvMicrokernelTester()
22057 .cr(1)
22058 .kr(25)
22059 .channels(channels)
22060 .width(3)
22061 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022062 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022063 }
22064}
22065
22066TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
22067 for (size_t channels = 1; channels <= 5; channels += 1) {
22068 DWConvMicrokernelTester()
22069 .cr(1)
22070 .kr(25)
22071 .channels(channels)
22072 .width(3)
22073 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022074 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022075 }
22076}
22077
22078TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
22079 for (uint32_t channels = 2; channels < 16; channels += 3) {
22080 DWConvMicrokernelTester()
22081 .cr(1)
22082 .kr(25)
22083 .channels(channels)
22084 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080022085 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022086 }
22087}
22088
22089TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
22090 for (uint32_t mz = 0; mz < 25; mz++) {
22091 for (uint32_t channels = 2; channels < 16; channels += 3) {
22092 DWConvMicrokernelTester()
22093 .cr(1)
22094 .kr(25)
22095 .channels(channels)
22096 .input_offset(48)
22097 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022098 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022099 }
22100 }
22101}
22102
22103TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
22104 DWConvMicrokernelTester()
22105 .cr(2)
22106 .kr(25)
22107 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080022108 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022109}
22110
22111TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
22112 for (uint32_t channels = 4; channels < 32; channels += 6) {
22113 DWConvMicrokernelTester()
22114 .cr(2)
22115 .kr(25)
22116 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022117 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022118 }
22119}
22120
22121TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
22122 for (uint32_t channels = 4; channels < 32; channels += 6) {
22123 DWConvMicrokernelTester()
22124 .cr(2)
22125 .kr(25)
22126 .channels(channels)
22127 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022128 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022129 }
22130}
22131
22132TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
22133 for (uint32_t channels = 4; channels < 32; channels += 6) {
22134 DWConvMicrokernelTester()
22135 .cr(2)
22136 .kr(25)
22137 .channels(channels)
22138 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022139 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022140 }
22141}
22142
22143TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
22144 for (uint32_t channels = 1; channels < 2; channels++) {
22145 DWConvMicrokernelTester()
22146 .cr(2)
22147 .kr(25)
22148 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022149 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022150 }
22151}
22152
22153TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
22154 for (uint32_t channels = 3; channels < 4; channels++) {
22155 DWConvMicrokernelTester()
22156 .cr(2)
22157 .kr(25)
22158 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022159 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022160 }
22161}
22162
22163TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
22164 for (uint32_t channels = 3; channels < 4; channels++) {
22165 DWConvMicrokernelTester()
22166 .cr(2)
22167 .kr(25)
22168 .channels(channels)
22169 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022170 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022171 }
22172}
22173
22174TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
22175 for (uint32_t channels = 3; channels < 4; channels++) {
22176 DWConvMicrokernelTester()
22177 .cr(2)
22178 .kr(25)
22179 .channels(channels)
22180 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022181 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022182 }
22183}
22184
22185TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
22186 for (size_t channels = 1; channels <= 10; channels += 1) {
22187 DWConvMicrokernelTester()
22188 .cr(2)
22189 .kr(25)
22190 .channels(channels)
22191 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022192 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022193 }
22194}
22195
22196TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
22197 for (size_t channels = 1; channels <= 10; channels += 1) {
22198 for (size_t step = 2; step <= 25; step++) {
22199 DWConvMicrokernelTester()
22200 .cr(2)
22201 .kr(25)
22202 .channels(channels)
22203 .width(3)
22204 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022205 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022206 }
22207 }
22208}
22209
22210TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
22211 for (size_t channels = 1; channels <= 10; channels += 1) {
22212 DWConvMicrokernelTester()
22213 .cr(2)
22214 .kr(25)
22215 .channels(2)
22216 .width(5)
22217 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080022218 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022219 }
22220}
22221
22222TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
22223 for (size_t channels = 1; channels <= 10; channels += 1) {
22224 DWConvMicrokernelTester()
22225 .cr(2)
22226 .kr(25)
22227 .channels(channels)
22228 .width(3)
22229 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022230 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022231 }
22232}
22233
22234TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
22235 for (size_t channels = 1; channels <= 10; channels += 1) {
22236 DWConvMicrokernelTester()
22237 .cr(2)
22238 .kr(25)
22239 .channels(channels)
22240 .width(3)
22241 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022242 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022243 }
22244}
22245
22246TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
22247 for (uint32_t channels = 4; channels < 32; channels += 6) {
22248 DWConvMicrokernelTester()
22249 .cr(2)
22250 .kr(25)
22251 .channels(channels)
22252 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080022253 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022254 }
22255}
22256
22257TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
22258 for (uint32_t mz = 0; mz < 25; mz++) {
22259 for (uint32_t channels = 4; channels < 32; channels += 6) {
22260 DWConvMicrokernelTester()
22261 .cr(2)
22262 .kr(25)
22263 .channels(channels)
22264 .input_offset(80)
22265 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022266 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022267 }
22268 }
22269}
22270
22271TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
22272 DWConvMicrokernelTester()
22273 .cr(4)
22274 .kr(25)
22275 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080022276 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022277}
22278
22279TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
22280 for (uint32_t channels = 8; channels < 64; channels += 12) {
22281 DWConvMicrokernelTester()
22282 .cr(4)
22283 .kr(25)
22284 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022285 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022286 }
22287}
22288
22289TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
22290 for (uint32_t channels = 8; channels < 64; channels += 12) {
22291 DWConvMicrokernelTester()
22292 .cr(4)
22293 .kr(25)
22294 .channels(channels)
22295 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022296 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022297 }
22298}
22299
22300TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
22301 for (uint32_t channels = 8; channels < 64; channels += 12) {
22302 DWConvMicrokernelTester()
22303 .cr(4)
22304 .kr(25)
22305 .channels(channels)
22306 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022307 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022308 }
22309}
22310
22311TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
22312 for (uint32_t channels = 1; channels < 4; channels++) {
22313 DWConvMicrokernelTester()
22314 .cr(4)
22315 .kr(25)
22316 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022317 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022318 }
22319}
22320
22321TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
22322 for (uint32_t channels = 5; channels < 8; channels++) {
22323 DWConvMicrokernelTester()
22324 .cr(4)
22325 .kr(25)
22326 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022327 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022328 }
22329}
22330
22331TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
22332 for (uint32_t channels = 5; channels < 8; channels++) {
22333 DWConvMicrokernelTester()
22334 .cr(4)
22335 .kr(25)
22336 .channels(channels)
22337 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022338 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022339 }
22340}
22341
22342TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
22343 for (uint32_t channels = 5; channels < 8; channels++) {
22344 DWConvMicrokernelTester()
22345 .cr(4)
22346 .kr(25)
22347 .channels(channels)
22348 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022349 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022350 }
22351}
22352
22353TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
22354 for (size_t channels = 1; channels <= 20; channels += 3) {
22355 DWConvMicrokernelTester()
22356 .cr(4)
22357 .kr(25)
22358 .channels(channels)
22359 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022360 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022361 }
22362}
22363
22364TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
22365 for (size_t channels = 1; channels <= 20; channels += 3) {
22366 for (size_t step = 2; step <= 25; step++) {
22367 DWConvMicrokernelTester()
22368 .cr(4)
22369 .kr(25)
22370 .channels(channels)
22371 .width(3)
22372 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022373 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022374 }
22375 }
22376}
22377
22378TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
22379 for (size_t channels = 1; channels <= 20; channels += 3) {
22380 DWConvMicrokernelTester()
22381 .cr(4)
22382 .kr(25)
22383 .channels(4)
22384 .width(5)
22385 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080022386 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022387 }
22388}
22389
22390TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
22391 for (size_t channels = 1; channels <= 20; channels += 3) {
22392 DWConvMicrokernelTester()
22393 .cr(4)
22394 .kr(25)
22395 .channels(channels)
22396 .width(3)
22397 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022398 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022399 }
22400}
22401
22402TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
22403 for (size_t channels = 1; channels <= 20; channels += 3) {
22404 DWConvMicrokernelTester()
22405 .cr(4)
22406 .kr(25)
22407 .channels(channels)
22408 .width(3)
22409 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022410 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022411 }
22412}
22413
22414TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
22415 for (uint32_t channels = 8; channels < 64; channels += 12) {
22416 DWConvMicrokernelTester()
22417 .cr(4)
22418 .kr(25)
22419 .channels(channels)
22420 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080022421 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022422 }
22423}
22424
22425TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
22426 for (uint32_t mz = 0; mz < 25; mz++) {
22427 for (uint32_t channels = 8; channels < 64; channels += 12) {
22428 DWConvMicrokernelTester()
22429 .cr(4)
22430 .kr(25)
22431 .channels(channels)
22432 .input_offset(112)
22433 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022434 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022435 }
22436 }
22437}
22438
22439TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
22440 DWConvMicrokernelTester()
22441 .cr(1)
22442 .kr(25)
22443 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022444 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022445}
22446
22447TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
22448 for (uint32_t channels = 2; channels < 10; channels++) {
22449 DWConvMicrokernelTester()
22450 .cr(1)
22451 .kr(25)
22452 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022453 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022454 }
22455}
22456
22457TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
22458 for (uint32_t channels = 2; channels < 10; channels++) {
22459 DWConvMicrokernelTester()
22460 .cr(1)
22461 .kr(25)
22462 .channels(channels)
22463 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022464 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022465 }
22466}
22467
22468TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
22469 for (uint32_t channels = 2; channels < 10; channels++) {
22470 DWConvMicrokernelTester()
22471 .cr(1)
22472 .kr(25)
22473 .channels(channels)
22474 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022475 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022476 }
22477}
22478
22479TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
22480 for (size_t channels = 1; channels <= 5; channels += 1) {
22481 DWConvMicrokernelTester()
22482 .cr(1)
22483 .kr(25)
22484 .channels(channels)
22485 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022486 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022487 }
22488}
22489
22490TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
22491 for (size_t channels = 1; channels <= 5; channels += 1) {
22492 for (size_t step = 2; step <= 25; step++) {
22493 DWConvMicrokernelTester()
22494 .cr(1)
22495 .kr(25)
22496 .channels(channels)
22497 .width(3)
22498 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022499 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022500 }
22501 }
22502}
22503
22504TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
22505 for (size_t channels = 1; channels <= 5; channels += 1) {
22506 DWConvMicrokernelTester()
22507 .cr(1)
22508 .kr(25)
22509 .channels(1)
22510 .width(5)
22511 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022512 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022513 }
22514}
22515
22516TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
22517 for (size_t channels = 1; channels <= 5; channels += 1) {
22518 DWConvMicrokernelTester()
22519 .cr(1)
22520 .kr(25)
22521 .channels(channels)
22522 .width(3)
22523 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022524 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022525 }
22526}
22527
22528TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
22529 for (size_t channels = 1; channels <= 5; channels += 1) {
22530 DWConvMicrokernelTester()
22531 .cr(1)
22532 .kr(25)
22533 .channels(channels)
22534 .width(3)
22535 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022536 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022537 }
22538}
22539
22540TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
22541 for (uint32_t channels = 2; channels < 16; channels += 3) {
22542 DWConvMicrokernelTester()
22543 .cr(1)
22544 .kr(25)
22545 .channels(channels)
22546 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080022547 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022548 }
22549}
22550
22551TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
22552 for (uint32_t mz = 0; mz < 25; mz++) {
22553 for (uint32_t channels = 2; channels < 16; channels += 3) {
22554 DWConvMicrokernelTester()
22555 .cr(1)
22556 .kr(25)
22557 .channels(channels)
22558 .input_offset(48)
22559 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022560 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022561 }
22562 }
22563}
22564
22565TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
22566 DWConvMicrokernelTester()
22567 .cr(2)
22568 .kr(25)
22569 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080022570 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022571}
22572
22573TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
22574 for (uint32_t channels = 4; channels < 32; channels += 6) {
22575 DWConvMicrokernelTester()
22576 .cr(2)
22577 .kr(25)
22578 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022579 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022580 }
22581}
22582
22583TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
22584 for (uint32_t channels = 4; channels < 32; channels += 6) {
22585 DWConvMicrokernelTester()
22586 .cr(2)
22587 .kr(25)
22588 .channels(channels)
22589 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022590 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022591 }
22592}
22593
22594TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
22595 for (uint32_t channels = 4; channels < 32; channels += 6) {
22596 DWConvMicrokernelTester()
22597 .cr(2)
22598 .kr(25)
22599 .channels(channels)
22600 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022601 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022602 }
22603}
22604
22605TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
22606 for (uint32_t channels = 1; channels < 2; channels++) {
22607 DWConvMicrokernelTester()
22608 .cr(2)
22609 .kr(25)
22610 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022611 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022612 }
22613}
22614
22615TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
22616 for (uint32_t channels = 3; channels < 4; channels++) {
22617 DWConvMicrokernelTester()
22618 .cr(2)
22619 .kr(25)
22620 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022621 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022622 }
22623}
22624
22625TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
22626 for (uint32_t channels = 3; channels < 4; channels++) {
22627 DWConvMicrokernelTester()
22628 .cr(2)
22629 .kr(25)
22630 .channels(channels)
22631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022632 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022633 }
22634}
22635
22636TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
22637 for (uint32_t channels = 3; channels < 4; channels++) {
22638 DWConvMicrokernelTester()
22639 .cr(2)
22640 .kr(25)
22641 .channels(channels)
22642 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022643 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022644 }
22645}
22646
22647TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
22648 for (size_t channels = 1; channels <= 10; channels += 1) {
22649 DWConvMicrokernelTester()
22650 .cr(2)
22651 .kr(25)
22652 .channels(channels)
22653 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022654 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022655 }
22656}
22657
22658TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
22659 for (size_t channels = 1; channels <= 10; channels += 1) {
22660 for (size_t step = 2; step <= 25; step++) {
22661 DWConvMicrokernelTester()
22662 .cr(2)
22663 .kr(25)
22664 .channels(channels)
22665 .width(3)
22666 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022667 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022668 }
22669 }
22670}
22671
22672TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
22673 for (size_t channels = 1; channels <= 10; channels += 1) {
22674 DWConvMicrokernelTester()
22675 .cr(2)
22676 .kr(25)
22677 .channels(2)
22678 .width(5)
22679 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080022680 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022681 }
22682}
22683
22684TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
22685 for (size_t channels = 1; channels <= 10; channels += 1) {
22686 DWConvMicrokernelTester()
22687 .cr(2)
22688 .kr(25)
22689 .channels(channels)
22690 .width(3)
22691 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022692 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022693 }
22694}
22695
22696TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
22697 for (size_t channels = 1; channels <= 10; channels += 1) {
22698 DWConvMicrokernelTester()
22699 .cr(2)
22700 .kr(25)
22701 .channels(channels)
22702 .width(3)
22703 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022704 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022705 }
22706}
22707
22708TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
22709 for (uint32_t channels = 4; channels < 32; channels += 6) {
22710 DWConvMicrokernelTester()
22711 .cr(2)
22712 .kr(25)
22713 .channels(channels)
22714 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080022715 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022716 }
22717}
22718
22719TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
22720 for (uint32_t mz = 0; mz < 25; mz++) {
22721 for (uint32_t channels = 4; channels < 32; channels += 6) {
22722 DWConvMicrokernelTester()
22723 .cr(2)
22724 .kr(25)
22725 .channels(channels)
22726 .input_offset(80)
22727 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022728 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022729 }
22730 }
22731}
22732
22733TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
22734 DWConvMicrokernelTester()
22735 .cr(4)
22736 .kr(25)
22737 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080022738 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022739}
22740
22741TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
22742 for (uint32_t channels = 8; channels < 64; channels += 12) {
22743 DWConvMicrokernelTester()
22744 .cr(4)
22745 .kr(25)
22746 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022747 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022748 }
22749}
22750
22751TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
22752 for (uint32_t channels = 8; channels < 64; channels += 12) {
22753 DWConvMicrokernelTester()
22754 .cr(4)
22755 .kr(25)
22756 .channels(channels)
22757 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022758 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022759 }
22760}
22761
22762TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
22763 for (uint32_t channels = 8; channels < 64; channels += 12) {
22764 DWConvMicrokernelTester()
22765 .cr(4)
22766 .kr(25)
22767 .channels(channels)
22768 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022769 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022770 }
22771}
22772
22773TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
22774 for (uint32_t channels = 1; channels < 4; channels++) {
22775 DWConvMicrokernelTester()
22776 .cr(4)
22777 .kr(25)
22778 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022779 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022780 }
22781}
22782
22783TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
22784 for (uint32_t channels = 5; channels < 8; channels++) {
22785 DWConvMicrokernelTester()
22786 .cr(4)
22787 .kr(25)
22788 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022789 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022790 }
22791}
22792
22793TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
22794 for (uint32_t channels = 5; channels < 8; channels++) {
22795 DWConvMicrokernelTester()
22796 .cr(4)
22797 .kr(25)
22798 .channels(channels)
22799 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022800 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022801 }
22802}
22803
22804TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
22805 for (uint32_t channels = 5; channels < 8; channels++) {
22806 DWConvMicrokernelTester()
22807 .cr(4)
22808 .kr(25)
22809 .channels(channels)
22810 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022811 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022812 }
22813}
22814
22815TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
22816 for (size_t channels = 1; channels <= 20; channels += 3) {
22817 DWConvMicrokernelTester()
22818 .cr(4)
22819 .kr(25)
22820 .channels(channels)
22821 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022822 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022823 }
22824}
22825
22826TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
22827 for (size_t channels = 1; channels <= 20; channels += 3) {
22828 for (size_t step = 2; step <= 25; step++) {
22829 DWConvMicrokernelTester()
22830 .cr(4)
22831 .kr(25)
22832 .channels(channels)
22833 .width(3)
22834 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022835 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022836 }
22837 }
22838}
22839
22840TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
22841 for (size_t channels = 1; channels <= 20; channels += 3) {
22842 DWConvMicrokernelTester()
22843 .cr(4)
22844 .kr(25)
22845 .channels(4)
22846 .width(5)
22847 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080022848 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022849 }
22850}
22851
22852TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
22853 for (size_t channels = 1; channels <= 20; channels += 3) {
22854 DWConvMicrokernelTester()
22855 .cr(4)
22856 .kr(25)
22857 .channels(channels)
22858 .width(3)
22859 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022860 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022861 }
22862}
22863
22864TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
22865 for (size_t channels = 1; channels <= 20; channels += 3) {
22866 DWConvMicrokernelTester()
22867 .cr(4)
22868 .kr(25)
22869 .channels(channels)
22870 .width(3)
22871 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022872 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022873 }
22874}
22875
22876TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
22877 for (uint32_t channels = 8; channels < 64; channels += 12) {
22878 DWConvMicrokernelTester()
22879 .cr(4)
22880 .kr(25)
22881 .channels(channels)
22882 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080022883 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022884 }
22885}
22886
22887TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
22888 for (uint32_t mz = 0; mz < 25; mz++) {
22889 for (uint32_t channels = 8; channels < 64; channels += 12) {
22890 DWConvMicrokernelTester()
22891 .cr(4)
22892 .kr(25)
22893 .channels(channels)
22894 .input_offset(112)
22895 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022896 .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080022897 }
22898 }
Marat Dukhan85d772b2021-06-30 11:02:42 -070022899}