blob: 2ecf4183dfc95b58d9b54b592005af83c670bc26 [file] [log] [blame]
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8//
9// Auto-generated file. Do not edit!
10// Specification: test/qu8-dwconv-minmax-fp32.yaml
11// Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
Marat Dukhan605696a2021-07-15 18:01:30 -070023#if XNN_ARCH_ARM || XNN_ARCH_ARM64
24 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
25 TEST_REQUIRES_ARM_NEON;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(9)
29 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070031 }
32
33 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
34 TEST_REQUIRES_ARM_NEON;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(9)
39 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080040 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070041 }
42 }
43
44 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
45 TEST_REQUIRES_ARM_NEON;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080052 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070053 }
54 }
55
56 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
57 TEST_REQUIRES_ARM_NEON;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080064 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070065 }
66 }
67
68 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(9)
74 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080075 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070076 }
77 }
78
79 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
80 TEST_REQUIRES_ARM_NEON;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(9)
85 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080086 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070087 }
88 }
89
90 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
91 TEST_REQUIRES_ARM_NEON;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -070099 }
100 }
101
102 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800110 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700111 }
112 }
113
114 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
115 TEST_REQUIRES_ARM_NEON;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(9)
120 .channels(channels)
121 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800122 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700123 }
124 }
125
126 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
127 TEST_REQUIRES_ARM_NEON;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800136 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700137 }
138 }
139 }
140
141 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
142 TEST_REQUIRES_ARM_NEON;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(9)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800150 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700151 }
152 }
153
154 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700164 }
165 }
166
167 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
168 TEST_REQUIRES_ARM_NEON;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700177 }
178 }
179
180 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_zero_point_only) {
181 TEST_REQUIRES_ARM_NEON;
182 for (size_t channels = 1; channels <= 40; channels += 7) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(9)
186 .channels(channels)
187 .width(3)
188 .input_zero_point(255)
189 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800190 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700191 }
192 }
193
194 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, kernel_zero_point_only) {
195 TEST_REQUIRES_ARM_NEON;
196 for (size_t channels = 1; channels <= 40; channels += 7) {
197 DWConvMicrokernelTester()
198 .cr(8)
199 .kr(9)
200 .channels(channels)
201 .width(3)
202 .input_zero_point(0)
203 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -0800204 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700205 }
206 }
207
208 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
209 TEST_REQUIRES_ARM_NEON;
210 for (uint32_t channels = 16; channels < 128; channels += 24) {
211 DWConvMicrokernelTester()
212 .cr(8)
213 .kr(9)
214 .channels(channels)
215 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -0800216 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700217 }
218 }
219
220 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
221 TEST_REQUIRES_ARM_NEON;
222 for (uint32_t mz = 0; mz < 9; mz++) {
223 for (uint32_t channels = 16; channels < 128; channels += 24) {
224 DWConvMicrokernelTester()
225 .cr(8)
226 .kr(9)
227 .channels(channels)
228 .input_offset(176)
229 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800230 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700231 }
232 }
233 }
234#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
235
236
237#if XNN_ARCH_ARM || XNN_ARCH_ARM64
238 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
239 TEST_REQUIRES_ARM_NEON;
240 DWConvMicrokernelTester()
241 .cr(16)
242 .kr(9)
243 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800244 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700245 }
246
247 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
248 TEST_REQUIRES_ARM_NEON;
249 for (uint32_t channels = 32; channels < 256; channels += 48) {
250 DWConvMicrokernelTester()
251 .cr(16)
252 .kr(9)
253 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800254 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700255 }
256 }
257
258 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
259 TEST_REQUIRES_ARM_NEON;
260 for (uint32_t channels = 32; channels < 256; channels += 48) {
261 DWConvMicrokernelTester()
262 .cr(16)
263 .kr(9)
264 .channels(channels)
265 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800266 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700267 }
268 }
269
270 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
271 TEST_REQUIRES_ARM_NEON;
272 for (uint32_t channels = 32; channels < 256; channels += 48) {
273 DWConvMicrokernelTester()
274 .cr(16)
275 .kr(9)
276 .channels(channels)
277 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800278 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700279 }
280 }
281
282 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
283 TEST_REQUIRES_ARM_NEON;
284 for (uint32_t channels = 1; channels < 16; channels++) {
285 DWConvMicrokernelTester()
286 .cr(16)
287 .kr(9)
288 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800289 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700290 }
291 }
292
293 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
294 TEST_REQUIRES_ARM_NEON;
295 for (uint32_t channels = 17; channels < 32; channels++) {
296 DWConvMicrokernelTester()
297 .cr(16)
298 .kr(9)
299 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800300 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700301 }
302 }
303
304 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
305 TEST_REQUIRES_ARM_NEON;
306 for (uint32_t channels = 17; channels < 32; channels++) {
307 DWConvMicrokernelTester()
308 .cr(16)
309 .kr(9)
310 .channels(channels)
311 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800312 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700313 }
314 }
315
316 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
317 TEST_REQUIRES_ARM_NEON;
318 for (uint32_t channels = 17; channels < 32; channels++) {
319 DWConvMicrokernelTester()
320 .cr(16)
321 .kr(9)
322 .channels(channels)
323 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800324 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700325 }
326 }
327
328 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
329 TEST_REQUIRES_ARM_NEON;
330 for (size_t channels = 1; channels <= 80; channels += 15) {
331 DWConvMicrokernelTester()
332 .cr(16)
333 .kr(9)
334 .channels(channels)
335 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800336 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700337 }
338 }
339
340 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
341 TEST_REQUIRES_ARM_NEON;
342 for (size_t channels = 1; channels <= 80; channels += 15) {
343 for (size_t step = 2; step <= 9; step++) {
344 DWConvMicrokernelTester()
345 .cr(16)
346 .kr(9)
347 .channels(channels)
348 .width(3)
349 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800350 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700351 }
352 }
353 }
354
355 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
356 TEST_REQUIRES_ARM_NEON;
357 for (size_t channels = 1; channels <= 80; channels += 15) {
358 DWConvMicrokernelTester()
359 .cr(16)
360 .kr(9)
361 .channels(16)
362 .width(5)
363 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800364 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700365 }
366 }
367
368 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
369 TEST_REQUIRES_ARM_NEON;
370 for (size_t channels = 1; channels <= 80; channels += 15) {
371 DWConvMicrokernelTester()
372 .cr(16)
373 .kr(9)
374 .channels(channels)
375 .width(3)
376 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800377 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700378 }
379 }
380
381 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
382 TEST_REQUIRES_ARM_NEON;
383 for (size_t channels = 1; channels <= 80; channels += 15) {
384 DWConvMicrokernelTester()
385 .cr(16)
386 .kr(9)
387 .channels(channels)
388 .width(3)
389 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800390 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700391 }
392 }
393
394 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_zero_point_only) {
395 TEST_REQUIRES_ARM_NEON;
396 for (size_t channels = 1; channels <= 80; channels += 15) {
397 DWConvMicrokernelTester()
398 .cr(16)
399 .kr(9)
400 .channels(channels)
401 .width(3)
402 .input_zero_point(255)
403 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800404 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700405 }
406 }
407
408 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, kernel_zero_point_only) {
409 TEST_REQUIRES_ARM_NEON;
410 for (size_t channels = 1; channels <= 80; channels += 15) {
411 DWConvMicrokernelTester()
412 .cr(16)
413 .kr(9)
414 .channels(channels)
415 .width(3)
416 .input_zero_point(0)
417 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -0800418 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700419 }
420 }
421
422 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
423 TEST_REQUIRES_ARM_NEON;
424 for (uint32_t channels = 32; channels < 256; channels += 48) {
425 DWConvMicrokernelTester()
426 .cr(16)
427 .kr(9)
428 .channels(channels)
429 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -0800430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700431 }
432 }
433
434 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
435 TEST_REQUIRES_ARM_NEON;
436 for (uint32_t mz = 0; mz < 9; mz++) {
437 for (uint32_t channels = 32; channels < 256; channels += 48) {
438 DWConvMicrokernelTester()
439 .cr(16)
440 .kr(9)
441 .channels(channels)
442 .input_offset(304)
443 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800444 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700445 }
446 }
447 }
448#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
449
450
451#if XNN_ARCH_ARM || XNN_ARCH_ARM64
452 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
453 TEST_REQUIRES_ARM_NEON;
454 DWConvMicrokernelTester()
455 .cr(24)
456 .kr(9)
457 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -0800458 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700459 }
460
461 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
462 TEST_REQUIRES_ARM_NEON;
463 for (uint32_t channels = 48; channels < 384; channels += 72) {
464 DWConvMicrokernelTester()
465 .cr(24)
466 .kr(9)
467 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800468 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700469 }
470 }
471
472 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
473 TEST_REQUIRES_ARM_NEON;
474 for (uint32_t channels = 48; channels < 384; channels += 72) {
475 DWConvMicrokernelTester()
476 .cr(24)
477 .kr(9)
478 .channels(channels)
479 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800480 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700481 }
482 }
483
484 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
485 TEST_REQUIRES_ARM_NEON;
486 for (uint32_t channels = 48; channels < 384; channels += 72) {
487 DWConvMicrokernelTester()
488 .cr(24)
489 .kr(9)
490 .channels(channels)
491 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800492 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700493 }
494 }
495
496 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
497 TEST_REQUIRES_ARM_NEON;
498 for (uint32_t channels = 1; channels < 24; channels++) {
499 DWConvMicrokernelTester()
500 .cr(24)
501 .kr(9)
502 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800503 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700504 }
505 }
506
507 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
508 TEST_REQUIRES_ARM_NEON;
509 for (uint32_t channels = 25; channels < 48; channels++) {
510 DWConvMicrokernelTester()
511 .cr(24)
512 .kr(9)
513 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800514 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700515 }
516 }
517
518 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
519 TEST_REQUIRES_ARM_NEON;
520 for (uint32_t channels = 25; channels < 48; channels++) {
521 DWConvMicrokernelTester()
522 .cr(24)
523 .kr(9)
524 .channels(channels)
525 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800526 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700527 }
528 }
529
530 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
531 TEST_REQUIRES_ARM_NEON;
532 for (uint32_t channels = 25; channels < 48; channels++) {
533 DWConvMicrokernelTester()
534 .cr(24)
535 .kr(9)
536 .channels(channels)
537 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800538 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700539 }
540 }
541
542 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
543 TEST_REQUIRES_ARM_NEON;
544 for (size_t channels = 1; channels <= 120; channels += 23) {
545 DWConvMicrokernelTester()
546 .cr(24)
547 .kr(9)
548 .channels(channels)
549 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800550 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700551 }
552 }
553
554 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
555 TEST_REQUIRES_ARM_NEON;
556 for (size_t channels = 1; channels <= 120; channels += 23) {
557 for (size_t step = 2; step <= 9; step++) {
558 DWConvMicrokernelTester()
559 .cr(24)
560 .kr(9)
561 .channels(channels)
562 .width(3)
563 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800564 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700565 }
566 }
567 }
568
569 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
570 TEST_REQUIRES_ARM_NEON;
571 for (size_t channels = 1; channels <= 120; channels += 23) {
572 DWConvMicrokernelTester()
573 .cr(24)
574 .kr(9)
575 .channels(24)
576 .width(5)
577 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -0800578 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700579 }
580 }
581
582 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
583 TEST_REQUIRES_ARM_NEON;
584 for (size_t channels = 1; channels <= 120; channels += 23) {
585 DWConvMicrokernelTester()
586 .cr(24)
587 .kr(9)
588 .channels(channels)
589 .width(3)
590 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800591 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700592 }
593 }
594
595 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
596 TEST_REQUIRES_ARM_NEON;
597 for (size_t channels = 1; channels <= 120; channels += 23) {
598 DWConvMicrokernelTester()
599 .cr(24)
600 .kr(9)
601 .channels(channels)
602 .width(3)
603 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800604 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700605 }
606 }
607
608 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_zero_point_only) {
609 TEST_REQUIRES_ARM_NEON;
610 for (size_t channels = 1; channels <= 120; channels += 23) {
611 DWConvMicrokernelTester()
612 .cr(24)
613 .kr(9)
614 .channels(channels)
615 .width(3)
616 .input_zero_point(255)
617 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800618 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700619 }
620 }
621
622 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, kernel_zero_point_only) {
623 TEST_REQUIRES_ARM_NEON;
624 for (size_t channels = 1; channels <= 120; channels += 23) {
625 DWConvMicrokernelTester()
626 .cr(24)
627 .kr(9)
628 .channels(channels)
629 .width(3)
630 .input_zero_point(0)
631 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -0800632 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700633 }
634 }
635
636 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
637 TEST_REQUIRES_ARM_NEON;
638 for (uint32_t channels = 48; channels < 384; channels += 72) {
639 DWConvMicrokernelTester()
640 .cr(24)
641 .kr(9)
642 .channels(channels)
643 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -0800644 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700645 }
646 }
647
648 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
649 TEST_REQUIRES_ARM_NEON;
650 for (uint32_t mz = 0; mz < 9; mz++) {
651 for (uint32_t channels = 48; channels < 384; channels += 72) {
652 DWConvMicrokernelTester()
653 .cr(24)
654 .kr(9)
655 .channels(channels)
656 .input_offset(464)
657 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800658 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700659 }
660 }
661 }
662#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
663
664
665#if XNN_ARCH_ARM || XNN_ARCH_ARM64
666 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
667 TEST_REQUIRES_ARM_NEON;
668 DWConvMicrokernelTester()
669 .cr(32)
670 .kr(9)
671 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -0800672 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700673 }
674
675 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
676 TEST_REQUIRES_ARM_NEON;
677 for (uint32_t channels = 64; channels < 512; channels += 96) {
678 DWConvMicrokernelTester()
679 .cr(32)
680 .kr(9)
681 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800682 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700683 }
684 }
685
686 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
687 TEST_REQUIRES_ARM_NEON;
688 for (uint32_t channels = 64; channels < 512; channels += 96) {
689 DWConvMicrokernelTester()
690 .cr(32)
691 .kr(9)
692 .channels(channels)
693 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800694 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700695 }
696 }
697
698 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
699 TEST_REQUIRES_ARM_NEON;
700 for (uint32_t channels = 64; channels < 512; channels += 96) {
701 DWConvMicrokernelTester()
702 .cr(32)
703 .kr(9)
704 .channels(channels)
705 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800706 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700707 }
708 }
709
710 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
711 TEST_REQUIRES_ARM_NEON;
712 for (uint32_t channels = 1; channels < 32; channels++) {
713 DWConvMicrokernelTester()
714 .cr(32)
715 .kr(9)
716 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800717 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700718 }
719 }
720
721 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
722 TEST_REQUIRES_ARM_NEON;
723 for (uint32_t channels = 33; channels < 64; channels++) {
724 DWConvMicrokernelTester()
725 .cr(32)
726 .kr(9)
727 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800728 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700729 }
730 }
731
732 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
733 TEST_REQUIRES_ARM_NEON;
734 for (uint32_t channels = 33; channels < 64; channels++) {
735 DWConvMicrokernelTester()
736 .cr(32)
737 .kr(9)
738 .channels(channels)
739 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800740 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700741 }
742 }
743
744 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
745 TEST_REQUIRES_ARM_NEON;
746 for (uint32_t channels = 33; channels < 64; channels++) {
747 DWConvMicrokernelTester()
748 .cr(32)
749 .kr(9)
750 .channels(channels)
751 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800752 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700753 }
754 }
755
756 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
757 TEST_REQUIRES_ARM_NEON;
758 for (size_t channels = 1; channels <= 160; channels += 31) {
759 DWConvMicrokernelTester()
760 .cr(32)
761 .kr(9)
762 .channels(channels)
763 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800764 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700765 }
766 }
767
768 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
769 TEST_REQUIRES_ARM_NEON;
770 for (size_t channels = 1; channels <= 160; channels += 31) {
771 for (size_t step = 2; step <= 9; step++) {
772 DWConvMicrokernelTester()
773 .cr(32)
774 .kr(9)
775 .channels(channels)
776 .width(3)
777 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800778 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700779 }
780 }
781 }
782
783 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
784 TEST_REQUIRES_ARM_NEON;
785 for (size_t channels = 1; channels <= 160; channels += 31) {
786 DWConvMicrokernelTester()
787 .cr(32)
788 .kr(9)
789 .channels(32)
790 .width(5)
791 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800792 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700793 }
794 }
795
796 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
797 TEST_REQUIRES_ARM_NEON;
798 for (size_t channels = 1; channels <= 160; channels += 31) {
799 DWConvMicrokernelTester()
800 .cr(32)
801 .kr(9)
802 .channels(channels)
803 .width(3)
804 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800805 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700806 }
807 }
808
809 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
810 TEST_REQUIRES_ARM_NEON;
811 for (size_t channels = 1; channels <= 160; channels += 31) {
812 DWConvMicrokernelTester()
813 .cr(32)
814 .kr(9)
815 .channels(channels)
816 .width(3)
817 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800818 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700819 }
820 }
821
822 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_zero_point_only) {
823 TEST_REQUIRES_ARM_NEON;
824 for (size_t channels = 1; channels <= 160; channels += 31) {
825 DWConvMicrokernelTester()
826 .cr(32)
827 .kr(9)
828 .channels(channels)
829 .width(3)
830 .input_zero_point(255)
831 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800832 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700833 }
834 }
835
836 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, kernel_zero_point_only) {
837 TEST_REQUIRES_ARM_NEON;
838 for (size_t channels = 1; channels <= 160; channels += 31) {
839 DWConvMicrokernelTester()
840 .cr(32)
841 .kr(9)
842 .channels(channels)
843 .width(3)
844 .input_zero_point(0)
845 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -0800846 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700847 }
848 }
849
850 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
851 TEST_REQUIRES_ARM_NEON;
852 for (uint32_t channels = 64; channels < 512; channels += 96) {
853 DWConvMicrokernelTester()
854 .cr(32)
855 .kr(9)
856 .channels(channels)
857 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -0800858 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700859 }
860 }
861
862 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
863 TEST_REQUIRES_ARM_NEON;
864 for (uint32_t mz = 0; mz < 9; mz++) {
865 for (uint32_t channels = 64; channels < 512; channels += 96) {
866 DWConvMicrokernelTester()
867 .cr(32)
868 .kr(9)
869 .channels(channels)
870 .input_offset(592)
871 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800872 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700873 }
874 }
875 }
876#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
877
878
879#if XNN_ARCH_ARM || XNN_ARCH_ARM64
880 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
881 TEST_REQUIRES_ARM_NEON_V8;
882 DWConvMicrokernelTester()
883 .cr(8)
884 .kr(9)
885 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800886 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700887 }
888
889 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
890 TEST_REQUIRES_ARM_NEON_V8;
891 for (uint32_t channels = 16; channels < 128; channels += 24) {
892 DWConvMicrokernelTester()
893 .cr(8)
894 .kr(9)
895 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800896 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700897 }
898 }
899
900 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
901 TEST_REQUIRES_ARM_NEON_V8;
902 for (uint32_t channels = 16; channels < 128; channels += 24) {
903 DWConvMicrokernelTester()
904 .cr(8)
905 .kr(9)
906 .channels(channels)
907 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800908 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700909 }
910 }
911
912 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
913 TEST_REQUIRES_ARM_NEON_V8;
914 for (uint32_t channels = 16; channels < 128; channels += 24) {
915 DWConvMicrokernelTester()
916 .cr(8)
917 .kr(9)
918 .channels(channels)
919 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800920 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700921 }
922 }
923
924 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
925 TEST_REQUIRES_ARM_NEON_V8;
926 for (uint32_t channels = 1; channels < 8; channels++) {
927 DWConvMicrokernelTester()
928 .cr(8)
929 .kr(9)
930 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800931 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700932 }
933 }
934
935 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
936 TEST_REQUIRES_ARM_NEON_V8;
937 for (uint32_t channels = 9; channels < 16; channels++) {
938 DWConvMicrokernelTester()
939 .cr(8)
940 .kr(9)
941 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800942 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700943 }
944 }
945
946 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
947 TEST_REQUIRES_ARM_NEON_V8;
948 for (uint32_t channels = 9; channels < 16; channels++) {
949 DWConvMicrokernelTester()
950 .cr(8)
951 .kr(9)
952 .channels(channels)
953 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800954 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700955 }
956 }
957
958 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
959 TEST_REQUIRES_ARM_NEON_V8;
960 for (uint32_t channels = 9; channels < 16; channels++) {
961 DWConvMicrokernelTester()
962 .cr(8)
963 .kr(9)
964 .channels(channels)
965 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800966 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700967 }
968 }
969
970 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
971 TEST_REQUIRES_ARM_NEON_V8;
972 for (size_t channels = 1; channels <= 40; channels += 7) {
973 DWConvMicrokernelTester()
974 .cr(8)
975 .kr(9)
976 .channels(channels)
977 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800978 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700979 }
980 }
981
982 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
983 TEST_REQUIRES_ARM_NEON_V8;
984 for (size_t channels = 1; channels <= 40; channels += 7) {
985 for (size_t step = 2; step <= 9; step++) {
986 DWConvMicrokernelTester()
987 .cr(8)
988 .kr(9)
989 .channels(channels)
990 .width(3)
991 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800992 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -0700993 }
994 }
995 }
996
997 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
998 TEST_REQUIRES_ARM_NEON_V8;
999 for (size_t channels = 1; channels <= 40; channels += 7) {
1000 DWConvMicrokernelTester()
1001 .cr(8)
1002 .kr(9)
1003 .channels(8)
1004 .width(5)
1005 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001006 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001007 }
1008 }
1009
1010 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
1011 TEST_REQUIRES_ARM_NEON_V8;
1012 for (size_t channels = 1; channels <= 40; channels += 7) {
1013 DWConvMicrokernelTester()
1014 .cr(8)
1015 .kr(9)
1016 .channels(channels)
1017 .width(3)
1018 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001019 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001020 }
1021 }
1022
1023 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
1024 TEST_REQUIRES_ARM_NEON_V8;
1025 for (size_t channels = 1; channels <= 40; channels += 7) {
1026 DWConvMicrokernelTester()
1027 .cr(8)
1028 .kr(9)
1029 .channels(channels)
1030 .width(3)
1031 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001032 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001033 }
1034 }
1035
1036 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_zero_point_only) {
1037 TEST_REQUIRES_ARM_NEON_V8;
1038 for (size_t channels = 1; channels <= 40; channels += 7) {
1039 DWConvMicrokernelTester()
1040 .cr(8)
1041 .kr(9)
1042 .channels(channels)
1043 .width(3)
1044 .input_zero_point(255)
1045 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001046 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001047 }
1048 }
1049
1050 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, kernel_zero_point_only) {
1051 TEST_REQUIRES_ARM_NEON_V8;
1052 for (size_t channels = 1; channels <= 40; channels += 7) {
1053 DWConvMicrokernelTester()
1054 .cr(8)
1055 .kr(9)
1056 .channels(channels)
1057 .width(3)
1058 .input_zero_point(0)
1059 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08001060 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001061 }
1062 }
1063
1064 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
1065 TEST_REQUIRES_ARM_NEON_V8;
1066 for (uint32_t channels = 16; channels < 128; channels += 24) {
1067 DWConvMicrokernelTester()
1068 .cr(8)
1069 .kr(9)
1070 .channels(channels)
1071 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08001072 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001073 }
1074 }
1075
1076 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
1077 TEST_REQUIRES_ARM_NEON_V8;
1078 for (uint32_t mz = 0; mz < 9; mz++) {
1079 for (uint32_t channels = 16; channels < 128; channels += 24) {
1080 DWConvMicrokernelTester()
1081 .cr(8)
1082 .kr(9)
1083 .channels(channels)
1084 .input_offset(176)
1085 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001086 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001087 }
1088 }
1089 }
1090#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1091
1092
1093#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1094 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
1095 TEST_REQUIRES_ARM_NEON_V8;
1096 DWConvMicrokernelTester()
1097 .cr(16)
1098 .kr(9)
1099 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001100 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001101 }
1102
1103 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
1104 TEST_REQUIRES_ARM_NEON_V8;
1105 for (uint32_t channels = 32; channels < 256; channels += 48) {
1106 DWConvMicrokernelTester()
1107 .cr(16)
1108 .kr(9)
1109 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001110 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001111 }
1112 }
1113
1114 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
1115 TEST_REQUIRES_ARM_NEON_V8;
1116 for (uint32_t channels = 32; channels < 256; channels += 48) {
1117 DWConvMicrokernelTester()
1118 .cr(16)
1119 .kr(9)
1120 .channels(channels)
1121 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001122 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001123 }
1124 }
1125
1126 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
1127 TEST_REQUIRES_ARM_NEON_V8;
1128 for (uint32_t channels = 32; channels < 256; channels += 48) {
1129 DWConvMicrokernelTester()
1130 .cr(16)
1131 .kr(9)
1132 .channels(channels)
1133 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001134 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001135 }
1136 }
1137
1138 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
1139 TEST_REQUIRES_ARM_NEON_V8;
1140 for (uint32_t channels = 1; channels < 16; channels++) {
1141 DWConvMicrokernelTester()
1142 .cr(16)
1143 .kr(9)
1144 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001145 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001146 }
1147 }
1148
1149 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
1150 TEST_REQUIRES_ARM_NEON_V8;
1151 for (uint32_t channels = 17; channels < 32; channels++) {
1152 DWConvMicrokernelTester()
1153 .cr(16)
1154 .kr(9)
1155 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001156 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001157 }
1158 }
1159
1160 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
1161 TEST_REQUIRES_ARM_NEON_V8;
1162 for (uint32_t channels = 17; channels < 32; channels++) {
1163 DWConvMicrokernelTester()
1164 .cr(16)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001168 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001169 }
1170 }
1171
1172 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
1173 TEST_REQUIRES_ARM_NEON_V8;
1174 for (uint32_t channels = 17; channels < 32; channels++) {
1175 DWConvMicrokernelTester()
1176 .cr(16)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001180 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001181 }
1182 }
1183
1184 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
1185 TEST_REQUIRES_ARM_NEON_V8;
1186 for (size_t channels = 1; channels <= 80; channels += 15) {
1187 DWConvMicrokernelTester()
1188 .cr(16)
1189 .kr(9)
1190 .channels(channels)
1191 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001192 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001193 }
1194 }
1195
1196 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
1197 TEST_REQUIRES_ARM_NEON_V8;
1198 for (size_t channels = 1; channels <= 80; channels += 15) {
1199 for (size_t step = 2; step <= 9; step++) {
1200 DWConvMicrokernelTester()
1201 .cr(16)
1202 .kr(9)
1203 .channels(channels)
1204 .width(3)
1205 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001206 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001207 }
1208 }
1209 }
1210
1211 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
1212 TEST_REQUIRES_ARM_NEON_V8;
1213 for (size_t channels = 1; channels <= 80; channels += 15) {
1214 DWConvMicrokernelTester()
1215 .cr(16)
1216 .kr(9)
1217 .channels(16)
1218 .width(5)
1219 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001220 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001221 }
1222 }
1223
1224 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
1225 TEST_REQUIRES_ARM_NEON_V8;
1226 for (size_t channels = 1; channels <= 80; channels += 15) {
1227 DWConvMicrokernelTester()
1228 .cr(16)
1229 .kr(9)
1230 .channels(channels)
1231 .width(3)
1232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001233 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001234 }
1235 }
1236
1237 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
1238 TEST_REQUIRES_ARM_NEON_V8;
1239 for (size_t channels = 1; channels <= 80; channels += 15) {
1240 DWConvMicrokernelTester()
1241 .cr(16)
1242 .kr(9)
1243 .channels(channels)
1244 .width(3)
1245 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001246 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001247 }
1248 }
1249
1250 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_zero_point_only) {
1251 TEST_REQUIRES_ARM_NEON_V8;
1252 for (size_t channels = 1; channels <= 80; channels += 15) {
1253 DWConvMicrokernelTester()
1254 .cr(16)
1255 .kr(9)
1256 .channels(channels)
1257 .width(3)
1258 .input_zero_point(255)
1259 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001260 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001261 }
1262 }
1263
1264 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, kernel_zero_point_only) {
1265 TEST_REQUIRES_ARM_NEON_V8;
1266 for (size_t channels = 1; channels <= 80; channels += 15) {
1267 DWConvMicrokernelTester()
1268 .cr(16)
1269 .kr(9)
1270 .channels(channels)
1271 .width(3)
1272 .input_zero_point(0)
1273 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08001274 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001275 }
1276 }
1277
1278 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
1279 TEST_REQUIRES_ARM_NEON_V8;
1280 for (uint32_t channels = 32; channels < 256; channels += 48) {
1281 DWConvMicrokernelTester()
1282 .cr(16)
1283 .kr(9)
1284 .channels(channels)
1285 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001287 }
1288 }
1289
1290 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
1291 TEST_REQUIRES_ARM_NEON_V8;
1292 for (uint32_t mz = 0; mz < 9; mz++) {
1293 for (uint32_t channels = 32; channels < 256; channels += 48) {
1294 DWConvMicrokernelTester()
1295 .cr(16)
1296 .kr(9)
1297 .channels(channels)
1298 .input_offset(304)
1299 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001300 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001301 }
1302 }
1303 }
1304#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1305
1306
1307#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1308 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
1309 TEST_REQUIRES_ARM_NEON_V8;
1310 DWConvMicrokernelTester()
1311 .cr(24)
1312 .kr(9)
1313 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08001314 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001315 }
1316
1317 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
1318 TEST_REQUIRES_ARM_NEON_V8;
1319 for (uint32_t channels = 48; channels < 384; channels += 72) {
1320 DWConvMicrokernelTester()
1321 .cr(24)
1322 .kr(9)
1323 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001324 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001325 }
1326 }
1327
1328 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
1329 TEST_REQUIRES_ARM_NEON_V8;
1330 for (uint32_t channels = 48; channels < 384; channels += 72) {
1331 DWConvMicrokernelTester()
1332 .cr(24)
1333 .kr(9)
1334 .channels(channels)
1335 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001336 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001337 }
1338 }
1339
1340 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
1341 TEST_REQUIRES_ARM_NEON_V8;
1342 for (uint32_t channels = 48; channels < 384; channels += 72) {
1343 DWConvMicrokernelTester()
1344 .cr(24)
1345 .kr(9)
1346 .channels(channels)
1347 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001348 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001349 }
1350 }
1351
1352 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
1353 TEST_REQUIRES_ARM_NEON_V8;
1354 for (uint32_t channels = 1; channels < 24; channels++) {
1355 DWConvMicrokernelTester()
1356 .cr(24)
1357 .kr(9)
1358 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001359 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001360 }
1361 }
1362
1363 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
1364 TEST_REQUIRES_ARM_NEON_V8;
1365 for (uint32_t channels = 25; channels < 48; channels++) {
1366 DWConvMicrokernelTester()
1367 .cr(24)
1368 .kr(9)
1369 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001370 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001371 }
1372 }
1373
1374 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
1375 TEST_REQUIRES_ARM_NEON_V8;
1376 for (uint32_t channels = 25; channels < 48; channels++) {
1377 DWConvMicrokernelTester()
1378 .cr(24)
1379 .kr(9)
1380 .channels(channels)
1381 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001382 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001383 }
1384 }
1385
1386 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
1387 TEST_REQUIRES_ARM_NEON_V8;
1388 for (uint32_t channels = 25; channels < 48; channels++) {
1389 DWConvMicrokernelTester()
1390 .cr(24)
1391 .kr(9)
1392 .channels(channels)
1393 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001394 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001395 }
1396 }
1397
1398 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
1399 TEST_REQUIRES_ARM_NEON_V8;
1400 for (size_t channels = 1; channels <= 120; channels += 23) {
1401 DWConvMicrokernelTester()
1402 .cr(24)
1403 .kr(9)
1404 .channels(channels)
1405 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001407 }
1408 }
1409
1410 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
1411 TEST_REQUIRES_ARM_NEON_V8;
1412 for (size_t channels = 1; channels <= 120; channels += 23) {
1413 for (size_t step = 2; step <= 9; step++) {
1414 DWConvMicrokernelTester()
1415 .cr(24)
1416 .kr(9)
1417 .channels(channels)
1418 .width(3)
1419 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001420 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001421 }
1422 }
1423 }
1424
1425 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
1426 TEST_REQUIRES_ARM_NEON_V8;
1427 for (size_t channels = 1; channels <= 120; channels += 23) {
1428 DWConvMicrokernelTester()
1429 .cr(24)
1430 .kr(9)
1431 .channels(24)
1432 .width(5)
1433 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08001434 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001435 }
1436 }
1437
1438 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
1439 TEST_REQUIRES_ARM_NEON_V8;
1440 for (size_t channels = 1; channels <= 120; channels += 23) {
1441 DWConvMicrokernelTester()
1442 .cr(24)
1443 .kr(9)
1444 .channels(channels)
1445 .width(3)
1446 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001447 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001448 }
1449 }
1450
1451 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
1452 TEST_REQUIRES_ARM_NEON_V8;
1453 for (size_t channels = 1; channels <= 120; channels += 23) {
1454 DWConvMicrokernelTester()
1455 .cr(24)
1456 .kr(9)
1457 .channels(channels)
1458 .width(3)
1459 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001460 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001461 }
1462 }
1463
1464 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_zero_point_only) {
1465 TEST_REQUIRES_ARM_NEON_V8;
1466 for (size_t channels = 1; channels <= 120; channels += 23) {
1467 DWConvMicrokernelTester()
1468 .cr(24)
1469 .kr(9)
1470 .channels(channels)
1471 .width(3)
1472 .input_zero_point(255)
1473 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001474 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001475 }
1476 }
1477
1478 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, kernel_zero_point_only) {
1479 TEST_REQUIRES_ARM_NEON_V8;
1480 for (size_t channels = 1; channels <= 120; channels += 23) {
1481 DWConvMicrokernelTester()
1482 .cr(24)
1483 .kr(9)
1484 .channels(channels)
1485 .width(3)
1486 .input_zero_point(0)
1487 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08001488 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001489 }
1490 }
1491
1492 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
1493 TEST_REQUIRES_ARM_NEON_V8;
1494 for (uint32_t channels = 48; channels < 384; channels += 72) {
1495 DWConvMicrokernelTester()
1496 .cr(24)
1497 .kr(9)
1498 .channels(channels)
1499 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08001500 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001501 }
1502 }
1503
1504 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
1505 TEST_REQUIRES_ARM_NEON_V8;
1506 for (uint32_t mz = 0; mz < 9; mz++) {
1507 for (uint32_t channels = 48; channels < 384; channels += 72) {
1508 DWConvMicrokernelTester()
1509 .cr(24)
1510 .kr(9)
1511 .channels(channels)
1512 .input_offset(464)
1513 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001514 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001515 }
1516 }
1517 }
1518#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1519
1520
1521#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1522 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
1523 TEST_REQUIRES_ARM_NEON_V8;
1524 DWConvMicrokernelTester()
1525 .cr(32)
1526 .kr(9)
1527 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08001528 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001529 }
1530
1531 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
1532 TEST_REQUIRES_ARM_NEON_V8;
1533 for (uint32_t channels = 64; channels < 512; channels += 96) {
1534 DWConvMicrokernelTester()
1535 .cr(32)
1536 .kr(9)
1537 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001538 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001539 }
1540 }
1541
1542 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
1543 TEST_REQUIRES_ARM_NEON_V8;
1544 for (uint32_t channels = 64; channels < 512; channels += 96) {
1545 DWConvMicrokernelTester()
1546 .cr(32)
1547 .kr(9)
1548 .channels(channels)
1549 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001550 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001551 }
1552 }
1553
1554 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
1555 TEST_REQUIRES_ARM_NEON_V8;
1556 for (uint32_t channels = 64; channels < 512; channels += 96) {
1557 DWConvMicrokernelTester()
1558 .cr(32)
1559 .kr(9)
1560 .channels(channels)
1561 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001562 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001563 }
1564 }
1565
1566 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
1567 TEST_REQUIRES_ARM_NEON_V8;
1568 for (uint32_t channels = 1; channels < 32; channels++) {
1569 DWConvMicrokernelTester()
1570 .cr(32)
1571 .kr(9)
1572 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001573 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001574 }
1575 }
1576
1577 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
1578 TEST_REQUIRES_ARM_NEON_V8;
1579 for (uint32_t channels = 33; channels < 64; channels++) {
1580 DWConvMicrokernelTester()
1581 .cr(32)
1582 .kr(9)
1583 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001584 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001585 }
1586 }
1587
1588 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
1589 TEST_REQUIRES_ARM_NEON_V8;
1590 for (uint32_t channels = 33; channels < 64; channels++) {
1591 DWConvMicrokernelTester()
1592 .cr(32)
1593 .kr(9)
1594 .channels(channels)
1595 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001596 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001597 }
1598 }
1599
1600 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
1601 TEST_REQUIRES_ARM_NEON_V8;
1602 for (uint32_t channels = 33; channels < 64; channels++) {
1603 DWConvMicrokernelTester()
1604 .cr(32)
1605 .kr(9)
1606 .channels(channels)
1607 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001608 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001609 }
1610 }
1611
1612 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
1613 TEST_REQUIRES_ARM_NEON_V8;
1614 for (size_t channels = 1; channels <= 160; channels += 31) {
1615 DWConvMicrokernelTester()
1616 .cr(32)
1617 .kr(9)
1618 .channels(channels)
1619 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001620 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001621 }
1622 }
1623
1624 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
1625 TEST_REQUIRES_ARM_NEON_V8;
1626 for (size_t channels = 1; channels <= 160; channels += 31) {
1627 for (size_t step = 2; step <= 9; step++) {
1628 DWConvMicrokernelTester()
1629 .cr(32)
1630 .kr(9)
1631 .channels(channels)
1632 .width(3)
1633 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001634 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001635 }
1636 }
1637 }
1638
1639 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
1640 TEST_REQUIRES_ARM_NEON_V8;
1641 for (size_t channels = 1; channels <= 160; channels += 31) {
1642 DWConvMicrokernelTester()
1643 .cr(32)
1644 .kr(9)
1645 .channels(32)
1646 .width(5)
1647 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08001648 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001649 }
1650 }
1651
1652 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
1653 TEST_REQUIRES_ARM_NEON_V8;
1654 for (size_t channels = 1; channels <= 160; channels += 31) {
1655 DWConvMicrokernelTester()
1656 .cr(32)
1657 .kr(9)
1658 .channels(channels)
1659 .width(3)
1660 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001661 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001662 }
1663 }
1664
1665 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
1666 TEST_REQUIRES_ARM_NEON_V8;
1667 for (size_t channels = 1; channels <= 160; channels += 31) {
1668 DWConvMicrokernelTester()
1669 .cr(32)
1670 .kr(9)
1671 .channels(channels)
1672 .width(3)
1673 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001674 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001675 }
1676 }
1677
1678 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_zero_point_only) {
1679 TEST_REQUIRES_ARM_NEON_V8;
1680 for (size_t channels = 1; channels <= 160; channels += 31) {
1681 DWConvMicrokernelTester()
1682 .cr(32)
1683 .kr(9)
1684 .channels(channels)
1685 .width(3)
1686 .input_zero_point(255)
1687 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001688 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001689 }
1690 }
1691
1692 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, kernel_zero_point_only) {
1693 TEST_REQUIRES_ARM_NEON_V8;
1694 for (size_t channels = 1; channels <= 160; channels += 31) {
1695 DWConvMicrokernelTester()
1696 .cr(32)
1697 .kr(9)
1698 .channels(channels)
1699 .width(3)
1700 .input_zero_point(0)
1701 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08001702 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001703 }
1704 }
1705
1706 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
1707 TEST_REQUIRES_ARM_NEON_V8;
1708 for (uint32_t channels = 64; channels < 512; channels += 96) {
1709 DWConvMicrokernelTester()
1710 .cr(32)
1711 .kr(9)
1712 .channels(channels)
1713 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08001714 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001715 }
1716 }
1717
1718 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
1719 TEST_REQUIRES_ARM_NEON_V8;
1720 for (uint32_t mz = 0; mz < 9; mz++) {
1721 for (uint32_t channels = 64; channels < 512; channels += 96) {
1722 DWConvMicrokernelTester()
1723 .cr(32)
1724 .kr(9)
1725 .channels(channels)
1726 .input_offset(592)
1727 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001728 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07001729 }
1730 }
1731 }
1732#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1733
1734
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07001735#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanf0f28812021-07-08 22:34:20 -07001736 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
1737 TEST_REQUIRES_X86_SSE2;
1738 DWConvMicrokernelTester()
1739 .cr(8)
1740 .kr(9)
1741 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001742 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001743 }
1744
1745 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
1746 TEST_REQUIRES_X86_SSE2;
1747 for (uint32_t channels = 16; channels < 128; channels += 24) {
1748 DWConvMicrokernelTester()
1749 .cr(8)
1750 .kr(9)
1751 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001752 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001753 }
1754 }
1755
1756 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
1757 TEST_REQUIRES_X86_SSE2;
1758 for (uint32_t channels = 16; channels < 128; channels += 24) {
1759 DWConvMicrokernelTester()
1760 .cr(8)
1761 .kr(9)
1762 .channels(channels)
1763 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001764 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001765 }
1766 }
1767
1768 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
1769 TEST_REQUIRES_X86_SSE2;
1770 for (uint32_t channels = 16; channels < 128; channels += 24) {
1771 DWConvMicrokernelTester()
1772 .cr(8)
1773 .kr(9)
1774 .channels(channels)
1775 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001776 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001777 }
1778 }
1779
1780 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
1781 TEST_REQUIRES_X86_SSE2;
1782 for (uint32_t channels = 1; channels < 8; channels++) {
1783 DWConvMicrokernelTester()
1784 .cr(8)
1785 .kr(9)
1786 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001787 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001788 }
1789 }
1790
1791 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
1792 TEST_REQUIRES_X86_SSE2;
1793 for (uint32_t channels = 9; channels < 16; channels++) {
1794 DWConvMicrokernelTester()
1795 .cr(8)
1796 .kr(9)
1797 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001798 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001799 }
1800 }
1801
1802 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
1803 TEST_REQUIRES_X86_SSE2;
1804 for (uint32_t channels = 9; channels < 16; channels++) {
1805 DWConvMicrokernelTester()
1806 .cr(8)
1807 .kr(9)
1808 .channels(channels)
1809 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001810 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001811 }
1812 }
1813
1814 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
1815 TEST_REQUIRES_X86_SSE2;
1816 for (uint32_t channels = 9; channels < 16; channels++) {
1817 DWConvMicrokernelTester()
1818 .cr(8)
1819 .kr(9)
1820 .channels(channels)
1821 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001822 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001823 }
1824 }
1825
1826 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
1827 TEST_REQUIRES_X86_SSE2;
1828 for (size_t channels = 1; channels <= 40; channels += 7) {
1829 DWConvMicrokernelTester()
1830 .cr(8)
1831 .kr(9)
1832 .channels(channels)
1833 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001834 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001835 }
1836 }
1837
1838 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
1839 TEST_REQUIRES_X86_SSE2;
1840 for (size_t channels = 1; channels <= 40; channels += 7) {
1841 for (size_t step = 2; step <= 9; step++) {
1842 DWConvMicrokernelTester()
1843 .cr(8)
1844 .kr(9)
1845 .channels(channels)
1846 .width(3)
1847 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001848 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001849 }
1850 }
1851 }
1852
1853 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
1854 TEST_REQUIRES_X86_SSE2;
1855 for (size_t channels = 1; channels <= 40; channels += 7) {
1856 DWConvMicrokernelTester()
1857 .cr(8)
1858 .kr(9)
1859 .channels(8)
1860 .width(5)
1861 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001862 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001863 }
1864 }
1865
1866 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
1867 TEST_REQUIRES_X86_SSE2;
1868 for (size_t channels = 1; channels <= 40; channels += 7) {
1869 DWConvMicrokernelTester()
1870 .cr(8)
1871 .kr(9)
1872 .channels(channels)
1873 .width(3)
1874 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001875 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001876 }
1877 }
1878
1879 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
1880 TEST_REQUIRES_X86_SSE2;
1881 for (size_t channels = 1; channels <= 40; channels += 7) {
1882 DWConvMicrokernelTester()
1883 .cr(8)
1884 .kr(9)
1885 .channels(channels)
1886 .width(3)
1887 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001888 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001889 }
1890 }
1891
1892 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_zero_point_only) {
1893 TEST_REQUIRES_X86_SSE2;
1894 for (size_t channels = 1; channels <= 40; channels += 7) {
1895 DWConvMicrokernelTester()
1896 .cr(8)
1897 .kr(9)
1898 .channels(channels)
1899 .width(3)
1900 .input_zero_point(255)
1901 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001902 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001903 }
1904 }
1905
1906 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, kernel_zero_point_only) {
1907 TEST_REQUIRES_X86_SSE2;
1908 for (size_t channels = 1; channels <= 40; channels += 7) {
1909 DWConvMicrokernelTester()
1910 .cr(8)
1911 .kr(9)
1912 .channels(channels)
1913 .width(3)
1914 .input_zero_point(0)
1915 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08001916 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001917 }
1918 }
1919
1920 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
1921 TEST_REQUIRES_X86_SSE2;
1922 for (uint32_t channels = 16; channels < 128; channels += 24) {
1923 DWConvMicrokernelTester()
1924 .cr(8)
1925 .kr(9)
1926 .channels(channels)
1927 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08001928 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001929 }
1930 }
1931
1932 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
1933 TEST_REQUIRES_X86_SSE2;
1934 for (uint32_t mz = 0; mz < 9; mz++) {
1935 for (uint32_t channels = 16; channels < 128; channels += 24) {
1936 DWConvMicrokernelTester()
1937 .cr(8)
1938 .kr(9)
1939 .channels(channels)
1940 .input_offset(176)
1941 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001942 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001943 }
1944 }
1945 }
1946#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1947
1948
1949#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1950 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
1951 TEST_REQUIRES_X86_SSE2;
1952 DWConvMicrokernelTester()
1953 .cr(16)
1954 .kr(9)
1955 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001956 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001957 }
1958
1959 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
1960 TEST_REQUIRES_X86_SSE2;
1961 for (uint32_t channels = 32; channels < 256; channels += 48) {
1962 DWConvMicrokernelTester()
1963 .cr(16)
1964 .kr(9)
1965 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001966 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001967 }
1968 }
1969
1970 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
1971 TEST_REQUIRES_X86_SSE2;
1972 for (uint32_t channels = 32; channels < 256; channels += 48) {
1973 DWConvMicrokernelTester()
1974 .cr(16)
1975 .kr(9)
1976 .channels(channels)
1977 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001978 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001979 }
1980 }
1981
1982 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
1983 TEST_REQUIRES_X86_SSE2;
1984 for (uint32_t channels = 32; channels < 256; channels += 48) {
1985 DWConvMicrokernelTester()
1986 .cr(16)
1987 .kr(9)
1988 .channels(channels)
1989 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001990 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07001991 }
1992 }
1993
1994 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
1995 TEST_REQUIRES_X86_SSE2;
1996 for (uint32_t channels = 1; channels < 16; channels++) {
1997 DWConvMicrokernelTester()
1998 .cr(16)
1999 .kr(9)
2000 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002001 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002002 }
2003 }
2004
2005 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
2006 TEST_REQUIRES_X86_SSE2;
2007 for (uint32_t channels = 17; channels < 32; channels++) {
2008 DWConvMicrokernelTester()
2009 .cr(16)
2010 .kr(9)
2011 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002012 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002013 }
2014 }
2015
2016 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
2017 TEST_REQUIRES_X86_SSE2;
2018 for (uint32_t channels = 17; channels < 32; channels++) {
2019 DWConvMicrokernelTester()
2020 .cr(16)
2021 .kr(9)
2022 .channels(channels)
2023 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002024 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002025 }
2026 }
2027
2028 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
2029 TEST_REQUIRES_X86_SSE2;
2030 for (uint32_t channels = 17; channels < 32; channels++) {
2031 DWConvMicrokernelTester()
2032 .cr(16)
2033 .kr(9)
2034 .channels(channels)
2035 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002036 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002037 }
2038 }
2039
2040 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
2041 TEST_REQUIRES_X86_SSE2;
2042 for (size_t channels = 1; channels <= 80; channels += 15) {
2043 DWConvMicrokernelTester()
2044 .cr(16)
2045 .kr(9)
2046 .channels(channels)
2047 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002048 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002049 }
2050 }
2051
2052 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
2053 TEST_REQUIRES_X86_SSE2;
2054 for (size_t channels = 1; channels <= 80; channels += 15) {
2055 for (size_t step = 2; step <= 9; step++) {
2056 DWConvMicrokernelTester()
2057 .cr(16)
2058 .kr(9)
2059 .channels(channels)
2060 .width(3)
2061 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002062 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002063 }
2064 }
2065 }
2066
2067 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
2068 TEST_REQUIRES_X86_SSE2;
2069 for (size_t channels = 1; channels <= 80; channels += 15) {
2070 DWConvMicrokernelTester()
2071 .cr(16)
2072 .kr(9)
2073 .channels(16)
2074 .width(5)
2075 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002076 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002077 }
2078 }
2079
2080 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
2081 TEST_REQUIRES_X86_SSE2;
2082 for (size_t channels = 1; channels <= 80; channels += 15) {
2083 DWConvMicrokernelTester()
2084 .cr(16)
2085 .kr(9)
2086 .channels(channels)
2087 .width(3)
2088 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002089 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002090 }
2091 }
2092
2093 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
2094 TEST_REQUIRES_X86_SSE2;
2095 for (size_t channels = 1; channels <= 80; channels += 15) {
2096 DWConvMicrokernelTester()
2097 .cr(16)
2098 .kr(9)
2099 .channels(channels)
2100 .width(3)
2101 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002102 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002103 }
2104 }
2105
2106 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_zero_point_only) {
2107 TEST_REQUIRES_X86_SSE2;
2108 for (size_t channels = 1; channels <= 80; channels += 15) {
2109 DWConvMicrokernelTester()
2110 .cr(16)
2111 .kr(9)
2112 .channels(channels)
2113 .width(3)
2114 .input_zero_point(255)
2115 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002116 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002117 }
2118 }
2119
2120 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, kernel_zero_point_only) {
2121 TEST_REQUIRES_X86_SSE2;
2122 for (size_t channels = 1; channels <= 80; channels += 15) {
2123 DWConvMicrokernelTester()
2124 .cr(16)
2125 .kr(9)
2126 .channels(channels)
2127 .width(3)
2128 .input_zero_point(0)
2129 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08002130 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002131 }
2132 }
2133
2134 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
2135 TEST_REQUIRES_X86_SSE2;
2136 for (uint32_t channels = 32; channels < 256; channels += 48) {
2137 DWConvMicrokernelTester()
2138 .cr(16)
2139 .kr(9)
2140 .channels(channels)
2141 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002142 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002143 }
2144 }
2145
2146 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
2147 TEST_REQUIRES_X86_SSE2;
2148 for (uint32_t mz = 0; mz < 9; mz++) {
2149 for (uint32_t channels = 32; channels < 256; channels += 48) {
2150 DWConvMicrokernelTester()
2151 .cr(16)
2152 .kr(9)
2153 .channels(channels)
2154 .input_offset(304)
2155 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002156 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002157 }
2158 }
2159 }
2160#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2161
2162
2163#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2164 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
2165 TEST_REQUIRES_X86_SSE41;
2166 DWConvMicrokernelTester()
2167 .cr(8)
2168 .kr(9)
2169 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002170 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002171 }
2172
2173 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
2174 TEST_REQUIRES_X86_SSE41;
2175 for (uint32_t channels = 16; channels < 128; channels += 24) {
2176 DWConvMicrokernelTester()
2177 .cr(8)
2178 .kr(9)
2179 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002180 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002181 }
2182 }
2183
2184 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
2185 TEST_REQUIRES_X86_SSE41;
2186 for (uint32_t channels = 16; channels < 128; channels += 24) {
2187 DWConvMicrokernelTester()
2188 .cr(8)
2189 .kr(9)
2190 .channels(channels)
2191 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002192 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002193 }
2194 }
2195
2196 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
2197 TEST_REQUIRES_X86_SSE41;
2198 for (uint32_t channels = 16; channels < 128; channels += 24) {
2199 DWConvMicrokernelTester()
2200 .cr(8)
2201 .kr(9)
2202 .channels(channels)
2203 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002204 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002205 }
2206 }
2207
2208 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
2209 TEST_REQUIRES_X86_SSE41;
2210 for (uint32_t channels = 1; channels < 8; channels++) {
2211 DWConvMicrokernelTester()
2212 .cr(8)
2213 .kr(9)
2214 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002215 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002216 }
2217 }
2218
2219 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
2220 TEST_REQUIRES_X86_SSE41;
2221 for (uint32_t channels = 9; channels < 16; channels++) {
2222 DWConvMicrokernelTester()
2223 .cr(8)
2224 .kr(9)
2225 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002226 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002227 }
2228 }
2229
2230 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
2231 TEST_REQUIRES_X86_SSE41;
2232 for (uint32_t channels = 9; channels < 16; channels++) {
2233 DWConvMicrokernelTester()
2234 .cr(8)
2235 .kr(9)
2236 .channels(channels)
2237 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002238 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002239 }
2240 }
2241
2242 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
2243 TEST_REQUIRES_X86_SSE41;
2244 for (uint32_t channels = 9; channels < 16; channels++) {
2245 DWConvMicrokernelTester()
2246 .cr(8)
2247 .kr(9)
2248 .channels(channels)
2249 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002250 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002251 }
2252 }
2253
2254 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
2255 TEST_REQUIRES_X86_SSE41;
2256 for (size_t channels = 1; channels <= 40; channels += 7) {
2257 DWConvMicrokernelTester()
2258 .cr(8)
2259 .kr(9)
2260 .channels(channels)
2261 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002262 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002263 }
2264 }
2265
2266 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
2267 TEST_REQUIRES_X86_SSE41;
2268 for (size_t channels = 1; channels <= 40; channels += 7) {
2269 for (size_t step = 2; step <= 9; step++) {
2270 DWConvMicrokernelTester()
2271 .cr(8)
2272 .kr(9)
2273 .channels(channels)
2274 .width(3)
2275 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002276 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002277 }
2278 }
2279 }
2280
2281 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
2282 TEST_REQUIRES_X86_SSE41;
2283 for (size_t channels = 1; channels <= 40; channels += 7) {
2284 DWConvMicrokernelTester()
2285 .cr(8)
2286 .kr(9)
2287 .channels(8)
2288 .width(5)
2289 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002290 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002291 }
2292 }
2293
2294 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
2295 TEST_REQUIRES_X86_SSE41;
2296 for (size_t channels = 1; channels <= 40; channels += 7) {
2297 DWConvMicrokernelTester()
2298 .cr(8)
2299 .kr(9)
2300 .channels(channels)
2301 .width(3)
2302 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002303 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002304 }
2305 }
2306
2307 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
2308 TEST_REQUIRES_X86_SSE41;
2309 for (size_t channels = 1; channels <= 40; channels += 7) {
2310 DWConvMicrokernelTester()
2311 .cr(8)
2312 .kr(9)
2313 .channels(channels)
2314 .width(3)
2315 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002316 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002317 }
2318 }
2319
2320 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_zero_point_only) {
2321 TEST_REQUIRES_X86_SSE41;
2322 for (size_t channels = 1; channels <= 40; channels += 7) {
2323 DWConvMicrokernelTester()
2324 .cr(8)
2325 .kr(9)
2326 .channels(channels)
2327 .width(3)
2328 .input_zero_point(255)
2329 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002330 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002331 }
2332 }
2333
2334 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, kernel_zero_point_only) {
2335 TEST_REQUIRES_X86_SSE41;
2336 for (size_t channels = 1; channels <= 40; channels += 7) {
2337 DWConvMicrokernelTester()
2338 .cr(8)
2339 .kr(9)
2340 .channels(channels)
2341 .width(3)
2342 .input_zero_point(0)
2343 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08002344 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002345 }
2346 }
2347
2348 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
2349 TEST_REQUIRES_X86_SSE41;
2350 for (uint32_t channels = 16; channels < 128; channels += 24) {
2351 DWConvMicrokernelTester()
2352 .cr(8)
2353 .kr(9)
2354 .channels(channels)
2355 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08002356 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002357 }
2358 }
2359
2360 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
2361 TEST_REQUIRES_X86_SSE41;
2362 for (uint32_t mz = 0; mz < 9; mz++) {
2363 for (uint32_t channels = 16; channels < 128; channels += 24) {
2364 DWConvMicrokernelTester()
2365 .cr(8)
2366 .kr(9)
2367 .channels(channels)
2368 .input_offset(176)
2369 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002370 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002371 }
2372 }
2373 }
2374#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2375
2376
2377#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2378 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
2379 TEST_REQUIRES_X86_SSE41;
2380 DWConvMicrokernelTester()
2381 .cr(16)
2382 .kr(9)
2383 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002384 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002385 }
2386
2387 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
2388 TEST_REQUIRES_X86_SSE41;
2389 for (uint32_t channels = 32; channels < 256; channels += 48) {
2390 DWConvMicrokernelTester()
2391 .cr(16)
2392 .kr(9)
2393 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002395 }
2396 }
2397
2398 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
2399 TEST_REQUIRES_X86_SSE41;
2400 for (uint32_t channels = 32; channels < 256; channels += 48) {
2401 DWConvMicrokernelTester()
2402 .cr(16)
2403 .kr(9)
2404 .channels(channels)
2405 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002407 }
2408 }
2409
2410 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
2411 TEST_REQUIRES_X86_SSE41;
2412 for (uint32_t channels = 32; channels < 256; channels += 48) {
2413 DWConvMicrokernelTester()
2414 .cr(16)
2415 .kr(9)
2416 .channels(channels)
2417 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002418 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002419 }
2420 }
2421
2422 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
2423 TEST_REQUIRES_X86_SSE41;
2424 for (uint32_t channels = 1; channels < 16; channels++) {
2425 DWConvMicrokernelTester()
2426 .cr(16)
2427 .kr(9)
2428 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002429 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002430 }
2431 }
2432
2433 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
2434 TEST_REQUIRES_X86_SSE41;
2435 for (uint32_t channels = 17; channels < 32; channels++) {
2436 DWConvMicrokernelTester()
2437 .cr(16)
2438 .kr(9)
2439 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002440 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002441 }
2442 }
2443
2444 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
2445 TEST_REQUIRES_X86_SSE41;
2446 for (uint32_t channels = 17; channels < 32; channels++) {
2447 DWConvMicrokernelTester()
2448 .cr(16)
2449 .kr(9)
2450 .channels(channels)
2451 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002452 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002453 }
2454 }
2455
2456 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
2457 TEST_REQUIRES_X86_SSE41;
2458 for (uint32_t channels = 17; channels < 32; channels++) {
2459 DWConvMicrokernelTester()
2460 .cr(16)
2461 .kr(9)
2462 .channels(channels)
2463 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002464 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002465 }
2466 }
2467
2468 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
2469 TEST_REQUIRES_X86_SSE41;
2470 for (size_t channels = 1; channels <= 80; channels += 15) {
2471 DWConvMicrokernelTester()
2472 .cr(16)
2473 .kr(9)
2474 .channels(channels)
2475 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002476 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002477 }
2478 }
2479
2480 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
2481 TEST_REQUIRES_X86_SSE41;
2482 for (size_t channels = 1; channels <= 80; channels += 15) {
2483 for (size_t step = 2; step <= 9; step++) {
2484 DWConvMicrokernelTester()
2485 .cr(16)
2486 .kr(9)
2487 .channels(channels)
2488 .width(3)
2489 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002490 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002491 }
2492 }
2493 }
2494
2495 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
2496 TEST_REQUIRES_X86_SSE41;
2497 for (size_t channels = 1; channels <= 80; channels += 15) {
2498 DWConvMicrokernelTester()
2499 .cr(16)
2500 .kr(9)
2501 .channels(16)
2502 .width(5)
2503 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002504 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002505 }
2506 }
2507
2508 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
2509 TEST_REQUIRES_X86_SSE41;
2510 for (size_t channels = 1; channels <= 80; channels += 15) {
2511 DWConvMicrokernelTester()
2512 .cr(16)
2513 .kr(9)
2514 .channels(channels)
2515 .width(3)
2516 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002517 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002518 }
2519 }
2520
2521 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
2522 TEST_REQUIRES_X86_SSE41;
2523 for (size_t channels = 1; channels <= 80; channels += 15) {
2524 DWConvMicrokernelTester()
2525 .cr(16)
2526 .kr(9)
2527 .channels(channels)
2528 .width(3)
2529 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002530 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002531 }
2532 }
2533
2534 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_zero_point_only) {
2535 TEST_REQUIRES_X86_SSE41;
2536 for (size_t channels = 1; channels <= 80; channels += 15) {
2537 DWConvMicrokernelTester()
2538 .cr(16)
2539 .kr(9)
2540 .channels(channels)
2541 .width(3)
2542 .input_zero_point(255)
2543 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002544 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002545 }
2546 }
2547
2548 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, kernel_zero_point_only) {
2549 TEST_REQUIRES_X86_SSE41;
2550 for (size_t channels = 1; channels <= 80; channels += 15) {
2551 DWConvMicrokernelTester()
2552 .cr(16)
2553 .kr(9)
2554 .channels(channels)
2555 .width(3)
2556 .input_zero_point(0)
2557 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08002558 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002559 }
2560 }
2561
2562 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
2563 TEST_REQUIRES_X86_SSE41;
2564 for (uint32_t channels = 32; channels < 256; channels += 48) {
2565 DWConvMicrokernelTester()
2566 .cr(16)
2567 .kr(9)
2568 .channels(channels)
2569 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002570 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002571 }
2572 }
2573
2574 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
2575 TEST_REQUIRES_X86_SSE41;
2576 for (uint32_t mz = 0; mz < 9; mz++) {
2577 for (uint32_t channels = 32; channels < 256; channels += 48) {
2578 DWConvMicrokernelTester()
2579 .cr(16)
2580 .kr(9)
2581 .channels(channels)
2582 .input_offset(304)
2583 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002584 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002585 }
2586 }
2587 }
2588#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2589
2590
2591#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2592 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
2593 TEST_REQUIRES_X86_AVX;
2594 DWConvMicrokernelTester()
2595 .cr(8)
2596 .kr(9)
2597 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002599 }
2600
2601 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
2602 TEST_REQUIRES_X86_AVX;
2603 for (uint32_t channels = 16; channels < 128; channels += 24) {
2604 DWConvMicrokernelTester()
2605 .cr(8)
2606 .kr(9)
2607 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002608 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002609 }
2610 }
2611
2612 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
2613 TEST_REQUIRES_X86_AVX;
2614 for (uint32_t channels = 16; channels < 128; channels += 24) {
2615 DWConvMicrokernelTester()
2616 .cr(8)
2617 .kr(9)
2618 .channels(channels)
2619 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002620 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002621 }
2622 }
2623
2624 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
2625 TEST_REQUIRES_X86_AVX;
2626 for (uint32_t channels = 16; channels < 128; channels += 24) {
2627 DWConvMicrokernelTester()
2628 .cr(8)
2629 .kr(9)
2630 .channels(channels)
2631 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002632 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002633 }
2634 }
2635
2636 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
2637 TEST_REQUIRES_X86_AVX;
2638 for (uint32_t channels = 1; channels < 8; channels++) {
2639 DWConvMicrokernelTester()
2640 .cr(8)
2641 .kr(9)
2642 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002643 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002644 }
2645 }
2646
2647 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
2648 TEST_REQUIRES_X86_AVX;
2649 for (uint32_t channels = 9; channels < 16; channels++) {
2650 DWConvMicrokernelTester()
2651 .cr(8)
2652 .kr(9)
2653 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002655 }
2656 }
2657
2658 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
2659 TEST_REQUIRES_X86_AVX;
2660 for (uint32_t channels = 9; channels < 16; channels++) {
2661 DWConvMicrokernelTester()
2662 .cr(8)
2663 .kr(9)
2664 .channels(channels)
2665 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002666 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002667 }
2668 }
2669
2670 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
2671 TEST_REQUIRES_X86_AVX;
2672 for (uint32_t channels = 9; channels < 16; channels++) {
2673 DWConvMicrokernelTester()
2674 .cr(8)
2675 .kr(9)
2676 .channels(channels)
2677 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002678 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002679 }
2680 }
2681
2682 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
2683 TEST_REQUIRES_X86_AVX;
2684 for (size_t channels = 1; channels <= 40; channels += 7) {
2685 DWConvMicrokernelTester()
2686 .cr(8)
2687 .kr(9)
2688 .channels(channels)
2689 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002690 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002691 }
2692 }
2693
2694 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
2695 TEST_REQUIRES_X86_AVX;
2696 for (size_t channels = 1; channels <= 40; channels += 7) {
2697 for (size_t step = 2; step <= 9; step++) {
2698 DWConvMicrokernelTester()
2699 .cr(8)
2700 .kr(9)
2701 .channels(channels)
2702 .width(3)
2703 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002704 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002705 }
2706 }
2707 }
2708
2709 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
2710 TEST_REQUIRES_X86_AVX;
2711 for (size_t channels = 1; channels <= 40; channels += 7) {
2712 DWConvMicrokernelTester()
2713 .cr(8)
2714 .kr(9)
2715 .channels(8)
2716 .width(5)
2717 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002718 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002719 }
2720 }
2721
2722 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
2723 TEST_REQUIRES_X86_AVX;
2724 for (size_t channels = 1; channels <= 40; channels += 7) {
2725 DWConvMicrokernelTester()
2726 .cr(8)
2727 .kr(9)
2728 .channels(channels)
2729 .width(3)
2730 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002731 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002732 }
2733 }
2734
2735 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
2736 TEST_REQUIRES_X86_AVX;
2737 for (size_t channels = 1; channels <= 40; channels += 7) {
2738 DWConvMicrokernelTester()
2739 .cr(8)
2740 .kr(9)
2741 .channels(channels)
2742 .width(3)
2743 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002744 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002745 }
2746 }
2747
2748 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_zero_point_only) {
2749 TEST_REQUIRES_X86_AVX;
2750 for (size_t channels = 1; channels <= 40; channels += 7) {
2751 DWConvMicrokernelTester()
2752 .cr(8)
2753 .kr(9)
2754 .channels(channels)
2755 .width(3)
2756 .input_zero_point(255)
2757 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002758 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002759 }
2760 }
2761
2762 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, kernel_zero_point_only) {
2763 TEST_REQUIRES_X86_AVX;
2764 for (size_t channels = 1; channels <= 40; channels += 7) {
2765 DWConvMicrokernelTester()
2766 .cr(8)
2767 .kr(9)
2768 .channels(channels)
2769 .width(3)
2770 .input_zero_point(0)
2771 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08002772 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002773 }
2774 }
2775
2776 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
2777 TEST_REQUIRES_X86_AVX;
2778 for (uint32_t channels = 16; channels < 128; channels += 24) {
2779 DWConvMicrokernelTester()
2780 .cr(8)
2781 .kr(9)
2782 .channels(channels)
2783 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08002784 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002785 }
2786 }
2787
2788 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
2789 TEST_REQUIRES_X86_AVX;
2790 for (uint32_t mz = 0; mz < 9; mz++) {
2791 for (uint32_t channels = 16; channels < 128; channels += 24) {
2792 DWConvMicrokernelTester()
2793 .cr(8)
2794 .kr(9)
2795 .channels(channels)
2796 .input_offset(176)
2797 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002798 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002799 }
2800 }
2801 }
2802#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2803
2804
2805#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2806 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
2807 TEST_REQUIRES_X86_AVX;
2808 DWConvMicrokernelTester()
2809 .cr(16)
2810 .kr(9)
2811 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002812 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002813 }
2814
2815 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
2816 TEST_REQUIRES_X86_AVX;
2817 for (uint32_t channels = 32; channels < 256; channels += 48) {
2818 DWConvMicrokernelTester()
2819 .cr(16)
2820 .kr(9)
2821 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002822 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002823 }
2824 }
2825
2826 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
2827 TEST_REQUIRES_X86_AVX;
2828 for (uint32_t channels = 32; channels < 256; channels += 48) {
2829 DWConvMicrokernelTester()
2830 .cr(16)
2831 .kr(9)
2832 .channels(channels)
2833 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002834 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002835 }
2836 }
2837
2838 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
2839 TEST_REQUIRES_X86_AVX;
2840 for (uint32_t channels = 32; channels < 256; channels += 48) {
2841 DWConvMicrokernelTester()
2842 .cr(16)
2843 .kr(9)
2844 .channels(channels)
2845 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002846 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002847 }
2848 }
2849
2850 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
2851 TEST_REQUIRES_X86_AVX;
2852 for (uint32_t channels = 1; channels < 16; channels++) {
2853 DWConvMicrokernelTester()
2854 .cr(16)
2855 .kr(9)
2856 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002857 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002858 }
2859 }
2860
2861 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
2862 TEST_REQUIRES_X86_AVX;
2863 for (uint32_t channels = 17; channels < 32; channels++) {
2864 DWConvMicrokernelTester()
2865 .cr(16)
2866 .kr(9)
2867 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002868 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002869 }
2870 }
2871
2872 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
2873 TEST_REQUIRES_X86_AVX;
2874 for (uint32_t channels = 17; channels < 32; channels++) {
2875 DWConvMicrokernelTester()
2876 .cr(16)
2877 .kr(9)
2878 .channels(channels)
2879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002880 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002881 }
2882 }
2883
2884 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
2885 TEST_REQUIRES_X86_AVX;
2886 for (uint32_t channels = 17; channels < 32; channels++) {
2887 DWConvMicrokernelTester()
2888 .cr(16)
2889 .kr(9)
2890 .channels(channels)
2891 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002892 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002893 }
2894 }
2895
2896 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
2897 TEST_REQUIRES_X86_AVX;
2898 for (size_t channels = 1; channels <= 80; channels += 15) {
2899 DWConvMicrokernelTester()
2900 .cr(16)
2901 .kr(9)
2902 .channels(channels)
2903 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002904 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002905 }
2906 }
2907
2908 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
2909 TEST_REQUIRES_X86_AVX;
2910 for (size_t channels = 1; channels <= 80; channels += 15) {
2911 for (size_t step = 2; step <= 9; step++) {
2912 DWConvMicrokernelTester()
2913 .cr(16)
2914 .kr(9)
2915 .channels(channels)
2916 .width(3)
2917 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002918 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002919 }
2920 }
2921 }
2922
2923 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
2924 TEST_REQUIRES_X86_AVX;
2925 for (size_t channels = 1; channels <= 80; channels += 15) {
2926 DWConvMicrokernelTester()
2927 .cr(16)
2928 .kr(9)
2929 .channels(16)
2930 .width(5)
2931 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002932 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002933 }
2934 }
2935
2936 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
2937 TEST_REQUIRES_X86_AVX;
2938 for (size_t channels = 1; channels <= 80; channels += 15) {
2939 DWConvMicrokernelTester()
2940 .cr(16)
2941 .kr(9)
2942 .channels(channels)
2943 .width(3)
2944 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002945 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002946 }
2947 }
2948
2949 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
2950 TEST_REQUIRES_X86_AVX;
2951 for (size_t channels = 1; channels <= 80; channels += 15) {
2952 DWConvMicrokernelTester()
2953 .cr(16)
2954 .kr(9)
2955 .channels(channels)
2956 .width(3)
2957 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002958 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002959 }
2960 }
2961
2962 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_zero_point_only) {
2963 TEST_REQUIRES_X86_AVX;
2964 for (size_t channels = 1; channels <= 80; channels += 15) {
2965 DWConvMicrokernelTester()
2966 .cr(16)
2967 .kr(9)
2968 .channels(channels)
2969 .width(3)
2970 .input_zero_point(255)
2971 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002972 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002973 }
2974 }
2975
2976 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, kernel_zero_point_only) {
2977 TEST_REQUIRES_X86_AVX;
2978 for (size_t channels = 1; channels <= 80; channels += 15) {
2979 DWConvMicrokernelTester()
2980 .cr(16)
2981 .kr(9)
2982 .channels(channels)
2983 .width(3)
2984 .input_zero_point(0)
2985 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08002986 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002987 }
2988 }
2989
2990 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
2991 TEST_REQUIRES_X86_AVX;
2992 for (uint32_t channels = 32; channels < 256; channels += 48) {
2993 DWConvMicrokernelTester()
2994 .cr(16)
2995 .kr(9)
2996 .channels(channels)
2997 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002998 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07002999 }
3000 }
3001
3002 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
3003 TEST_REQUIRES_X86_AVX;
3004 for (uint32_t mz = 0; mz < 9; mz++) {
3005 for (uint32_t channels = 32; channels < 256; channels += 48) {
3006 DWConvMicrokernelTester()
3007 .cr(16)
3008 .kr(9)
3009 .channels(channels)
3010 .input_offset(304)
3011 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003012 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07003013 }
3014 }
3015 }
3016#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3017
3018
3019#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003020 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
3021 TEST_REQUIRES_X86_SSE41;
3022 DWConvMicrokernelTester()
3023 .cr(8)
3024 .kr(9)
3025 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003026 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003027 }
3028
3029 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
3030 TEST_REQUIRES_X86_SSE41;
3031 for (uint32_t channels = 16; channels < 128; channels += 24) {
3032 DWConvMicrokernelTester()
3033 .cr(8)
3034 .kr(9)
3035 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003036 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003037 }
3038 }
3039
3040 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
3041 TEST_REQUIRES_X86_SSE41;
3042 for (uint32_t channels = 16; channels < 128; channels += 24) {
3043 DWConvMicrokernelTester()
3044 .cr(8)
3045 .kr(9)
3046 .channels(channels)
3047 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003048 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003049 }
3050 }
3051
3052 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
3053 TEST_REQUIRES_X86_SSE41;
3054 for (uint32_t channels = 16; channels < 128; channels += 24) {
3055 DWConvMicrokernelTester()
3056 .cr(8)
3057 .kr(9)
3058 .channels(channels)
3059 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003060 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003061 }
3062 }
3063
3064 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
3065 TEST_REQUIRES_X86_SSE41;
3066 for (uint32_t channels = 1; channels < 8; channels++) {
3067 DWConvMicrokernelTester()
3068 .cr(8)
3069 .kr(9)
3070 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003071 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003072 }
3073 }
3074
3075 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
3076 TEST_REQUIRES_X86_SSE41;
3077 for (uint32_t channels = 9; channels < 16; channels++) {
3078 DWConvMicrokernelTester()
3079 .cr(8)
3080 .kr(9)
3081 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003082 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003083 }
3084 }
3085
3086 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
3087 TEST_REQUIRES_X86_SSE41;
3088 for (uint32_t channels = 9; channels < 16; channels++) {
3089 DWConvMicrokernelTester()
3090 .cr(8)
3091 .kr(9)
3092 .channels(channels)
3093 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003094 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003095 }
3096 }
3097
3098 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
3099 TEST_REQUIRES_X86_SSE41;
3100 for (uint32_t channels = 9; channels < 16; channels++) {
3101 DWConvMicrokernelTester()
3102 .cr(8)
3103 .kr(9)
3104 .channels(channels)
3105 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003106 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003107 }
3108 }
3109
3110 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
3111 TEST_REQUIRES_X86_SSE41;
3112 for (size_t channels = 1; channels <= 40; channels += 7) {
3113 DWConvMicrokernelTester()
3114 .cr(8)
3115 .kr(9)
3116 .channels(channels)
3117 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003118 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003119 }
3120 }
3121
3122 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
3123 TEST_REQUIRES_X86_SSE41;
3124 for (size_t channels = 1; channels <= 40; channels += 7) {
3125 for (size_t step = 2; step <= 9; step++) {
3126 DWConvMicrokernelTester()
3127 .cr(8)
3128 .kr(9)
3129 .channels(channels)
3130 .width(3)
3131 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003132 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003133 }
3134 }
3135 }
3136
3137 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
3138 TEST_REQUIRES_X86_SSE41;
3139 for (size_t channels = 1; channels <= 40; channels += 7) {
3140 DWConvMicrokernelTester()
3141 .cr(8)
3142 .kr(9)
3143 .channels(8)
3144 .width(5)
3145 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003146 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003147 }
3148 }
3149
3150 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
3151 TEST_REQUIRES_X86_SSE41;
3152 for (size_t channels = 1; channels <= 40; channels += 7) {
3153 DWConvMicrokernelTester()
3154 .cr(8)
3155 .kr(9)
3156 .channels(channels)
3157 .width(3)
3158 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003159 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003160 }
3161 }
3162
3163 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
3164 TEST_REQUIRES_X86_SSE41;
3165 for (size_t channels = 1; channels <= 40; channels += 7) {
3166 DWConvMicrokernelTester()
3167 .cr(8)
3168 .kr(9)
3169 .channels(channels)
3170 .width(3)
3171 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003172 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003173 }
3174 }
3175
3176 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_zero_point_only) {
3177 TEST_REQUIRES_X86_SSE41;
3178 for (size_t channels = 1; channels <= 40; channels += 7) {
3179 DWConvMicrokernelTester()
3180 .cr(8)
3181 .kr(9)
3182 .channels(channels)
3183 .width(3)
3184 .input_zero_point(255)
3185 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003186 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003187 }
3188 }
3189
3190 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, kernel_zero_point_only) {
3191 TEST_REQUIRES_X86_SSE41;
3192 for (size_t channels = 1; channels <= 40; channels += 7) {
3193 DWConvMicrokernelTester()
3194 .cr(8)
3195 .kr(9)
3196 .channels(channels)
3197 .width(3)
3198 .input_zero_point(0)
3199 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08003200 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003201 }
3202 }
3203
3204 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
3205 TEST_REQUIRES_X86_SSE41;
3206 for (uint32_t channels = 16; channels < 128; channels += 24) {
3207 DWConvMicrokernelTester()
3208 .cr(8)
3209 .kr(9)
3210 .channels(channels)
3211 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003212 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003213 }
3214 }
3215
3216 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
3217 TEST_REQUIRES_X86_SSE41;
3218 for (uint32_t mz = 0; mz < 9; mz++) {
3219 for (uint32_t channels = 16; channels < 128; channels += 24) {
3220 DWConvMicrokernelTester()
3221 .cr(8)
3222 .kr(9)
3223 .channels(channels)
3224 .input_offset(176)
3225 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003226 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003227 }
3228 }
3229 }
3230#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3231
3232
3233#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3234 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
3235 TEST_REQUIRES_X86_SSE41;
3236 DWConvMicrokernelTester()
3237 .cr(16)
3238 .kr(9)
3239 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003240 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003241 }
3242
3243 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
3244 TEST_REQUIRES_X86_SSE41;
3245 for (uint32_t channels = 32; channels < 256; channels += 48) {
3246 DWConvMicrokernelTester()
3247 .cr(16)
3248 .kr(9)
3249 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003250 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003251 }
3252 }
3253
3254 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
3255 TEST_REQUIRES_X86_SSE41;
3256 for (uint32_t channels = 32; channels < 256; channels += 48) {
3257 DWConvMicrokernelTester()
3258 .cr(16)
3259 .kr(9)
3260 .channels(channels)
3261 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003262 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003263 }
3264 }
3265
3266 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
3267 TEST_REQUIRES_X86_SSE41;
3268 for (uint32_t channels = 32; channels < 256; channels += 48) {
3269 DWConvMicrokernelTester()
3270 .cr(16)
3271 .kr(9)
3272 .channels(channels)
3273 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003274 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003275 }
3276 }
3277
3278 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
3279 TEST_REQUIRES_X86_SSE41;
3280 for (uint32_t channels = 1; channels < 16; channels++) {
3281 DWConvMicrokernelTester()
3282 .cr(16)
3283 .kr(9)
3284 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003285 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003286 }
3287 }
3288
3289 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
3290 TEST_REQUIRES_X86_SSE41;
3291 for (uint32_t channels = 17; channels < 32; channels++) {
3292 DWConvMicrokernelTester()
3293 .cr(16)
3294 .kr(9)
3295 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003296 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003297 }
3298 }
3299
3300 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
3301 TEST_REQUIRES_X86_SSE41;
3302 for (uint32_t channels = 17; channels < 32; channels++) {
3303 DWConvMicrokernelTester()
3304 .cr(16)
3305 .kr(9)
3306 .channels(channels)
3307 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003308 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003309 }
3310 }
3311
3312 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
3313 TEST_REQUIRES_X86_SSE41;
3314 for (uint32_t channels = 17; channels < 32; channels++) {
3315 DWConvMicrokernelTester()
3316 .cr(16)
3317 .kr(9)
3318 .channels(channels)
3319 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003320 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003321 }
3322 }
3323
3324 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
3325 TEST_REQUIRES_X86_SSE41;
3326 for (size_t channels = 1; channels <= 80; channels += 15) {
3327 DWConvMicrokernelTester()
3328 .cr(16)
3329 .kr(9)
3330 .channels(channels)
3331 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003332 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003333 }
3334 }
3335
3336 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
3337 TEST_REQUIRES_X86_SSE41;
3338 for (size_t channels = 1; channels <= 80; channels += 15) {
3339 for (size_t step = 2; step <= 9; step++) {
3340 DWConvMicrokernelTester()
3341 .cr(16)
3342 .kr(9)
3343 .channels(channels)
3344 .width(3)
3345 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003346 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003347 }
3348 }
3349 }
3350
3351 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
3352 TEST_REQUIRES_X86_SSE41;
3353 for (size_t channels = 1; channels <= 80; channels += 15) {
3354 DWConvMicrokernelTester()
3355 .cr(16)
3356 .kr(9)
3357 .channels(16)
3358 .width(5)
3359 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003360 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003361 }
3362 }
3363
3364 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
3365 TEST_REQUIRES_X86_SSE41;
3366 for (size_t channels = 1; channels <= 80; channels += 15) {
3367 DWConvMicrokernelTester()
3368 .cr(16)
3369 .kr(9)
3370 .channels(channels)
3371 .width(3)
3372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003373 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003374 }
3375 }
3376
3377 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
3378 TEST_REQUIRES_X86_SSE41;
3379 for (size_t channels = 1; channels <= 80; channels += 15) {
3380 DWConvMicrokernelTester()
3381 .cr(16)
3382 .kr(9)
3383 .channels(channels)
3384 .width(3)
3385 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003386 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003387 }
3388 }
3389
3390 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_zero_point_only) {
3391 TEST_REQUIRES_X86_SSE41;
3392 for (size_t channels = 1; channels <= 80; channels += 15) {
3393 DWConvMicrokernelTester()
3394 .cr(16)
3395 .kr(9)
3396 .channels(channels)
3397 .width(3)
3398 .input_zero_point(255)
3399 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003400 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003401 }
3402 }
3403
3404 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, kernel_zero_point_only) {
3405 TEST_REQUIRES_X86_SSE41;
3406 for (size_t channels = 1; channels <= 80; channels += 15) {
3407 DWConvMicrokernelTester()
3408 .cr(16)
3409 .kr(9)
3410 .channels(channels)
3411 .width(3)
3412 .input_zero_point(0)
3413 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08003414 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003415 }
3416 }
3417
3418 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
3419 TEST_REQUIRES_X86_SSE41;
3420 for (uint32_t channels = 32; channels < 256; channels += 48) {
3421 DWConvMicrokernelTester()
3422 .cr(16)
3423 .kr(9)
3424 .channels(channels)
3425 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08003426 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003427 }
3428 }
3429
3430 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
3431 TEST_REQUIRES_X86_SSE41;
3432 for (uint32_t mz = 0; mz < 9; mz++) {
3433 for (uint32_t channels = 32; channels < 256; channels += 48) {
3434 DWConvMicrokernelTester()
3435 .cr(16)
3436 .kr(9)
3437 .channels(channels)
3438 .input_offset(304)
3439 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003440 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003441 }
3442 }
3443 }
3444#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3445
3446
3447#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003448 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
3449 TEST_REQUIRES_X86_AVX;
3450 DWConvMicrokernelTester()
3451 .cr(8)
3452 .kr(9)
3453 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003454 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003455 }
3456
3457 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
3458 TEST_REQUIRES_X86_AVX;
3459 for (uint32_t channels = 16; channels < 128; channels += 24) {
3460 DWConvMicrokernelTester()
3461 .cr(8)
3462 .kr(9)
3463 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003464 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003465 }
3466 }
3467
3468 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
3469 TEST_REQUIRES_X86_AVX;
3470 for (uint32_t channels = 16; channels < 128; channels += 24) {
3471 DWConvMicrokernelTester()
3472 .cr(8)
3473 .kr(9)
3474 .channels(channels)
3475 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003476 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003477 }
3478 }
3479
3480 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
3481 TEST_REQUIRES_X86_AVX;
3482 for (uint32_t channels = 16; channels < 128; channels += 24) {
3483 DWConvMicrokernelTester()
3484 .cr(8)
3485 .kr(9)
3486 .channels(channels)
3487 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003488 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003489 }
3490 }
3491
3492 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
3493 TEST_REQUIRES_X86_AVX;
3494 for (uint32_t channels = 1; channels < 8; channels++) {
3495 DWConvMicrokernelTester()
3496 .cr(8)
3497 .kr(9)
3498 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003499 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003500 }
3501 }
3502
3503 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
3504 TEST_REQUIRES_X86_AVX;
3505 for (uint32_t channels = 9; channels < 16; channels++) {
3506 DWConvMicrokernelTester()
3507 .cr(8)
3508 .kr(9)
3509 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003510 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003511 }
3512 }
3513
3514 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
3515 TEST_REQUIRES_X86_AVX;
3516 for (uint32_t channels = 9; channels < 16; channels++) {
3517 DWConvMicrokernelTester()
3518 .cr(8)
3519 .kr(9)
3520 .channels(channels)
3521 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003522 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003523 }
3524 }
3525
3526 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
3527 TEST_REQUIRES_X86_AVX;
3528 for (uint32_t channels = 9; channels < 16; channels++) {
3529 DWConvMicrokernelTester()
3530 .cr(8)
3531 .kr(9)
3532 .channels(channels)
3533 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003534 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003535 }
3536 }
3537
3538 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
3539 TEST_REQUIRES_X86_AVX;
3540 for (size_t channels = 1; channels <= 40; channels += 7) {
3541 DWConvMicrokernelTester()
3542 .cr(8)
3543 .kr(9)
3544 .channels(channels)
3545 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003546 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003547 }
3548 }
3549
3550 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
3551 TEST_REQUIRES_X86_AVX;
3552 for (size_t channels = 1; channels <= 40; channels += 7) {
3553 for (size_t step = 2; step <= 9; step++) {
3554 DWConvMicrokernelTester()
3555 .cr(8)
3556 .kr(9)
3557 .channels(channels)
3558 .width(3)
3559 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003560 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003561 }
3562 }
3563 }
3564
3565 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
3566 TEST_REQUIRES_X86_AVX;
3567 for (size_t channels = 1; channels <= 40; channels += 7) {
3568 DWConvMicrokernelTester()
3569 .cr(8)
3570 .kr(9)
3571 .channels(8)
3572 .width(5)
3573 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003574 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003575 }
3576 }
3577
3578 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
3579 TEST_REQUIRES_X86_AVX;
3580 for (size_t channels = 1; channels <= 40; channels += 7) {
3581 DWConvMicrokernelTester()
3582 .cr(8)
3583 .kr(9)
3584 .channels(channels)
3585 .width(3)
3586 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003587 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003588 }
3589 }
3590
3591 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
3592 TEST_REQUIRES_X86_AVX;
3593 for (size_t channels = 1; channels <= 40; channels += 7) {
3594 DWConvMicrokernelTester()
3595 .cr(8)
3596 .kr(9)
3597 .channels(channels)
3598 .width(3)
3599 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003600 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003601 }
3602 }
3603
3604 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_zero_point_only) {
3605 TEST_REQUIRES_X86_AVX;
3606 for (size_t channels = 1; channels <= 40; channels += 7) {
3607 DWConvMicrokernelTester()
3608 .cr(8)
3609 .kr(9)
3610 .channels(channels)
3611 .width(3)
3612 .input_zero_point(255)
3613 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003614 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003615 }
3616 }
3617
3618 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, kernel_zero_point_only) {
3619 TEST_REQUIRES_X86_AVX;
3620 for (size_t channels = 1; channels <= 40; channels += 7) {
3621 DWConvMicrokernelTester()
3622 .cr(8)
3623 .kr(9)
3624 .channels(channels)
3625 .width(3)
3626 .input_zero_point(0)
3627 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08003628 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003629 }
3630 }
3631
3632 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
3633 TEST_REQUIRES_X86_AVX;
3634 for (uint32_t channels = 16; channels < 128; channels += 24) {
3635 DWConvMicrokernelTester()
3636 .cr(8)
3637 .kr(9)
3638 .channels(channels)
3639 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003640 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003641 }
3642 }
3643
3644 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
3645 TEST_REQUIRES_X86_AVX;
3646 for (uint32_t mz = 0; mz < 9; mz++) {
3647 for (uint32_t channels = 16; channels < 128; channels += 24) {
3648 DWConvMicrokernelTester()
3649 .cr(8)
3650 .kr(9)
3651 .channels(channels)
3652 .input_offset(176)
3653 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003654 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003655 }
3656 }
3657 }
3658#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3659
3660
3661#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3662 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
3663 TEST_REQUIRES_X86_AVX;
3664 DWConvMicrokernelTester()
3665 .cr(16)
3666 .kr(9)
3667 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003668 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003669 }
3670
3671 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
3672 TEST_REQUIRES_X86_AVX;
3673 for (uint32_t channels = 32; channels < 256; channels += 48) {
3674 DWConvMicrokernelTester()
3675 .cr(16)
3676 .kr(9)
3677 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003678 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003679 }
3680 }
3681
3682 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
3683 TEST_REQUIRES_X86_AVX;
3684 for (uint32_t channels = 32; channels < 256; channels += 48) {
3685 DWConvMicrokernelTester()
3686 .cr(16)
3687 .kr(9)
3688 .channels(channels)
3689 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003690 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003691 }
3692 }
3693
3694 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
3695 TEST_REQUIRES_X86_AVX;
3696 for (uint32_t channels = 32; channels < 256; channels += 48) {
3697 DWConvMicrokernelTester()
3698 .cr(16)
3699 .kr(9)
3700 .channels(channels)
3701 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003702 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003703 }
3704 }
3705
3706 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
3707 TEST_REQUIRES_X86_AVX;
3708 for (uint32_t channels = 1; channels < 16; channels++) {
3709 DWConvMicrokernelTester()
3710 .cr(16)
3711 .kr(9)
3712 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003713 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003714 }
3715 }
3716
3717 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
3718 TEST_REQUIRES_X86_AVX;
3719 for (uint32_t channels = 17; channels < 32; channels++) {
3720 DWConvMicrokernelTester()
3721 .cr(16)
3722 .kr(9)
3723 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003724 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003725 }
3726 }
3727
3728 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
3729 TEST_REQUIRES_X86_AVX;
3730 for (uint32_t channels = 17; channels < 32; channels++) {
3731 DWConvMicrokernelTester()
3732 .cr(16)
3733 .kr(9)
3734 .channels(channels)
3735 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003736 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003737 }
3738 }
3739
3740 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
3741 TEST_REQUIRES_X86_AVX;
3742 for (uint32_t channels = 17; channels < 32; channels++) {
3743 DWConvMicrokernelTester()
3744 .cr(16)
3745 .kr(9)
3746 .channels(channels)
3747 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003748 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003749 }
3750 }
3751
3752 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
3753 TEST_REQUIRES_X86_AVX;
3754 for (size_t channels = 1; channels <= 80; channels += 15) {
3755 DWConvMicrokernelTester()
3756 .cr(16)
3757 .kr(9)
3758 .channels(channels)
3759 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003760 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003761 }
3762 }
3763
3764 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
3765 TEST_REQUIRES_X86_AVX;
3766 for (size_t channels = 1; channels <= 80; channels += 15) {
3767 for (size_t step = 2; step <= 9; step++) {
3768 DWConvMicrokernelTester()
3769 .cr(16)
3770 .kr(9)
3771 .channels(channels)
3772 .width(3)
3773 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003774 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003775 }
3776 }
3777 }
3778
3779 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
3780 TEST_REQUIRES_X86_AVX;
3781 for (size_t channels = 1; channels <= 80; channels += 15) {
3782 DWConvMicrokernelTester()
3783 .cr(16)
3784 .kr(9)
3785 .channels(16)
3786 .width(5)
3787 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003788 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003789 }
3790 }
3791
3792 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
3793 TEST_REQUIRES_X86_AVX;
3794 for (size_t channels = 1; channels <= 80; channels += 15) {
3795 DWConvMicrokernelTester()
3796 .cr(16)
3797 .kr(9)
3798 .channels(channels)
3799 .width(3)
3800 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003801 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003802 }
3803 }
3804
3805 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
3806 TEST_REQUIRES_X86_AVX;
3807 for (size_t channels = 1; channels <= 80; channels += 15) {
3808 DWConvMicrokernelTester()
3809 .cr(16)
3810 .kr(9)
3811 .channels(channels)
3812 .width(3)
3813 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003814 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003815 }
3816 }
3817
3818 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_zero_point_only) {
3819 TEST_REQUIRES_X86_AVX;
3820 for (size_t channels = 1; channels <= 80; channels += 15) {
3821 DWConvMicrokernelTester()
3822 .cr(16)
3823 .kr(9)
3824 .channels(channels)
3825 .width(3)
3826 .input_zero_point(255)
3827 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003829 }
3830 }
3831
3832 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, kernel_zero_point_only) {
3833 TEST_REQUIRES_X86_AVX;
3834 for (size_t channels = 1; channels <= 80; channels += 15) {
3835 DWConvMicrokernelTester()
3836 .cr(16)
3837 .kr(9)
3838 .channels(channels)
3839 .width(3)
3840 .input_zero_point(0)
3841 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08003842 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003843 }
3844 }
3845
3846 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
3847 TEST_REQUIRES_X86_AVX;
3848 for (uint32_t channels = 32; channels < 256; channels += 48) {
3849 DWConvMicrokernelTester()
3850 .cr(16)
3851 .kr(9)
3852 .channels(channels)
3853 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08003854 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003855 }
3856 }
3857
3858 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
3859 TEST_REQUIRES_X86_AVX;
3860 for (uint32_t mz = 0; mz < 9; mz++) {
3861 for (uint32_t channels = 32; channels < 256; channels += 48) {
3862 DWConvMicrokernelTester()
3863 .cr(16)
3864 .kr(9)
3865 .channels(channels)
3866 .input_offset(304)
3867 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003868 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003869 }
3870 }
3871 }
3872#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3873
3874
3875#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003876 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
3877 TEST_REQUIRES_X86_XOP;
3878 DWConvMicrokernelTester()
3879 .cr(8)
3880 .kr(9)
3881 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003882 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003883 }
3884
3885 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
3886 TEST_REQUIRES_X86_XOP;
3887 for (uint32_t channels = 16; channels < 128; channels += 24) {
3888 DWConvMicrokernelTester()
3889 .cr(8)
3890 .kr(9)
3891 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003892 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003893 }
3894 }
3895
3896 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
3897 TEST_REQUIRES_X86_XOP;
3898 for (uint32_t channels = 16; channels < 128; channels += 24) {
3899 DWConvMicrokernelTester()
3900 .cr(8)
3901 .kr(9)
3902 .channels(channels)
3903 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003904 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003905 }
3906 }
3907
3908 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
3909 TEST_REQUIRES_X86_XOP;
3910 for (uint32_t channels = 16; channels < 128; channels += 24) {
3911 DWConvMicrokernelTester()
3912 .cr(8)
3913 .kr(9)
3914 .channels(channels)
3915 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003916 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003917 }
3918 }
3919
3920 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
3921 TEST_REQUIRES_X86_XOP;
3922 for (uint32_t channels = 1; channels < 8; channels++) {
3923 DWConvMicrokernelTester()
3924 .cr(8)
3925 .kr(9)
3926 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003927 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003928 }
3929 }
3930
3931 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
3932 TEST_REQUIRES_X86_XOP;
3933 for (uint32_t channels = 9; channels < 16; channels++) {
3934 DWConvMicrokernelTester()
3935 .cr(8)
3936 .kr(9)
3937 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003938 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003939 }
3940 }
3941
3942 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
3943 TEST_REQUIRES_X86_XOP;
3944 for (uint32_t channels = 9; channels < 16; channels++) {
3945 DWConvMicrokernelTester()
3946 .cr(8)
3947 .kr(9)
3948 .channels(channels)
3949 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003950 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003951 }
3952 }
3953
3954 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
3955 TEST_REQUIRES_X86_XOP;
3956 for (uint32_t channels = 9; channels < 16; channels++) {
3957 DWConvMicrokernelTester()
3958 .cr(8)
3959 .kr(9)
3960 .channels(channels)
3961 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003962 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003963 }
3964 }
3965
3966 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
3967 TEST_REQUIRES_X86_XOP;
3968 for (size_t channels = 1; channels <= 40; channels += 7) {
3969 DWConvMicrokernelTester()
3970 .cr(8)
3971 .kr(9)
3972 .channels(channels)
3973 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003974 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003975 }
3976 }
3977
3978 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
3979 TEST_REQUIRES_X86_XOP;
3980 for (size_t channels = 1; channels <= 40; channels += 7) {
3981 for (size_t step = 2; step <= 9; step++) {
3982 DWConvMicrokernelTester()
3983 .cr(8)
3984 .kr(9)
3985 .channels(channels)
3986 .width(3)
3987 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003988 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07003989 }
3990 }
3991 }
3992
3993 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
3994 TEST_REQUIRES_X86_XOP;
3995 for (size_t channels = 1; channels <= 40; channels += 7) {
3996 DWConvMicrokernelTester()
3997 .cr(8)
3998 .kr(9)
3999 .channels(8)
4000 .width(5)
4001 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004002 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004003 }
4004 }
4005
4006 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
4007 TEST_REQUIRES_X86_XOP;
4008 for (size_t channels = 1; channels <= 40; channels += 7) {
4009 DWConvMicrokernelTester()
4010 .cr(8)
4011 .kr(9)
4012 .channels(channels)
4013 .width(3)
4014 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004015 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004016 }
4017 }
4018
4019 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
4020 TEST_REQUIRES_X86_XOP;
4021 for (size_t channels = 1; channels <= 40; channels += 7) {
4022 DWConvMicrokernelTester()
4023 .cr(8)
4024 .kr(9)
4025 .channels(channels)
4026 .width(3)
4027 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004028 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004029 }
4030 }
4031
4032 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_zero_point_only) {
4033 TEST_REQUIRES_X86_XOP;
4034 for (size_t channels = 1; channels <= 40; channels += 7) {
4035 DWConvMicrokernelTester()
4036 .cr(8)
4037 .kr(9)
4038 .channels(channels)
4039 .width(3)
4040 .input_zero_point(255)
4041 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004042 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004043 }
4044 }
4045
4046 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, kernel_zero_point_only) {
4047 TEST_REQUIRES_X86_XOP;
4048 for (size_t channels = 1; channels <= 40; channels += 7) {
4049 DWConvMicrokernelTester()
4050 .cr(8)
4051 .kr(9)
4052 .channels(channels)
4053 .width(3)
4054 .input_zero_point(0)
4055 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08004056 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004057 }
4058 }
4059
4060 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
4061 TEST_REQUIRES_X86_XOP;
4062 for (uint32_t channels = 16; channels < 128; channels += 24) {
4063 DWConvMicrokernelTester()
4064 .cr(8)
4065 .kr(9)
4066 .channels(channels)
4067 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004068 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004069 }
4070 }
4071
4072 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
4073 TEST_REQUIRES_X86_XOP;
4074 for (uint32_t mz = 0; mz < 9; mz++) {
4075 for (uint32_t channels = 16; channels < 128; channels += 24) {
4076 DWConvMicrokernelTester()
4077 .cr(8)
4078 .kr(9)
4079 .channels(channels)
4080 .input_offset(176)
4081 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004082 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004083 }
4084 }
4085 }
4086#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4087
4088
4089#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4090 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
4091 TEST_REQUIRES_X86_XOP;
4092 DWConvMicrokernelTester()
4093 .cr(16)
4094 .kr(9)
4095 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004096 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004097 }
4098
4099 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
4100 TEST_REQUIRES_X86_XOP;
4101 for (uint32_t channels = 32; channels < 256; channels += 48) {
4102 DWConvMicrokernelTester()
4103 .cr(16)
4104 .kr(9)
4105 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004106 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004107 }
4108 }
4109
4110 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
4111 TEST_REQUIRES_X86_XOP;
4112 for (uint32_t channels = 32; channels < 256; channels += 48) {
4113 DWConvMicrokernelTester()
4114 .cr(16)
4115 .kr(9)
4116 .channels(channels)
4117 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004118 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004119 }
4120 }
4121
4122 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
4123 TEST_REQUIRES_X86_XOP;
4124 for (uint32_t channels = 32; channels < 256; channels += 48) {
4125 DWConvMicrokernelTester()
4126 .cr(16)
4127 .kr(9)
4128 .channels(channels)
4129 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004130 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004131 }
4132 }
4133
4134 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
4135 TEST_REQUIRES_X86_XOP;
4136 for (uint32_t channels = 1; channels < 16; channels++) {
4137 DWConvMicrokernelTester()
4138 .cr(16)
4139 .kr(9)
4140 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004141 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004142 }
4143 }
4144
4145 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
4146 TEST_REQUIRES_X86_XOP;
4147 for (uint32_t channels = 17; channels < 32; channels++) {
4148 DWConvMicrokernelTester()
4149 .cr(16)
4150 .kr(9)
4151 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004152 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004153 }
4154 }
4155
4156 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
4157 TEST_REQUIRES_X86_XOP;
4158 for (uint32_t channels = 17; channels < 32; channels++) {
4159 DWConvMicrokernelTester()
4160 .cr(16)
4161 .kr(9)
4162 .channels(channels)
4163 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004164 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004165 }
4166 }
4167
4168 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
4169 TEST_REQUIRES_X86_XOP;
4170 for (uint32_t channels = 17; channels < 32; channels++) {
4171 DWConvMicrokernelTester()
4172 .cr(16)
4173 .kr(9)
4174 .channels(channels)
4175 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004177 }
4178 }
4179
4180 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
4181 TEST_REQUIRES_X86_XOP;
4182 for (size_t channels = 1; channels <= 80; channels += 15) {
4183 DWConvMicrokernelTester()
4184 .cr(16)
4185 .kr(9)
4186 .channels(channels)
4187 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004188 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004189 }
4190 }
4191
4192 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
4193 TEST_REQUIRES_X86_XOP;
4194 for (size_t channels = 1; channels <= 80; channels += 15) {
4195 for (size_t step = 2; step <= 9; step++) {
4196 DWConvMicrokernelTester()
4197 .cr(16)
4198 .kr(9)
4199 .channels(channels)
4200 .width(3)
4201 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004203 }
4204 }
4205 }
4206
4207 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
4208 TEST_REQUIRES_X86_XOP;
4209 for (size_t channels = 1; channels <= 80; channels += 15) {
4210 DWConvMicrokernelTester()
4211 .cr(16)
4212 .kr(9)
4213 .channels(16)
4214 .width(5)
4215 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004216 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004217 }
4218 }
4219
4220 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
4221 TEST_REQUIRES_X86_XOP;
4222 for (size_t channels = 1; channels <= 80; channels += 15) {
4223 DWConvMicrokernelTester()
4224 .cr(16)
4225 .kr(9)
4226 .channels(channels)
4227 .width(3)
4228 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004229 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004230 }
4231 }
4232
4233 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
4234 TEST_REQUIRES_X86_XOP;
4235 for (size_t channels = 1; channels <= 80; channels += 15) {
4236 DWConvMicrokernelTester()
4237 .cr(16)
4238 .kr(9)
4239 .channels(channels)
4240 .width(3)
4241 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004242 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004243 }
4244 }
4245
4246 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_zero_point_only) {
4247 TEST_REQUIRES_X86_XOP;
4248 for (size_t channels = 1; channels <= 80; channels += 15) {
4249 DWConvMicrokernelTester()
4250 .cr(16)
4251 .kr(9)
4252 .channels(channels)
4253 .width(3)
4254 .input_zero_point(255)
4255 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004256 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004257 }
4258 }
4259
4260 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, kernel_zero_point_only) {
4261 TEST_REQUIRES_X86_XOP;
4262 for (size_t channels = 1; channels <= 80; channels += 15) {
4263 DWConvMicrokernelTester()
4264 .cr(16)
4265 .kr(9)
4266 .channels(channels)
4267 .width(3)
4268 .input_zero_point(0)
4269 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08004270 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004271 }
4272 }
4273
4274 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
4275 TEST_REQUIRES_X86_XOP;
4276 for (uint32_t channels = 32; channels < 256; channels += 48) {
4277 DWConvMicrokernelTester()
4278 .cr(16)
4279 .kr(9)
4280 .channels(channels)
4281 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004282 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004283 }
4284 }
4285
4286 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
4287 TEST_REQUIRES_X86_XOP;
4288 for (uint32_t mz = 0; mz < 9; mz++) {
4289 for (uint32_t channels = 32; channels < 256; channels += 48) {
4290 DWConvMicrokernelTester()
4291 .cr(16)
4292 .kr(9)
4293 .channels(channels)
4294 .input_offset(304)
4295 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004296 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -07004297 }
4298 }
4299 }
4300#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4301
4302
4303#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09c312b2021-07-09 00:45:04 -07004304 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
4305 TEST_REQUIRES_X86_AVX2;
4306 DWConvMicrokernelTester()
4307 .cr(8)
4308 .kr(9)
4309 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004310 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004311 }
4312
4313 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
4314 TEST_REQUIRES_X86_AVX2;
4315 for (uint32_t channels = 16; channels < 128; channels += 24) {
4316 DWConvMicrokernelTester()
4317 .cr(8)
4318 .kr(9)
4319 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004320 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004321 }
4322 }
4323
4324 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
4325 TEST_REQUIRES_X86_AVX2;
4326 for (uint32_t channels = 16; channels < 128; channels += 24) {
4327 DWConvMicrokernelTester()
4328 .cr(8)
4329 .kr(9)
4330 .channels(channels)
4331 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004332 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004333 }
4334 }
4335
4336 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
4337 TEST_REQUIRES_X86_AVX2;
4338 for (uint32_t channels = 16; channels < 128; channels += 24) {
4339 DWConvMicrokernelTester()
4340 .cr(8)
4341 .kr(9)
4342 .channels(channels)
4343 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004344 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004345 }
4346 }
4347
4348 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
4349 TEST_REQUIRES_X86_AVX2;
4350 for (uint32_t channels = 1; channels < 8; channels++) {
4351 DWConvMicrokernelTester()
4352 .cr(8)
4353 .kr(9)
4354 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004355 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004356 }
4357 }
4358
4359 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
4360 TEST_REQUIRES_X86_AVX2;
4361 for (uint32_t channels = 9; channels < 16; channels++) {
4362 DWConvMicrokernelTester()
4363 .cr(8)
4364 .kr(9)
4365 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004366 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004367 }
4368 }
4369
4370 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
4371 TEST_REQUIRES_X86_AVX2;
4372 for (uint32_t channels = 9; channels < 16; channels++) {
4373 DWConvMicrokernelTester()
4374 .cr(8)
4375 .kr(9)
4376 .channels(channels)
4377 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004378 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004379 }
4380 }
4381
4382 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
4383 TEST_REQUIRES_X86_AVX2;
4384 for (uint32_t channels = 9; channels < 16; channels++) {
4385 DWConvMicrokernelTester()
4386 .cr(8)
4387 .kr(9)
4388 .channels(channels)
4389 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004390 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004391 }
4392 }
4393
4394 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
4395 TEST_REQUIRES_X86_AVX2;
4396 for (size_t channels = 1; channels <= 40; channels += 7) {
4397 DWConvMicrokernelTester()
4398 .cr(8)
4399 .kr(9)
4400 .channels(channels)
4401 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004402 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004403 }
4404 }
4405
4406 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
4407 TEST_REQUIRES_X86_AVX2;
4408 for (size_t channels = 1; channels <= 40; channels += 7) {
4409 for (size_t step = 2; step <= 9; step++) {
4410 DWConvMicrokernelTester()
4411 .cr(8)
4412 .kr(9)
4413 .channels(channels)
4414 .width(3)
4415 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004416 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004417 }
4418 }
4419 }
4420
4421 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
4422 TEST_REQUIRES_X86_AVX2;
4423 for (size_t channels = 1; channels <= 40; channels += 7) {
4424 DWConvMicrokernelTester()
4425 .cr(8)
4426 .kr(9)
4427 .channels(8)
4428 .width(5)
4429 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004431 }
4432 }
4433
4434 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
4435 TEST_REQUIRES_X86_AVX2;
4436 for (size_t channels = 1; channels <= 40; channels += 7) {
4437 DWConvMicrokernelTester()
4438 .cr(8)
4439 .kr(9)
4440 .channels(channels)
4441 .width(3)
4442 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004443 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004444 }
4445 }
4446
4447 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
4448 TEST_REQUIRES_X86_AVX2;
4449 for (size_t channels = 1; channels <= 40; channels += 7) {
4450 DWConvMicrokernelTester()
4451 .cr(8)
4452 .kr(9)
4453 .channels(channels)
4454 .width(3)
4455 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004456 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004457 }
4458 }
4459
4460 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_zero_point_only) {
4461 TEST_REQUIRES_X86_AVX2;
4462 for (size_t channels = 1; channels <= 40; channels += 7) {
4463 DWConvMicrokernelTester()
4464 .cr(8)
4465 .kr(9)
4466 .channels(channels)
4467 .width(3)
4468 .input_zero_point(255)
4469 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004470 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004471 }
4472 }
4473
4474 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, kernel_zero_point_only) {
4475 TEST_REQUIRES_X86_AVX2;
4476 for (size_t channels = 1; channels <= 40; channels += 7) {
4477 DWConvMicrokernelTester()
4478 .cr(8)
4479 .kr(9)
4480 .channels(channels)
4481 .width(3)
4482 .input_zero_point(0)
4483 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08004484 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004485 }
4486 }
4487
4488 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
4489 TEST_REQUIRES_X86_AVX2;
4490 for (uint32_t channels = 16; channels < 128; channels += 24) {
4491 DWConvMicrokernelTester()
4492 .cr(8)
4493 .kr(9)
4494 .channels(channels)
4495 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004496 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004497 }
4498 }
4499
4500 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
4501 TEST_REQUIRES_X86_AVX2;
4502 for (uint32_t mz = 0; mz < 9; mz++) {
4503 for (uint32_t channels = 16; channels < 128; channels += 24) {
4504 DWConvMicrokernelTester()
4505 .cr(8)
4506 .kr(9)
4507 .channels(channels)
4508 .input_offset(176)
4509 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004510 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004511 }
4512 }
4513 }
4514#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4515
4516
4517#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4518 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
4519 TEST_REQUIRES_X86_AVX2;
4520 DWConvMicrokernelTester()
4521 .cr(16)
4522 .kr(9)
4523 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004524 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004525 }
4526
4527 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
4528 TEST_REQUIRES_X86_AVX2;
4529 for (uint32_t channels = 32; channels < 256; channels += 48) {
4530 DWConvMicrokernelTester()
4531 .cr(16)
4532 .kr(9)
4533 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004534 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004535 }
4536 }
4537
4538 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
4539 TEST_REQUIRES_X86_AVX2;
4540 for (uint32_t channels = 32; channels < 256; channels += 48) {
4541 DWConvMicrokernelTester()
4542 .cr(16)
4543 .kr(9)
4544 .channels(channels)
4545 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004546 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004547 }
4548 }
4549
4550 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
4551 TEST_REQUIRES_X86_AVX2;
4552 for (uint32_t channels = 32; channels < 256; channels += 48) {
4553 DWConvMicrokernelTester()
4554 .cr(16)
4555 .kr(9)
4556 .channels(channels)
4557 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004558 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004559 }
4560 }
4561
4562 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
4563 TEST_REQUIRES_X86_AVX2;
4564 for (uint32_t channels = 1; channels < 16; channels++) {
4565 DWConvMicrokernelTester()
4566 .cr(16)
4567 .kr(9)
4568 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004569 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004570 }
4571 }
4572
4573 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
4574 TEST_REQUIRES_X86_AVX2;
4575 for (uint32_t channels = 17; channels < 32; channels++) {
4576 DWConvMicrokernelTester()
4577 .cr(16)
4578 .kr(9)
4579 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004580 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004581 }
4582 }
4583
4584 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
4585 TEST_REQUIRES_X86_AVX2;
4586 for (uint32_t channels = 17; channels < 32; channels++) {
4587 DWConvMicrokernelTester()
4588 .cr(16)
4589 .kr(9)
4590 .channels(channels)
4591 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004592 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004593 }
4594 }
4595
4596 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
4597 TEST_REQUIRES_X86_AVX2;
4598 for (uint32_t channels = 17; channels < 32; channels++) {
4599 DWConvMicrokernelTester()
4600 .cr(16)
4601 .kr(9)
4602 .channels(channels)
4603 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004604 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004605 }
4606 }
4607
4608 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
4609 TEST_REQUIRES_X86_AVX2;
4610 for (size_t channels = 1; channels <= 80; channels += 15) {
4611 DWConvMicrokernelTester()
4612 .cr(16)
4613 .kr(9)
4614 .channels(channels)
4615 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004616 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004617 }
4618 }
4619
4620 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
4621 TEST_REQUIRES_X86_AVX2;
4622 for (size_t channels = 1; channels <= 80; channels += 15) {
4623 for (size_t step = 2; step <= 9; step++) {
4624 DWConvMicrokernelTester()
4625 .cr(16)
4626 .kr(9)
4627 .channels(channels)
4628 .width(3)
4629 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004630 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004631 }
4632 }
4633 }
4634
4635 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
4636 TEST_REQUIRES_X86_AVX2;
4637 for (size_t channels = 1; channels <= 80; channels += 15) {
4638 DWConvMicrokernelTester()
4639 .cr(16)
4640 .kr(9)
4641 .channels(16)
4642 .width(5)
4643 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004644 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004645 }
4646 }
4647
4648 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
4649 TEST_REQUIRES_X86_AVX2;
4650 for (size_t channels = 1; channels <= 80; channels += 15) {
4651 DWConvMicrokernelTester()
4652 .cr(16)
4653 .kr(9)
4654 .channels(channels)
4655 .width(3)
4656 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004657 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004658 }
4659 }
4660
4661 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
4662 TEST_REQUIRES_X86_AVX2;
4663 for (size_t channels = 1; channels <= 80; channels += 15) {
4664 DWConvMicrokernelTester()
4665 .cr(16)
4666 .kr(9)
4667 .channels(channels)
4668 .width(3)
4669 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004670 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004671 }
4672 }
4673
4674 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_zero_point_only) {
4675 TEST_REQUIRES_X86_AVX2;
4676 for (size_t channels = 1; channels <= 80; channels += 15) {
4677 DWConvMicrokernelTester()
4678 .cr(16)
4679 .kr(9)
4680 .channels(channels)
4681 .width(3)
4682 .input_zero_point(255)
4683 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004684 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004685 }
4686 }
4687
4688 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, kernel_zero_point_only) {
4689 TEST_REQUIRES_X86_AVX2;
4690 for (size_t channels = 1; channels <= 80; channels += 15) {
4691 DWConvMicrokernelTester()
4692 .cr(16)
4693 .kr(9)
4694 .channels(channels)
4695 .width(3)
4696 .input_zero_point(0)
4697 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08004698 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004699 }
4700 }
4701
4702 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
4703 TEST_REQUIRES_X86_AVX2;
4704 for (uint32_t channels = 32; channels < 256; channels += 48) {
4705 DWConvMicrokernelTester()
4706 .cr(16)
4707 .kr(9)
4708 .channels(channels)
4709 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004710 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004711 }
4712 }
4713
4714 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
4715 TEST_REQUIRES_X86_AVX2;
4716 for (uint32_t mz = 0; mz < 9; mz++) {
4717 for (uint32_t channels = 32; channels < 256; channels += 48) {
4718 DWConvMicrokernelTester()
4719 .cr(16)
4720 .kr(9)
4721 .channels(channels)
4722 .input_offset(304)
4723 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004724 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004725 }
4726 }
4727 }
4728#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4729
4730
4731#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4732 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
4733 TEST_REQUIRES_X86_AVX2;
4734 DWConvMicrokernelTester()
4735 .cr(32)
4736 .kr(9)
4737 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08004738 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004739 }
4740
4741 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
4742 TEST_REQUIRES_X86_AVX2;
4743 for (uint32_t channels = 64; channels < 512; channels += 96) {
4744 DWConvMicrokernelTester()
4745 .cr(32)
4746 .kr(9)
4747 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004748 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004749 }
4750 }
4751
4752 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
4753 TEST_REQUIRES_X86_AVX2;
4754 for (uint32_t channels = 64; channels < 512; channels += 96) {
4755 DWConvMicrokernelTester()
4756 .cr(32)
4757 .kr(9)
4758 .channels(channels)
4759 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004760 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004761 }
4762 }
4763
4764 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
4765 TEST_REQUIRES_X86_AVX2;
4766 for (uint32_t channels = 64; channels < 512; channels += 96) {
4767 DWConvMicrokernelTester()
4768 .cr(32)
4769 .kr(9)
4770 .channels(channels)
4771 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004772 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004773 }
4774 }
4775
4776 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
4777 TEST_REQUIRES_X86_AVX2;
4778 for (uint32_t channels = 1; channels < 32; channels++) {
4779 DWConvMicrokernelTester()
4780 .cr(32)
4781 .kr(9)
4782 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004783 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004784 }
4785 }
4786
4787 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
4788 TEST_REQUIRES_X86_AVX2;
4789 for (uint32_t channels = 33; channels < 64; channels++) {
4790 DWConvMicrokernelTester()
4791 .cr(32)
4792 .kr(9)
4793 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004794 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004795 }
4796 }
4797
4798 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
4799 TEST_REQUIRES_X86_AVX2;
4800 for (uint32_t channels = 33; channels < 64; channels++) {
4801 DWConvMicrokernelTester()
4802 .cr(32)
4803 .kr(9)
4804 .channels(channels)
4805 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004806 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004807 }
4808 }
4809
4810 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
4811 TEST_REQUIRES_X86_AVX2;
4812 for (uint32_t channels = 33; channels < 64; channels++) {
4813 DWConvMicrokernelTester()
4814 .cr(32)
4815 .kr(9)
4816 .channels(channels)
4817 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004818 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004819 }
4820 }
4821
4822 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
4823 TEST_REQUIRES_X86_AVX2;
4824 for (size_t channels = 1; channels <= 160; channels += 31) {
4825 DWConvMicrokernelTester()
4826 .cr(32)
4827 .kr(9)
4828 .channels(channels)
4829 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004830 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004831 }
4832 }
4833
4834 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
4835 TEST_REQUIRES_X86_AVX2;
4836 for (size_t channels = 1; channels <= 160; channels += 31) {
4837 for (size_t step = 2; step <= 9; step++) {
4838 DWConvMicrokernelTester()
4839 .cr(32)
4840 .kr(9)
4841 .channels(channels)
4842 .width(3)
4843 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004844 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004845 }
4846 }
4847 }
4848
4849 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
4850 TEST_REQUIRES_X86_AVX2;
4851 for (size_t channels = 1; channels <= 160; channels += 31) {
4852 DWConvMicrokernelTester()
4853 .cr(32)
4854 .kr(9)
4855 .channels(32)
4856 .width(5)
4857 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004858 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004859 }
4860 }
4861
4862 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
4863 TEST_REQUIRES_X86_AVX2;
4864 for (size_t channels = 1; channels <= 160; channels += 31) {
4865 DWConvMicrokernelTester()
4866 .cr(32)
4867 .kr(9)
4868 .channels(channels)
4869 .width(3)
4870 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004871 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004872 }
4873 }
4874
4875 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
4876 TEST_REQUIRES_X86_AVX2;
4877 for (size_t channels = 1; channels <= 160; channels += 31) {
4878 DWConvMicrokernelTester()
4879 .cr(32)
4880 .kr(9)
4881 .channels(channels)
4882 .width(3)
4883 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004884 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004885 }
4886 }
4887
4888 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_zero_point_only) {
4889 TEST_REQUIRES_X86_AVX2;
4890 for (size_t channels = 1; channels <= 160; channels += 31) {
4891 DWConvMicrokernelTester()
4892 .cr(32)
4893 .kr(9)
4894 .channels(channels)
4895 .width(3)
4896 .input_zero_point(255)
4897 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004898 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004899 }
4900 }
4901
4902 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, kernel_zero_point_only) {
4903 TEST_REQUIRES_X86_AVX2;
4904 for (size_t channels = 1; channels <= 160; channels += 31) {
4905 DWConvMicrokernelTester()
4906 .cr(32)
4907 .kr(9)
4908 .channels(channels)
4909 .width(3)
4910 .input_zero_point(0)
4911 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08004912 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004913 }
4914 }
4915
4916 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
4917 TEST_REQUIRES_X86_AVX2;
4918 for (uint32_t channels = 64; channels < 512; channels += 96) {
4919 DWConvMicrokernelTester()
4920 .cr(32)
4921 .kr(9)
4922 .channels(channels)
4923 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08004924 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004925 }
4926 }
4927
4928 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
4929 TEST_REQUIRES_X86_AVX2;
4930 for (uint32_t mz = 0; mz < 9; mz++) {
4931 for (uint32_t channels = 64; channels < 512; channels += 96) {
4932 DWConvMicrokernelTester()
4933 .cr(32)
4934 .kr(9)
4935 .channels(channels)
4936 .input_offset(592)
4937 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004938 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -07004939 }
4940 }
4941 }
4942#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4943
4944
4945#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancfd606b2021-07-09 01:18:45 -07004946 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
4947 TEST_REQUIRES_X86_AVX512SKX;
4948 DWConvMicrokernelTester()
4949 .cr(16)
4950 .kr(9)
4951 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004952 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07004953 }
4954
4955 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
4956 TEST_REQUIRES_X86_AVX512SKX;
4957 for (uint32_t channels = 32; channels < 256; channels += 48) {
4958 DWConvMicrokernelTester()
4959 .cr(16)
4960 .kr(9)
4961 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004962 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07004963 }
4964 }
4965
4966 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
4967 TEST_REQUIRES_X86_AVX512SKX;
4968 for (uint32_t channels = 32; channels < 256; channels += 48) {
4969 DWConvMicrokernelTester()
4970 .cr(16)
4971 .kr(9)
4972 .channels(channels)
4973 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004974 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07004975 }
4976 }
4977
4978 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
4979 TEST_REQUIRES_X86_AVX512SKX;
4980 for (uint32_t channels = 32; channels < 256; channels += 48) {
4981 DWConvMicrokernelTester()
4982 .cr(16)
4983 .kr(9)
4984 .channels(channels)
4985 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004986 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07004987 }
4988 }
4989
4990 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
4991 TEST_REQUIRES_X86_AVX512SKX;
4992 for (uint32_t channels = 1; channels < 16; channels++) {
4993 DWConvMicrokernelTester()
4994 .cr(16)
4995 .kr(9)
4996 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004997 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07004998 }
4999 }
5000
5001 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
5002 TEST_REQUIRES_X86_AVX512SKX;
5003 for (uint32_t channels = 17; channels < 32; channels++) {
5004 DWConvMicrokernelTester()
5005 .cr(16)
5006 .kr(9)
5007 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005008 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005009 }
5010 }
5011
5012 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
5013 TEST_REQUIRES_X86_AVX512SKX;
5014 for (uint32_t channels = 17; channels < 32; channels++) {
5015 DWConvMicrokernelTester()
5016 .cr(16)
5017 .kr(9)
5018 .channels(channels)
5019 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005020 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005021 }
5022 }
5023
5024 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
5025 TEST_REQUIRES_X86_AVX512SKX;
5026 for (uint32_t channels = 17; channels < 32; channels++) {
5027 DWConvMicrokernelTester()
5028 .cr(16)
5029 .kr(9)
5030 .channels(channels)
5031 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005032 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005033 }
5034 }
5035
5036 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
5037 TEST_REQUIRES_X86_AVX512SKX;
5038 for (size_t channels = 1; channels <= 80; channels += 15) {
5039 DWConvMicrokernelTester()
5040 .cr(16)
5041 .kr(9)
5042 .channels(channels)
5043 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005044 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005045 }
5046 }
5047
5048 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
5049 TEST_REQUIRES_X86_AVX512SKX;
5050 for (size_t channels = 1; channels <= 80; channels += 15) {
5051 for (size_t step = 2; step <= 9; step++) {
5052 DWConvMicrokernelTester()
5053 .cr(16)
5054 .kr(9)
5055 .channels(channels)
5056 .width(3)
5057 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005058 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005059 }
5060 }
5061 }
5062
5063 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
5064 TEST_REQUIRES_X86_AVX512SKX;
5065 for (size_t channels = 1; channels <= 80; channels += 15) {
5066 DWConvMicrokernelTester()
5067 .cr(16)
5068 .kr(9)
5069 .channels(16)
5070 .width(5)
5071 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005072 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005073 }
5074 }
5075
5076 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
5077 TEST_REQUIRES_X86_AVX512SKX;
5078 for (size_t channels = 1; channels <= 80; channels += 15) {
5079 DWConvMicrokernelTester()
5080 .cr(16)
5081 .kr(9)
5082 .channels(channels)
5083 .width(3)
5084 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005085 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005086 }
5087 }
5088
5089 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
5090 TEST_REQUIRES_X86_AVX512SKX;
5091 for (size_t channels = 1; channels <= 80; channels += 15) {
5092 DWConvMicrokernelTester()
5093 .cr(16)
5094 .kr(9)
5095 .channels(channels)
5096 .width(3)
5097 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005098 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005099 }
5100 }
5101
5102 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_zero_point_only) {
5103 TEST_REQUIRES_X86_AVX512SKX;
5104 for (size_t channels = 1; channels <= 80; channels += 15) {
5105 DWConvMicrokernelTester()
5106 .cr(16)
5107 .kr(9)
5108 .channels(channels)
5109 .width(3)
5110 .input_zero_point(255)
5111 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005112 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005113 }
5114 }
5115
5116 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, kernel_zero_point_only) {
5117 TEST_REQUIRES_X86_AVX512SKX;
5118 for (size_t channels = 1; channels <= 80; channels += 15) {
5119 DWConvMicrokernelTester()
5120 .cr(16)
5121 .kr(9)
5122 .channels(channels)
5123 .width(3)
5124 .input_zero_point(0)
5125 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08005126 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005127 }
5128 }
5129
5130 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
5131 TEST_REQUIRES_X86_AVX512SKX;
5132 for (uint32_t channels = 32; channels < 256; channels += 48) {
5133 DWConvMicrokernelTester()
5134 .cr(16)
5135 .kr(9)
5136 .channels(channels)
5137 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005138 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005139 }
5140 }
5141
5142 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
5143 TEST_REQUIRES_X86_AVX512SKX;
5144 for (uint32_t mz = 0; mz < 9; mz++) {
5145 for (uint32_t channels = 32; channels < 256; channels += 48) {
5146 DWConvMicrokernelTester()
5147 .cr(16)
5148 .kr(9)
5149 .channels(channels)
5150 .input_offset(304)
5151 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005152 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005153 }
5154 }
5155 }
5156#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5157
5158
5159#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5160 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
5161 TEST_REQUIRES_X86_AVX512SKX;
5162 DWConvMicrokernelTester()
5163 .cr(32)
5164 .kr(9)
5165 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08005166 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005167 }
5168
5169 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
5170 TEST_REQUIRES_X86_AVX512SKX;
5171 for (uint32_t channels = 64; channels < 512; channels += 96) {
5172 DWConvMicrokernelTester()
5173 .cr(32)
5174 .kr(9)
5175 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005177 }
5178 }
5179
5180 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
5181 TEST_REQUIRES_X86_AVX512SKX;
5182 for (uint32_t channels = 64; channels < 512; channels += 96) {
5183 DWConvMicrokernelTester()
5184 .cr(32)
5185 .kr(9)
5186 .channels(channels)
5187 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005188 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005189 }
5190 }
5191
5192 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
5193 TEST_REQUIRES_X86_AVX512SKX;
5194 for (uint32_t channels = 64; channels < 512; channels += 96) {
5195 DWConvMicrokernelTester()
5196 .cr(32)
5197 .kr(9)
5198 .channels(channels)
5199 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005200 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005201 }
5202 }
5203
5204 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
5205 TEST_REQUIRES_X86_AVX512SKX;
5206 for (uint32_t channels = 1; channels < 32; channels++) {
5207 DWConvMicrokernelTester()
5208 .cr(32)
5209 .kr(9)
5210 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005211 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005212 }
5213 }
5214
5215 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
5216 TEST_REQUIRES_X86_AVX512SKX;
5217 for (uint32_t channels = 33; channels < 64; channels++) {
5218 DWConvMicrokernelTester()
5219 .cr(32)
5220 .kr(9)
5221 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005222 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005223 }
5224 }
5225
5226 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
5227 TEST_REQUIRES_X86_AVX512SKX;
5228 for (uint32_t channels = 33; channels < 64; channels++) {
5229 DWConvMicrokernelTester()
5230 .cr(32)
5231 .kr(9)
5232 .channels(channels)
5233 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005234 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005235 }
5236 }
5237
5238 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
5239 TEST_REQUIRES_X86_AVX512SKX;
5240 for (uint32_t channels = 33; channels < 64; channels++) {
5241 DWConvMicrokernelTester()
5242 .cr(32)
5243 .kr(9)
5244 .channels(channels)
5245 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005246 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005247 }
5248 }
5249
5250 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
5251 TEST_REQUIRES_X86_AVX512SKX;
5252 for (size_t channels = 1; channels <= 160; channels += 31) {
5253 DWConvMicrokernelTester()
5254 .cr(32)
5255 .kr(9)
5256 .channels(channels)
5257 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005258 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005259 }
5260 }
5261
5262 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
5263 TEST_REQUIRES_X86_AVX512SKX;
5264 for (size_t channels = 1; channels <= 160; channels += 31) {
5265 for (size_t step = 2; step <= 9; step++) {
5266 DWConvMicrokernelTester()
5267 .cr(32)
5268 .kr(9)
5269 .channels(channels)
5270 .width(3)
5271 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005272 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005273 }
5274 }
5275 }
5276
5277 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
5278 TEST_REQUIRES_X86_AVX512SKX;
5279 for (size_t channels = 1; channels <= 160; channels += 31) {
5280 DWConvMicrokernelTester()
5281 .cr(32)
5282 .kr(9)
5283 .channels(32)
5284 .width(5)
5285 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005287 }
5288 }
5289
5290 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
5291 TEST_REQUIRES_X86_AVX512SKX;
5292 for (size_t channels = 1; channels <= 160; channels += 31) {
5293 DWConvMicrokernelTester()
5294 .cr(32)
5295 .kr(9)
5296 .channels(channels)
5297 .width(3)
5298 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005299 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005300 }
5301 }
5302
5303 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
5304 TEST_REQUIRES_X86_AVX512SKX;
5305 for (size_t channels = 1; channels <= 160; channels += 31) {
5306 DWConvMicrokernelTester()
5307 .cr(32)
5308 .kr(9)
5309 .channels(channels)
5310 .width(3)
5311 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005312 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005313 }
5314 }
5315
5316 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_zero_point_only) {
5317 TEST_REQUIRES_X86_AVX512SKX;
5318 for (size_t channels = 1; channels <= 160; channels += 31) {
5319 DWConvMicrokernelTester()
5320 .cr(32)
5321 .kr(9)
5322 .channels(channels)
5323 .width(3)
5324 .input_zero_point(255)
5325 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005326 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005327 }
5328 }
5329
5330 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, kernel_zero_point_only) {
5331 TEST_REQUIRES_X86_AVX512SKX;
5332 for (size_t channels = 1; channels <= 160; channels += 31) {
5333 DWConvMicrokernelTester()
5334 .cr(32)
5335 .kr(9)
5336 .channels(channels)
5337 .width(3)
5338 .input_zero_point(0)
5339 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08005340 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005341 }
5342 }
5343
5344 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
5345 TEST_REQUIRES_X86_AVX512SKX;
5346 for (uint32_t channels = 64; channels < 512; channels += 96) {
5347 DWConvMicrokernelTester()
5348 .cr(32)
5349 .kr(9)
5350 .channels(channels)
5351 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08005352 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005353 }
5354 }
5355
5356 TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
5357 TEST_REQUIRES_X86_AVX512SKX;
5358 for (uint32_t mz = 0; mz < 9; mz++) {
5359 for (uint32_t channels = 64; channels < 512; channels += 96) {
5360 DWConvMicrokernelTester()
5361 .cr(32)
5362 .kr(9)
5363 .channels(channels)
5364 .input_offset(592)
5365 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005366 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -07005367 }
5368 }
5369 }
5370#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5371
5372
Marat Dukhan4c617792021-12-21 15:47:58 -08005373#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005374 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
5375 DWConvMicrokernelTester()
5376 .cr(8)
5377 .kr(9)
5378 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005379 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005380 }
5381
5382 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
5383 for (uint32_t channels = 16; channels < 128; channels += 24) {
5384 DWConvMicrokernelTester()
5385 .cr(8)
5386 .kr(9)
5387 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005388 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005389 }
5390 }
5391
5392 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
5393 for (uint32_t channels = 16; channels < 128; channels += 24) {
5394 DWConvMicrokernelTester()
5395 .cr(8)
5396 .kr(9)
5397 .channels(channels)
5398 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005399 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005400 }
5401 }
5402
5403 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
5404 for (uint32_t channels = 16; channels < 128; channels += 24) {
5405 DWConvMicrokernelTester()
5406 .cr(8)
5407 .kr(9)
5408 .channels(channels)
5409 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005410 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005411 }
5412 }
5413
5414 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
5415 for (uint32_t channels = 1; channels < 8; channels++) {
5416 DWConvMicrokernelTester()
5417 .cr(8)
5418 .kr(9)
5419 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005420 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005421 }
5422 }
5423
5424 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
5425 for (uint32_t channels = 9; channels < 16; channels++) {
5426 DWConvMicrokernelTester()
5427 .cr(8)
5428 .kr(9)
5429 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005431 }
5432 }
5433
5434 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
5435 for (uint32_t channels = 9; channels < 16; channels++) {
5436 DWConvMicrokernelTester()
5437 .cr(8)
5438 .kr(9)
5439 .channels(channels)
5440 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005441 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005442 }
5443 }
5444
5445 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
5446 for (uint32_t channels = 9; channels < 16; channels++) {
5447 DWConvMicrokernelTester()
5448 .cr(8)
5449 .kr(9)
5450 .channels(channels)
5451 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005452 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005453 }
5454 }
5455
5456 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
5457 for (size_t channels = 1; channels <= 40; channels += 7) {
5458 DWConvMicrokernelTester()
5459 .cr(8)
5460 .kr(9)
5461 .channels(channels)
5462 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005463 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005464 }
5465 }
5466
5467 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
5468 for (size_t channels = 1; channels <= 40; channels += 7) {
5469 for (size_t step = 2; step <= 9; step++) {
5470 DWConvMicrokernelTester()
5471 .cr(8)
5472 .kr(9)
5473 .channels(channels)
5474 .width(3)
5475 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005476 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005477 }
5478 }
5479 }
5480
5481 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
5482 for (size_t channels = 1; channels <= 40; channels += 7) {
5483 DWConvMicrokernelTester()
5484 .cr(8)
5485 .kr(9)
5486 .channels(8)
5487 .width(5)
5488 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005489 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005490 }
5491 }
5492
5493 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
5494 for (size_t channels = 1; channels <= 40; channels += 7) {
5495 DWConvMicrokernelTester()
5496 .cr(8)
5497 .kr(9)
5498 .channels(channels)
5499 .width(3)
5500 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005501 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005502 }
5503 }
5504
5505 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
5506 for (size_t channels = 1; channels <= 40; channels += 7) {
5507 DWConvMicrokernelTester()
5508 .cr(8)
5509 .kr(9)
5510 .channels(channels)
5511 .width(3)
5512 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005513 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005514 }
5515 }
5516
5517 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_zero_point_only) {
5518 for (size_t channels = 1; channels <= 40; channels += 7) {
5519 DWConvMicrokernelTester()
5520 .cr(8)
5521 .kr(9)
5522 .channels(channels)
5523 .width(3)
5524 .input_zero_point(255)
5525 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005526 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005527 }
5528 }
5529
5530 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, kernel_zero_point_only) {
5531 for (size_t channels = 1; channels <= 40; channels += 7) {
5532 DWConvMicrokernelTester()
5533 .cr(8)
5534 .kr(9)
5535 .channels(channels)
5536 .width(3)
5537 .input_zero_point(0)
5538 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08005539 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005540 }
5541 }
5542
5543 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
5544 for (uint32_t channels = 16; channels < 128; channels += 24) {
5545 DWConvMicrokernelTester()
5546 .cr(8)
5547 .kr(9)
5548 .channels(channels)
5549 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08005550 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005551 }
5552 }
5553
5554 TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
5555 for (uint32_t mz = 0; mz < 9; mz++) {
5556 for (uint32_t channels = 16; channels < 128; channels += 24) {
5557 DWConvMicrokernelTester()
5558 .cr(8)
5559 .kr(9)
5560 .channels(channels)
5561 .input_offset(176)
5562 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005563 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005564 }
5565 }
5566 }
Marat Dukhan4c617792021-12-21 15:47:58 -08005567#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005568
5569
Marat Dukhan4c617792021-12-21 15:47:58 -08005570#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005571 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
5572 DWConvMicrokernelTester()
5573 .cr(16)
5574 .kr(9)
5575 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005576 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005577 }
5578
5579 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
5580 for (uint32_t channels = 32; channels < 256; channels += 48) {
5581 DWConvMicrokernelTester()
5582 .cr(16)
5583 .kr(9)
5584 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005585 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005586 }
5587 }
5588
5589 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
5590 for (uint32_t channels = 32; channels < 256; channels += 48) {
5591 DWConvMicrokernelTester()
5592 .cr(16)
5593 .kr(9)
5594 .channels(channels)
5595 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005596 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005597 }
5598 }
5599
5600 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
5601 for (uint32_t channels = 32; channels < 256; channels += 48) {
5602 DWConvMicrokernelTester()
5603 .cr(16)
5604 .kr(9)
5605 .channels(channels)
5606 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005607 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005608 }
5609 }
5610
5611 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
5612 for (uint32_t channels = 1; channels < 16; channels++) {
5613 DWConvMicrokernelTester()
5614 .cr(16)
5615 .kr(9)
5616 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005617 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005618 }
5619 }
5620
5621 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
5622 for (uint32_t channels = 17; channels < 32; channels++) {
5623 DWConvMicrokernelTester()
5624 .cr(16)
5625 .kr(9)
5626 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005627 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005628 }
5629 }
5630
5631 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
5632 for (uint32_t channels = 17; channels < 32; channels++) {
5633 DWConvMicrokernelTester()
5634 .cr(16)
5635 .kr(9)
5636 .channels(channels)
5637 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005638 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005639 }
5640 }
5641
5642 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
5643 for (uint32_t channels = 17; channels < 32; channels++) {
5644 DWConvMicrokernelTester()
5645 .cr(16)
5646 .kr(9)
5647 .channels(channels)
5648 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005649 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005650 }
5651 }
5652
5653 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
5654 for (size_t channels = 1; channels <= 80; channels += 15) {
5655 DWConvMicrokernelTester()
5656 .cr(16)
5657 .kr(9)
5658 .channels(channels)
5659 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005660 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005661 }
5662 }
5663
5664 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
5665 for (size_t channels = 1; channels <= 80; channels += 15) {
5666 for (size_t step = 2; step <= 9; step++) {
5667 DWConvMicrokernelTester()
5668 .cr(16)
5669 .kr(9)
5670 .channels(channels)
5671 .width(3)
5672 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005673 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005674 }
5675 }
5676 }
5677
5678 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
5679 for (size_t channels = 1; channels <= 80; channels += 15) {
5680 DWConvMicrokernelTester()
5681 .cr(16)
5682 .kr(9)
5683 .channels(16)
5684 .width(5)
5685 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005686 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005687 }
5688 }
5689
5690 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
5691 for (size_t channels = 1; channels <= 80; channels += 15) {
5692 DWConvMicrokernelTester()
5693 .cr(16)
5694 .kr(9)
5695 .channels(channels)
5696 .width(3)
5697 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005698 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005699 }
5700 }
5701
5702 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
5703 for (size_t channels = 1; channels <= 80; channels += 15) {
5704 DWConvMicrokernelTester()
5705 .cr(16)
5706 .kr(9)
5707 .channels(channels)
5708 .width(3)
5709 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005710 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005711 }
5712 }
5713
5714 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_zero_point_only) {
5715 for (size_t channels = 1; channels <= 80; channels += 15) {
5716 DWConvMicrokernelTester()
5717 .cr(16)
5718 .kr(9)
5719 .channels(channels)
5720 .width(3)
5721 .input_zero_point(255)
5722 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005723 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005724 }
5725 }
5726
5727 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, kernel_zero_point_only) {
5728 for (size_t channels = 1; channels <= 80; channels += 15) {
5729 DWConvMicrokernelTester()
5730 .cr(16)
5731 .kr(9)
5732 .channels(channels)
5733 .width(3)
5734 .input_zero_point(0)
5735 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08005736 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005737 }
5738 }
5739
5740 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
5741 for (uint32_t channels = 32; channels < 256; channels += 48) {
5742 DWConvMicrokernelTester()
5743 .cr(16)
5744 .kr(9)
5745 .channels(channels)
5746 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005747 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005748 }
5749 }
5750
5751 TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
5752 for (uint32_t mz = 0; mz < 9; mz++) {
5753 for (uint32_t channels = 32; channels < 256; channels += 48) {
5754 DWConvMicrokernelTester()
5755 .cr(16)
5756 .kr(9)
5757 .channels(channels)
5758 .input_offset(304)
5759 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005760 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005761 }
5762 }
5763 }
Marat Dukhan4c617792021-12-21 15:47:58 -08005764#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005765
5766
Marat Dukhan4c617792021-12-21 15:47:58 -08005767#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005768 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
5769 DWConvMicrokernelTester()
5770 .cr(24)
5771 .kr(9)
5772 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08005773 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005774 }
5775
5776 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
5777 for (uint32_t channels = 48; channels < 384; channels += 72) {
5778 DWConvMicrokernelTester()
5779 .cr(24)
5780 .kr(9)
5781 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005782 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005783 }
5784 }
5785
5786 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
5787 for (uint32_t channels = 48; channels < 384; channels += 72) {
5788 DWConvMicrokernelTester()
5789 .cr(24)
5790 .kr(9)
5791 .channels(channels)
5792 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005793 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005794 }
5795 }
5796
5797 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
5798 for (uint32_t channels = 48; channels < 384; channels += 72) {
5799 DWConvMicrokernelTester()
5800 .cr(24)
5801 .kr(9)
5802 .channels(channels)
5803 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005804 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005805 }
5806 }
5807
5808 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
5809 for (uint32_t channels = 1; channels < 24; channels++) {
5810 DWConvMicrokernelTester()
5811 .cr(24)
5812 .kr(9)
5813 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005814 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005815 }
5816 }
5817
5818 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
5819 for (uint32_t channels = 25; channels < 48; channels++) {
5820 DWConvMicrokernelTester()
5821 .cr(24)
5822 .kr(9)
5823 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005824 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005825 }
5826 }
5827
5828 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
5829 for (uint32_t channels = 25; channels < 48; channels++) {
5830 DWConvMicrokernelTester()
5831 .cr(24)
5832 .kr(9)
5833 .channels(channels)
5834 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005835 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005836 }
5837 }
5838
5839 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
5840 for (uint32_t channels = 25; channels < 48; channels++) {
5841 DWConvMicrokernelTester()
5842 .cr(24)
5843 .kr(9)
5844 .channels(channels)
5845 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005846 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005847 }
5848 }
5849
5850 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
5851 for (size_t channels = 1; channels <= 120; channels += 23) {
5852 DWConvMicrokernelTester()
5853 .cr(24)
5854 .kr(9)
5855 .channels(channels)
5856 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005857 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005858 }
5859 }
5860
5861 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
5862 for (size_t channels = 1; channels <= 120; channels += 23) {
5863 for (size_t step = 2; step <= 9; step++) {
5864 DWConvMicrokernelTester()
5865 .cr(24)
5866 .kr(9)
5867 .channels(channels)
5868 .width(3)
5869 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005870 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005871 }
5872 }
5873 }
5874
5875 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
5876 for (size_t channels = 1; channels <= 120; channels += 23) {
5877 DWConvMicrokernelTester()
5878 .cr(24)
5879 .kr(9)
5880 .channels(24)
5881 .width(5)
5882 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08005883 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005884 }
5885 }
5886
5887 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
5888 for (size_t channels = 1; channels <= 120; channels += 23) {
5889 DWConvMicrokernelTester()
5890 .cr(24)
5891 .kr(9)
5892 .channels(channels)
5893 .width(3)
5894 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005895 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005896 }
5897 }
5898
5899 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
5900 for (size_t channels = 1; channels <= 120; channels += 23) {
5901 DWConvMicrokernelTester()
5902 .cr(24)
5903 .kr(9)
5904 .channels(channels)
5905 .width(3)
5906 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005907 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005908 }
5909 }
5910
5911 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_zero_point_only) {
5912 for (size_t channels = 1; channels <= 120; channels += 23) {
5913 DWConvMicrokernelTester()
5914 .cr(24)
5915 .kr(9)
5916 .channels(channels)
5917 .width(3)
5918 .input_zero_point(255)
5919 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005920 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005921 }
5922 }
5923
5924 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, kernel_zero_point_only) {
5925 for (size_t channels = 1; channels <= 120; channels += 23) {
5926 DWConvMicrokernelTester()
5927 .cr(24)
5928 .kr(9)
5929 .channels(channels)
5930 .width(3)
5931 .input_zero_point(0)
5932 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08005933 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005934 }
5935 }
5936
5937 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
5938 for (uint32_t channels = 48; channels < 384; channels += 72) {
5939 DWConvMicrokernelTester()
5940 .cr(24)
5941 .kr(9)
5942 .channels(channels)
5943 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08005944 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005945 }
5946 }
5947
5948 TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
5949 for (uint32_t mz = 0; mz < 9; mz++) {
5950 for (uint32_t channels = 48; channels < 384; channels += 72) {
5951 DWConvMicrokernelTester()
5952 .cr(24)
5953 .kr(9)
5954 .channels(channels)
5955 .input_offset(464)
5956 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005957 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -07005958 }
5959 }
5960 }
Marat Dukhan4c617792021-12-21 15:47:58 -08005961#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -07005962
5963
Marat Dukhan7c1115f2022-01-04 17:18:41 -08005964#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5965 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
5966 DWConvMicrokernelTester()
5967 .cr(1)
5968 .kr(9)
5969 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005970 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08005971 }
5972
5973 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
5974 for (uint32_t channels = 2; channels < 10; channels++) {
5975 DWConvMicrokernelTester()
5976 .cr(1)
5977 .kr(9)
5978 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005979 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08005980 }
5981 }
5982
5983 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
5984 for (uint32_t channels = 2; channels < 10; channels++) {
5985 DWConvMicrokernelTester()
5986 .cr(1)
5987 .kr(9)
5988 .channels(channels)
5989 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005990 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08005991 }
5992 }
5993
5994 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
5995 for (uint32_t channels = 2; channels < 10; channels++) {
5996 DWConvMicrokernelTester()
5997 .cr(1)
5998 .kr(9)
5999 .channels(channels)
6000 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006001 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006002 }
6003 }
6004
6005 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
6006 for (size_t channels = 1; channels <= 5; channels += 1) {
6007 DWConvMicrokernelTester()
6008 .cr(1)
6009 .kr(9)
6010 .channels(channels)
6011 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006012 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006013 }
6014 }
6015
6016 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
6017 for (size_t channels = 1; channels <= 5; channels += 1) {
6018 for (size_t step = 2; step <= 9; step++) {
6019 DWConvMicrokernelTester()
6020 .cr(1)
6021 .kr(9)
6022 .channels(channels)
6023 .width(3)
6024 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006025 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006026 }
6027 }
6028 }
6029
6030 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
6031 for (size_t channels = 1; channels <= 5; channels += 1) {
6032 DWConvMicrokernelTester()
6033 .cr(1)
6034 .kr(9)
6035 .channels(1)
6036 .width(5)
6037 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006038 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006039 }
6040 }
6041
6042 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
6043 for (size_t channels = 1; channels <= 5; channels += 1) {
6044 DWConvMicrokernelTester()
6045 .cr(1)
6046 .kr(9)
6047 .channels(channels)
6048 .width(3)
6049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006050 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006051 }
6052 }
6053
6054 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
6055 for (size_t channels = 1; channels <= 5; channels += 1) {
6056 DWConvMicrokernelTester()
6057 .cr(1)
6058 .kr(9)
6059 .channels(channels)
6060 .width(3)
6061 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006062 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006063 }
6064 }
6065
6066 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_zero_point_only) {
6067 for (size_t channels = 1; channels <= 5; channels += 1) {
6068 DWConvMicrokernelTester()
6069 .cr(1)
6070 .kr(9)
6071 .channels(channels)
6072 .width(3)
6073 .input_zero_point(255)
6074 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006075 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006076 }
6077 }
6078
6079 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, kernel_zero_point_only) {
6080 for (size_t channels = 1; channels <= 5; channels += 1) {
6081 DWConvMicrokernelTester()
6082 .cr(1)
6083 .kr(9)
6084 .channels(channels)
6085 .width(3)
6086 .input_zero_point(0)
6087 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08006088 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006089 }
6090 }
6091
6092 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
6093 for (uint32_t channels = 2; channels < 16; channels += 3) {
6094 DWConvMicrokernelTester()
6095 .cr(1)
6096 .kr(9)
6097 .channels(channels)
6098 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -08006099 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006100 }
6101 }
6102
6103 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
6104 for (uint32_t mz = 0; mz < 9; mz++) {
6105 for (uint32_t channels = 2; channels < 16; channels += 3) {
6106 DWConvMicrokernelTester()
6107 .cr(1)
6108 .kr(9)
6109 .channels(channels)
6110 .input_offset(48)
6111 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006112 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006113 }
6114 }
6115 }
6116#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6117
6118
6119#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6120 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
6121 DWConvMicrokernelTester()
6122 .cr(2)
6123 .kr(9)
6124 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08006125 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006126 }
6127
6128 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
6129 for (uint32_t channels = 4; channels < 32; channels += 6) {
6130 DWConvMicrokernelTester()
6131 .cr(2)
6132 .kr(9)
6133 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006134 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006135 }
6136 }
6137
6138 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
6139 for (uint32_t channels = 4; channels < 32; channels += 6) {
6140 DWConvMicrokernelTester()
6141 .cr(2)
6142 .kr(9)
6143 .channels(channels)
6144 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006145 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006146 }
6147 }
6148
6149 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
6150 for (uint32_t channels = 4; channels < 32; channels += 6) {
6151 DWConvMicrokernelTester()
6152 .cr(2)
6153 .kr(9)
6154 .channels(channels)
6155 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006156 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006157 }
6158 }
6159
6160 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
6161 for (uint32_t channels = 1; channels < 2; channels++) {
6162 DWConvMicrokernelTester()
6163 .cr(2)
6164 .kr(9)
6165 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006166 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006167 }
6168 }
6169
6170 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
6171 for (uint32_t channels = 3; channels < 4; channels++) {
6172 DWConvMicrokernelTester()
6173 .cr(2)
6174 .kr(9)
6175 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006177 }
6178 }
6179
6180 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
6181 for (uint32_t channels = 3; channels < 4; channels++) {
6182 DWConvMicrokernelTester()
6183 .cr(2)
6184 .kr(9)
6185 .channels(channels)
6186 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006187 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006188 }
6189 }
6190
6191 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
6192 for (uint32_t channels = 3; channels < 4; channels++) {
6193 DWConvMicrokernelTester()
6194 .cr(2)
6195 .kr(9)
6196 .channels(channels)
6197 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006198 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006199 }
6200 }
6201
6202 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
6203 for (size_t channels = 1; channels <= 10; channels += 1) {
6204 DWConvMicrokernelTester()
6205 .cr(2)
6206 .kr(9)
6207 .channels(channels)
6208 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006209 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006210 }
6211 }
6212
6213 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
6214 for (size_t channels = 1; channels <= 10; channels += 1) {
6215 for (size_t step = 2; step <= 9; step++) {
6216 DWConvMicrokernelTester()
6217 .cr(2)
6218 .kr(9)
6219 .channels(channels)
6220 .width(3)
6221 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006222 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006223 }
6224 }
6225 }
6226
6227 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
6228 for (size_t channels = 1; channels <= 10; channels += 1) {
6229 DWConvMicrokernelTester()
6230 .cr(2)
6231 .kr(9)
6232 .channels(2)
6233 .width(5)
6234 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -08006235 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006236 }
6237 }
6238
6239 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
6240 for (size_t channels = 1; channels <= 10; channels += 1) {
6241 DWConvMicrokernelTester()
6242 .cr(2)
6243 .kr(9)
6244 .channels(channels)
6245 .width(3)
6246 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006247 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006248 }
6249 }
6250
6251 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
6252 for (size_t channels = 1; channels <= 10; channels += 1) {
6253 DWConvMicrokernelTester()
6254 .cr(2)
6255 .kr(9)
6256 .channels(channels)
6257 .width(3)
6258 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006259 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006260 }
6261 }
6262
6263 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_zero_point_only) {
6264 for (size_t channels = 1; channels <= 10; channels += 1) {
6265 DWConvMicrokernelTester()
6266 .cr(2)
6267 .kr(9)
6268 .channels(channels)
6269 .width(3)
6270 .input_zero_point(255)
6271 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006272 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006273 }
6274 }
6275
6276 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, kernel_zero_point_only) {
6277 for (size_t channels = 1; channels <= 10; channels += 1) {
6278 DWConvMicrokernelTester()
6279 .cr(2)
6280 .kr(9)
6281 .channels(channels)
6282 .width(3)
6283 .input_zero_point(0)
6284 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08006285 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006286 }
6287 }
6288
6289 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
6290 for (uint32_t channels = 4; channels < 32; channels += 6) {
6291 DWConvMicrokernelTester()
6292 .cr(2)
6293 .kr(9)
6294 .channels(channels)
6295 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -08006296 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006297 }
6298 }
6299
6300 TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
6301 for (uint32_t mz = 0; mz < 9; mz++) {
6302 for (uint32_t channels = 4; channels < 32; channels += 6) {
6303 DWConvMicrokernelTester()
6304 .cr(2)
6305 .kr(9)
6306 .channels(channels)
6307 .input_offset(80)
6308 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006309 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006310 }
6311 }
6312 }
6313#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6314
6315
6316#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6317 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
6318 DWConvMicrokernelTester()
6319 .cr(4)
6320 .kr(9)
6321 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08006322 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006323 }
6324
6325 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
6326 for (uint32_t channels = 8; channels < 64; channels += 12) {
6327 DWConvMicrokernelTester()
6328 .cr(4)
6329 .kr(9)
6330 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006331 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006332 }
6333 }
6334
6335 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
6336 for (uint32_t channels = 8; channels < 64; channels += 12) {
6337 DWConvMicrokernelTester()
6338 .cr(4)
6339 .kr(9)
6340 .channels(channels)
6341 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006342 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006343 }
6344 }
6345
6346 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
6347 for (uint32_t channels = 8; channels < 64; channels += 12) {
6348 DWConvMicrokernelTester()
6349 .cr(4)
6350 .kr(9)
6351 .channels(channels)
6352 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006353 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006354 }
6355 }
6356
6357 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
6358 for (uint32_t channels = 1; channels < 4; channels++) {
6359 DWConvMicrokernelTester()
6360 .cr(4)
6361 .kr(9)
6362 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006363 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006364 }
6365 }
6366
6367 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
6368 for (uint32_t channels = 5; channels < 8; channels++) {
6369 DWConvMicrokernelTester()
6370 .cr(4)
6371 .kr(9)
6372 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006373 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006374 }
6375 }
6376
6377 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
6378 for (uint32_t channels = 5; channels < 8; channels++) {
6379 DWConvMicrokernelTester()
6380 .cr(4)
6381 .kr(9)
6382 .channels(channels)
6383 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006384 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006385 }
6386 }
6387
6388 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
6389 for (uint32_t channels = 5; channels < 8; channels++) {
6390 DWConvMicrokernelTester()
6391 .cr(4)
6392 .kr(9)
6393 .channels(channels)
6394 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006395 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006396 }
6397 }
6398
6399 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
6400 for (size_t channels = 1; channels <= 20; channels += 3) {
6401 DWConvMicrokernelTester()
6402 .cr(4)
6403 .kr(9)
6404 .channels(channels)
6405 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006407 }
6408 }
6409
6410 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
6411 for (size_t channels = 1; channels <= 20; channels += 3) {
6412 for (size_t step = 2; step <= 9; step++) {
6413 DWConvMicrokernelTester()
6414 .cr(4)
6415 .kr(9)
6416 .channels(channels)
6417 .width(3)
6418 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006419 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006420 }
6421 }
6422 }
6423
6424 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
6425 for (size_t channels = 1; channels <= 20; channels += 3) {
6426 DWConvMicrokernelTester()
6427 .cr(4)
6428 .kr(9)
6429 .channels(4)
6430 .width(5)
6431 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08006432 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006433 }
6434 }
6435
6436 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
6437 for (size_t channels = 1; channels <= 20; channels += 3) {
6438 DWConvMicrokernelTester()
6439 .cr(4)
6440 .kr(9)
6441 .channels(channels)
6442 .width(3)
6443 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006444 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006445 }
6446 }
6447
6448 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
6449 for (size_t channels = 1; channels <= 20; channels += 3) {
6450 DWConvMicrokernelTester()
6451 .cr(4)
6452 .kr(9)
6453 .channels(channels)
6454 .width(3)
6455 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006456 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006457 }
6458 }
6459
6460 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_zero_point_only) {
6461 for (size_t channels = 1; channels <= 20; channels += 3) {
6462 DWConvMicrokernelTester()
6463 .cr(4)
6464 .kr(9)
6465 .channels(channels)
6466 .width(3)
6467 .input_zero_point(255)
6468 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006469 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006470 }
6471 }
6472
6473 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, kernel_zero_point_only) {
6474 for (size_t channels = 1; channels <= 20; channels += 3) {
6475 DWConvMicrokernelTester()
6476 .cr(4)
6477 .kr(9)
6478 .channels(channels)
6479 .width(3)
6480 .input_zero_point(0)
6481 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08006482 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006483 }
6484 }
6485
6486 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
6487 for (uint32_t channels = 8; channels < 64; channels += 12) {
6488 DWConvMicrokernelTester()
6489 .cr(4)
6490 .kr(9)
6491 .channels(channels)
6492 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -08006493 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006494 }
6495 }
6496
6497 TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
6498 for (uint32_t mz = 0; mz < 9; mz++) {
6499 for (uint32_t channels = 8; channels < 64; channels += 12) {
6500 DWConvMicrokernelTester()
6501 .cr(4)
6502 .kr(9)
6503 .channels(channels)
6504 .input_offset(112)
6505 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006506 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -08006507 }
6508 }
6509 }
6510#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6511
6512
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006513TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006514 DWConvMicrokernelTester()
6515 .cr(1)
6516 .kr(9)
6517 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006518 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006519}
6520
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006521TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006522 for (uint32_t channels = 2; channels < 10; channels++) {
6523 DWConvMicrokernelTester()
6524 .cr(1)
6525 .kr(9)
6526 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006527 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006528 }
6529}
6530
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006531TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006532 for (uint32_t channels = 2; channels < 10; channels++) {
6533 DWConvMicrokernelTester()
6534 .cr(1)
6535 .kr(9)
6536 .channels(channels)
6537 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006538 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006539 }
6540}
6541
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006542TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006543 for (uint32_t channels = 2; channels < 10; channels++) {
6544 DWConvMicrokernelTester()
6545 .cr(1)
6546 .kr(9)
6547 .channels(channels)
6548 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006549 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006550 }
6551}
6552
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006553TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006554 for (size_t channels = 1; channels <= 5; channels += 1) {
6555 DWConvMicrokernelTester()
6556 .cr(1)
6557 .kr(9)
6558 .channels(channels)
6559 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006560 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006561 }
6562}
6563
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006564TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006565 for (size_t channels = 1; channels <= 5; channels += 1) {
6566 for (size_t step = 2; step <= 9; step++) {
6567 DWConvMicrokernelTester()
6568 .cr(1)
6569 .kr(9)
6570 .channels(channels)
6571 .width(3)
6572 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006573 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006574 }
6575 }
6576}
6577
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006578TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006579 for (size_t channels = 1; channels <= 5; channels += 1) {
6580 DWConvMicrokernelTester()
6581 .cr(1)
6582 .kr(9)
6583 .channels(1)
6584 .width(5)
6585 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006586 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006587 }
6588}
6589
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006590TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006591 for (size_t channels = 1; channels <= 5; channels += 1) {
6592 DWConvMicrokernelTester()
6593 .cr(1)
6594 .kr(9)
6595 .channels(channels)
6596 .width(3)
6597 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006599 }
6600}
6601
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006602TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006603 for (size_t channels = 1; channels <= 5; channels += 1) {
6604 DWConvMicrokernelTester()
6605 .cr(1)
6606 .kr(9)
6607 .channels(channels)
6608 .width(3)
6609 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006610 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006611 }
6612}
6613
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006614TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006615 for (size_t channels = 1; channels <= 5; channels += 1) {
6616 DWConvMicrokernelTester()
6617 .cr(1)
6618 .kr(9)
6619 .channels(channels)
6620 .width(3)
6621 .input_zero_point(255)
6622 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006623 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006624 }
6625}
6626
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006627TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006628 for (size_t channels = 1; channels <= 5; channels += 1) {
6629 DWConvMicrokernelTester()
6630 .cr(1)
6631 .kr(9)
6632 .channels(channels)
6633 .width(3)
6634 .input_zero_point(0)
6635 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08006636 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006637 }
6638}
6639
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006640TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006641 for (uint32_t channels = 2; channels < 16; channels += 3) {
6642 DWConvMicrokernelTester()
6643 .cr(1)
6644 .kr(9)
6645 .channels(channels)
6646 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -08006647 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006648 }
6649}
6650
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006651TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006652 for (uint32_t mz = 0; mz < 9; mz++) {
6653 for (uint32_t channels = 2; channels < 16; channels += 3) {
6654 DWConvMicrokernelTester()
6655 .cr(1)
6656 .kr(9)
6657 .channels(channels)
6658 .input_offset(48)
6659 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006660 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006661 }
6662 }
6663}
6664
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006665TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006666 DWConvMicrokernelTester()
6667 .cr(2)
6668 .kr(9)
6669 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08006670 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006671}
6672
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006673TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006674 for (uint32_t channels = 4; channels < 32; channels += 6) {
6675 DWConvMicrokernelTester()
6676 .cr(2)
6677 .kr(9)
6678 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006679 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006680 }
6681}
6682
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006683TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006684 for (uint32_t channels = 4; channels < 32; channels += 6) {
6685 DWConvMicrokernelTester()
6686 .cr(2)
6687 .kr(9)
6688 .channels(channels)
6689 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006690 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006691 }
6692}
6693
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006694TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006695 for (uint32_t channels = 4; channels < 32; channels += 6) {
6696 DWConvMicrokernelTester()
6697 .cr(2)
6698 .kr(9)
6699 .channels(channels)
6700 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006701 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006702 }
6703}
6704
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006705TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006706 for (uint32_t channels = 1; channels < 2; channels++) {
6707 DWConvMicrokernelTester()
6708 .cr(2)
6709 .kr(9)
6710 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006711 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006712 }
6713}
6714
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006715TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006716 for (uint32_t channels = 3; channels < 4; channels++) {
6717 DWConvMicrokernelTester()
6718 .cr(2)
6719 .kr(9)
6720 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006721 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006722 }
6723}
6724
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006725TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006726 for (uint32_t channels = 3; channels < 4; channels++) {
6727 DWConvMicrokernelTester()
6728 .cr(2)
6729 .kr(9)
6730 .channels(channels)
6731 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006732 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006733 }
6734}
6735
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006736TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006737 for (uint32_t channels = 3; channels < 4; channels++) {
6738 DWConvMicrokernelTester()
6739 .cr(2)
6740 .kr(9)
6741 .channels(channels)
6742 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006743 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006744 }
6745}
6746
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006747TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006748 for (size_t channels = 1; channels <= 10; channels += 1) {
6749 DWConvMicrokernelTester()
6750 .cr(2)
6751 .kr(9)
6752 .channels(channels)
6753 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006754 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006755 }
6756}
6757
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006758TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006759 for (size_t channels = 1; channels <= 10; channels += 1) {
6760 for (size_t step = 2; step <= 9; step++) {
6761 DWConvMicrokernelTester()
6762 .cr(2)
6763 .kr(9)
6764 .channels(channels)
6765 .width(3)
6766 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006767 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006768 }
6769 }
6770}
6771
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006772TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006773 for (size_t channels = 1; channels <= 10; channels += 1) {
6774 DWConvMicrokernelTester()
6775 .cr(2)
6776 .kr(9)
6777 .channels(2)
6778 .width(5)
6779 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -08006780 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006781 }
6782}
6783
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006784TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006785 for (size_t channels = 1; channels <= 10; channels += 1) {
6786 DWConvMicrokernelTester()
6787 .cr(2)
6788 .kr(9)
6789 .channels(channels)
6790 .width(3)
6791 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006792 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006793 }
6794}
6795
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006796TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006797 for (size_t channels = 1; channels <= 10; channels += 1) {
6798 DWConvMicrokernelTester()
6799 .cr(2)
6800 .kr(9)
6801 .channels(channels)
6802 .width(3)
6803 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006804 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006805 }
6806}
6807
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006808TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006809 for (size_t channels = 1; channels <= 10; channels += 1) {
6810 DWConvMicrokernelTester()
6811 .cr(2)
6812 .kr(9)
6813 .channels(channels)
6814 .width(3)
6815 .input_zero_point(255)
6816 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006817 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006818 }
6819}
6820
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006821TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006822 for (size_t channels = 1; channels <= 10; channels += 1) {
6823 DWConvMicrokernelTester()
6824 .cr(2)
6825 .kr(9)
6826 .channels(channels)
6827 .width(3)
6828 .input_zero_point(0)
6829 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08006830 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006831 }
6832}
6833
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006834TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006835 for (uint32_t channels = 4; channels < 32; channels += 6) {
6836 DWConvMicrokernelTester()
6837 .cr(2)
6838 .kr(9)
6839 .channels(channels)
6840 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -08006841 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006842 }
6843}
6844
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006845TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006846 for (uint32_t mz = 0; mz < 9; mz++) {
6847 for (uint32_t channels = 4; channels < 32; channels += 6) {
6848 DWConvMicrokernelTester()
6849 .cr(2)
6850 .kr(9)
6851 .channels(channels)
6852 .input_offset(80)
6853 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006854 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006855 }
6856 }
6857}
6858
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006859TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006860 DWConvMicrokernelTester()
6861 .cr(4)
6862 .kr(9)
6863 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08006864 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006865}
6866
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006867TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006868 for (uint32_t channels = 8; channels < 64; channels += 12) {
6869 DWConvMicrokernelTester()
6870 .cr(4)
6871 .kr(9)
6872 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006873 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006874 }
6875}
6876
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006877TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006878 for (uint32_t channels = 8; channels < 64; channels += 12) {
6879 DWConvMicrokernelTester()
6880 .cr(4)
6881 .kr(9)
6882 .channels(channels)
6883 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006884 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006885 }
6886}
6887
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006888TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006889 for (uint32_t channels = 8; channels < 64; channels += 12) {
6890 DWConvMicrokernelTester()
6891 .cr(4)
6892 .kr(9)
6893 .channels(channels)
6894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006895 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006896 }
6897}
6898
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006899TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006900 for (uint32_t channels = 1; channels < 4; channels++) {
6901 DWConvMicrokernelTester()
6902 .cr(4)
6903 .kr(9)
6904 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006905 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006906 }
6907}
6908
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006909TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006910 for (uint32_t channels = 5; channels < 8; channels++) {
6911 DWConvMicrokernelTester()
6912 .cr(4)
6913 .kr(9)
6914 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006915 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006916 }
6917}
6918
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006919TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006920 for (uint32_t channels = 5; channels < 8; channels++) {
6921 DWConvMicrokernelTester()
6922 .cr(4)
6923 .kr(9)
6924 .channels(channels)
6925 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006926 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006927 }
6928}
6929
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006930TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006931 for (uint32_t channels = 5; channels < 8; channels++) {
6932 DWConvMicrokernelTester()
6933 .cr(4)
6934 .kr(9)
6935 .channels(channels)
6936 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006937 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006938 }
6939}
6940
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006941TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006942 for (size_t channels = 1; channels <= 20; channels += 3) {
6943 DWConvMicrokernelTester()
6944 .cr(4)
6945 .kr(9)
6946 .channels(channels)
6947 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006948 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006949 }
6950}
6951
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006952TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006953 for (size_t channels = 1; channels <= 20; channels += 3) {
6954 for (size_t step = 2; step <= 9; step++) {
6955 DWConvMicrokernelTester()
6956 .cr(4)
6957 .kr(9)
6958 .channels(channels)
6959 .width(3)
6960 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006961 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006962 }
6963 }
6964}
6965
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006966TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006967 for (size_t channels = 1; channels <= 20; channels += 3) {
6968 DWConvMicrokernelTester()
6969 .cr(4)
6970 .kr(9)
6971 .channels(4)
6972 .width(5)
6973 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08006974 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006975 }
6976}
6977
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006978TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006979 for (size_t channels = 1; channels <= 20; channels += 3) {
6980 DWConvMicrokernelTester()
6981 .cr(4)
6982 .kr(9)
6983 .channels(channels)
6984 .width(3)
6985 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006986 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006987 }
6988}
6989
Marat Dukhan2ac722e2022-01-04 01:54:20 -08006990TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -07006991 for (size_t channels = 1; channels <= 20; channels += 3) {
6992 DWConvMicrokernelTester()
6993 .cr(4)
6994 .kr(9)
6995 .channels(channels)
6996 .width(3)
6997 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006998 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07006999 }
7000}
7001
Marat Dukhan2ac722e2022-01-04 01:54:20 -08007002TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07007003 for (size_t channels = 1; channels <= 20; channels += 3) {
7004 DWConvMicrokernelTester()
7005 .cr(4)
7006 .kr(9)
7007 .channels(channels)
7008 .width(3)
7009 .input_zero_point(255)
7010 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007011 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07007012 }
7013}
7014
Marat Dukhan2ac722e2022-01-04 01:54:20 -08007015TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -07007016 for (size_t channels = 1; channels <= 20; channels += 3) {
7017 DWConvMicrokernelTester()
7018 .cr(4)
7019 .kr(9)
7020 .channels(channels)
7021 .width(3)
7022 .input_zero_point(0)
7023 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007024 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07007025 }
7026}
7027
Marat Dukhan2ac722e2022-01-04 01:54:20 -08007028TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -07007029 for (uint32_t channels = 8; channels < 64; channels += 12) {
7030 DWConvMicrokernelTester()
7031 .cr(4)
7032 .kr(9)
7033 .channels(channels)
7034 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -08007035 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07007036 }
7037}
7038
Marat Dukhan2ac722e2022-01-04 01:54:20 -08007039TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -07007040 for (uint32_t mz = 0; mz < 9; mz++) {
7041 for (uint32_t channels = 8; channels < 64; channels += 12) {
7042 DWConvMicrokernelTester()
7043 .cr(4)
7044 .kr(9)
7045 .channels(channels)
7046 .input_offset(112)
7047 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007048 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -07007049 }
7050 }
7051}
7052
Marat Dukhan272d4d92022-01-04 15:07:14 -08007053TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
7054 DWConvMicrokernelTester()
7055 .cr(1)
7056 .kr(9)
7057 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007058 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007059}
7060
7061TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
7062 for (uint32_t channels = 2; channels < 10; channels++) {
7063 DWConvMicrokernelTester()
7064 .cr(1)
7065 .kr(9)
7066 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007067 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007068 }
7069}
7070
7071TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
7072 for (uint32_t channels = 2; channels < 10; channels++) {
7073 DWConvMicrokernelTester()
7074 .cr(1)
7075 .kr(9)
7076 .channels(channels)
7077 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007078 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007079 }
7080}
7081
7082TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
7083 for (uint32_t channels = 2; channels < 10; channels++) {
7084 DWConvMicrokernelTester()
7085 .cr(1)
7086 .kr(9)
7087 .channels(channels)
7088 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007089 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007090 }
7091}
7092
7093TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
7094 for (size_t channels = 1; channels <= 5; channels += 1) {
7095 DWConvMicrokernelTester()
7096 .cr(1)
7097 .kr(9)
7098 .channels(channels)
7099 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007100 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007101 }
7102}
7103
7104TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
7105 for (size_t channels = 1; channels <= 5; channels += 1) {
7106 for (size_t step = 2; step <= 9; step++) {
7107 DWConvMicrokernelTester()
7108 .cr(1)
7109 .kr(9)
7110 .channels(channels)
7111 .width(3)
7112 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007113 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007114 }
7115 }
7116}
7117
7118TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
7119 for (size_t channels = 1; channels <= 5; channels += 1) {
7120 DWConvMicrokernelTester()
7121 .cr(1)
7122 .kr(9)
7123 .channels(1)
7124 .width(5)
7125 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007126 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007127 }
7128}
7129
7130TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
7131 for (size_t channels = 1; channels <= 5; channels += 1) {
7132 DWConvMicrokernelTester()
7133 .cr(1)
7134 .kr(9)
7135 .channels(channels)
7136 .width(3)
7137 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007138 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007139 }
7140}
7141
7142TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
7143 for (size_t channels = 1; channels <= 5; channels += 1) {
7144 DWConvMicrokernelTester()
7145 .cr(1)
7146 .kr(9)
7147 .channels(channels)
7148 .width(3)
7149 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007150 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007151 }
7152}
7153
7154TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_zero_point_only) {
7155 for (size_t channels = 1; channels <= 5; channels += 1) {
7156 DWConvMicrokernelTester()
7157 .cr(1)
7158 .kr(9)
7159 .channels(channels)
7160 .width(3)
7161 .input_zero_point(255)
7162 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007163 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007164 }
7165}
7166
7167TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, kernel_zero_point_only) {
7168 for (size_t channels = 1; channels <= 5; channels += 1) {
7169 DWConvMicrokernelTester()
7170 .cr(1)
7171 .kr(9)
7172 .channels(channels)
7173 .width(3)
7174 .input_zero_point(0)
7175 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007177 }
7178}
7179
7180TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
7181 for (uint32_t channels = 2; channels < 16; channels += 3) {
7182 DWConvMicrokernelTester()
7183 .cr(1)
7184 .kr(9)
7185 .channels(channels)
7186 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -08007187 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007188 }
7189}
7190
7191TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
7192 for (uint32_t mz = 0; mz < 9; mz++) {
7193 for (uint32_t channels = 2; channels < 16; channels += 3) {
7194 DWConvMicrokernelTester()
7195 .cr(1)
7196 .kr(9)
7197 .channels(channels)
7198 .input_offset(48)
7199 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007200 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007201 }
7202 }
7203}
7204
7205TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
7206 DWConvMicrokernelTester()
7207 .cr(2)
7208 .kr(9)
7209 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08007210 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007211}
7212
7213TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
7214 for (uint32_t channels = 4; channels < 32; channels += 6) {
7215 DWConvMicrokernelTester()
7216 .cr(2)
7217 .kr(9)
7218 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007219 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007220 }
7221}
7222
7223TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
7224 for (uint32_t channels = 4; channels < 32; channels += 6) {
7225 DWConvMicrokernelTester()
7226 .cr(2)
7227 .kr(9)
7228 .channels(channels)
7229 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007230 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007231 }
7232}
7233
7234TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
7235 for (uint32_t channels = 4; channels < 32; channels += 6) {
7236 DWConvMicrokernelTester()
7237 .cr(2)
7238 .kr(9)
7239 .channels(channels)
7240 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007241 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007242 }
7243}
7244
7245TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
7246 for (uint32_t channels = 1; channels < 2; channels++) {
7247 DWConvMicrokernelTester()
7248 .cr(2)
7249 .kr(9)
7250 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007251 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007252 }
7253}
7254
7255TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
7256 for (uint32_t channels = 3; channels < 4; channels++) {
7257 DWConvMicrokernelTester()
7258 .cr(2)
7259 .kr(9)
7260 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007261 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007262 }
7263}
7264
7265TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
7266 for (uint32_t channels = 3; channels < 4; channels++) {
7267 DWConvMicrokernelTester()
7268 .cr(2)
7269 .kr(9)
7270 .channels(channels)
7271 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007272 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007273 }
7274}
7275
7276TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
7277 for (uint32_t channels = 3; channels < 4; channels++) {
7278 DWConvMicrokernelTester()
7279 .cr(2)
7280 .kr(9)
7281 .channels(channels)
7282 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007283 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007284 }
7285}
7286
7287TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
7288 for (size_t channels = 1; channels <= 10; channels += 1) {
7289 DWConvMicrokernelTester()
7290 .cr(2)
7291 .kr(9)
7292 .channels(channels)
7293 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007294 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007295 }
7296}
7297
7298TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
7299 for (size_t channels = 1; channels <= 10; channels += 1) {
7300 for (size_t step = 2; step <= 9; step++) {
7301 DWConvMicrokernelTester()
7302 .cr(2)
7303 .kr(9)
7304 .channels(channels)
7305 .width(3)
7306 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007307 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007308 }
7309 }
7310}
7311
7312TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
7313 for (size_t channels = 1; channels <= 10; channels += 1) {
7314 DWConvMicrokernelTester()
7315 .cr(2)
7316 .kr(9)
7317 .channels(2)
7318 .width(5)
7319 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -08007320 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007321 }
7322}
7323
7324TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
7325 for (size_t channels = 1; channels <= 10; channels += 1) {
7326 DWConvMicrokernelTester()
7327 .cr(2)
7328 .kr(9)
7329 .channels(channels)
7330 .width(3)
7331 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007332 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007333 }
7334}
7335
7336TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
7337 for (size_t channels = 1; channels <= 10; channels += 1) {
7338 DWConvMicrokernelTester()
7339 .cr(2)
7340 .kr(9)
7341 .channels(channels)
7342 .width(3)
7343 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007344 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007345 }
7346}
7347
7348TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_zero_point_only) {
7349 for (size_t channels = 1; channels <= 10; channels += 1) {
7350 DWConvMicrokernelTester()
7351 .cr(2)
7352 .kr(9)
7353 .channels(channels)
7354 .width(3)
7355 .input_zero_point(255)
7356 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007357 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007358 }
7359}
7360
7361TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, kernel_zero_point_only) {
7362 for (size_t channels = 1; channels <= 10; channels += 1) {
7363 DWConvMicrokernelTester()
7364 .cr(2)
7365 .kr(9)
7366 .channels(channels)
7367 .width(3)
7368 .input_zero_point(0)
7369 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007370 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007371 }
7372}
7373
7374TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
7375 for (uint32_t channels = 4; channels < 32; channels += 6) {
7376 DWConvMicrokernelTester()
7377 .cr(2)
7378 .kr(9)
7379 .channels(channels)
7380 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -08007381 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007382 }
7383}
7384
7385TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
7386 for (uint32_t mz = 0; mz < 9; mz++) {
7387 for (uint32_t channels = 4; channels < 32; channels += 6) {
7388 DWConvMicrokernelTester()
7389 .cr(2)
7390 .kr(9)
7391 .channels(channels)
7392 .input_offset(80)
7393 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007394 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007395 }
7396 }
7397}
7398
7399TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
7400 DWConvMicrokernelTester()
7401 .cr(4)
7402 .kr(9)
7403 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08007404 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007405}
7406
7407TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
7408 for (uint32_t channels = 8; channels < 64; channels += 12) {
7409 DWConvMicrokernelTester()
7410 .cr(4)
7411 .kr(9)
7412 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007413 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007414 }
7415}
7416
7417TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
7418 for (uint32_t channels = 8; channels < 64; channels += 12) {
7419 DWConvMicrokernelTester()
7420 .cr(4)
7421 .kr(9)
7422 .channels(channels)
7423 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007424 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007425 }
7426}
7427
7428TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
7429 for (uint32_t channels = 8; channels < 64; channels += 12) {
7430 DWConvMicrokernelTester()
7431 .cr(4)
7432 .kr(9)
7433 .channels(channels)
7434 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007435 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007436 }
7437}
7438
7439TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
7440 for (uint32_t channels = 1; channels < 4; channels++) {
7441 DWConvMicrokernelTester()
7442 .cr(4)
7443 .kr(9)
7444 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007445 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007446 }
7447}
7448
7449TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
7450 for (uint32_t channels = 5; channels < 8; channels++) {
7451 DWConvMicrokernelTester()
7452 .cr(4)
7453 .kr(9)
7454 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007455 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007456 }
7457}
7458
7459TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
7460 for (uint32_t channels = 5; channels < 8; channels++) {
7461 DWConvMicrokernelTester()
7462 .cr(4)
7463 .kr(9)
7464 .channels(channels)
7465 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007466 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007467 }
7468}
7469
7470TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
7471 for (uint32_t channels = 5; channels < 8; channels++) {
7472 DWConvMicrokernelTester()
7473 .cr(4)
7474 .kr(9)
7475 .channels(channels)
7476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007477 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007478 }
7479}
7480
7481TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
7482 for (size_t channels = 1; channels <= 20; channels += 3) {
7483 DWConvMicrokernelTester()
7484 .cr(4)
7485 .kr(9)
7486 .channels(channels)
7487 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007488 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007489 }
7490}
7491
7492TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
7493 for (size_t channels = 1; channels <= 20; channels += 3) {
7494 for (size_t step = 2; step <= 9; step++) {
7495 DWConvMicrokernelTester()
7496 .cr(4)
7497 .kr(9)
7498 .channels(channels)
7499 .width(3)
7500 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007501 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007502 }
7503 }
7504}
7505
7506TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
7507 for (size_t channels = 1; channels <= 20; channels += 3) {
7508 DWConvMicrokernelTester()
7509 .cr(4)
7510 .kr(9)
7511 .channels(4)
7512 .width(5)
7513 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08007514 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007515 }
7516}
7517
7518TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
7519 for (size_t channels = 1; channels <= 20; channels += 3) {
7520 DWConvMicrokernelTester()
7521 .cr(4)
7522 .kr(9)
7523 .channels(channels)
7524 .width(3)
7525 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007526 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007527 }
7528}
7529
7530TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
7531 for (size_t channels = 1; channels <= 20; channels += 3) {
7532 DWConvMicrokernelTester()
7533 .cr(4)
7534 .kr(9)
7535 .channels(channels)
7536 .width(3)
7537 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007538 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007539 }
7540}
7541
7542TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_zero_point_only) {
7543 for (size_t channels = 1; channels <= 20; channels += 3) {
7544 DWConvMicrokernelTester()
7545 .cr(4)
7546 .kr(9)
7547 .channels(channels)
7548 .width(3)
7549 .input_zero_point(255)
7550 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007551 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007552 }
7553}
7554
7555TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, kernel_zero_point_only) {
7556 for (size_t channels = 1; channels <= 20; channels += 3) {
7557 DWConvMicrokernelTester()
7558 .cr(4)
7559 .kr(9)
7560 .channels(channels)
7561 .width(3)
7562 .input_zero_point(0)
7563 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007564 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007565 }
7566}
7567
7568TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
7569 for (uint32_t channels = 8; channels < 64; channels += 12) {
7570 DWConvMicrokernelTester()
7571 .cr(4)
7572 .kr(9)
7573 .channels(channels)
7574 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -08007575 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007576 }
7577}
7578
7579TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
7580 for (uint32_t mz = 0; mz < 9; mz++) {
7581 for (uint32_t channels = 8; channels < 64; channels += 12) {
7582 DWConvMicrokernelTester()
7583 .cr(4)
7584 .kr(9)
7585 .channels(channels)
7586 .input_offset(112)
7587 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007588 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007589 }
7590 }
7591}
7592
7593TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
7594 DWConvMicrokernelTester()
7595 .cr(1)
7596 .kr(9)
7597 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007599}
7600
7601TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
7602 for (uint32_t channels = 2; channels < 10; channels++) {
7603 DWConvMicrokernelTester()
7604 .cr(1)
7605 .kr(9)
7606 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007607 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007608 }
7609}
7610
7611TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
7612 for (uint32_t channels = 2; channels < 10; channels++) {
7613 DWConvMicrokernelTester()
7614 .cr(1)
7615 .kr(9)
7616 .channels(channels)
7617 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007618 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007619 }
7620}
7621
7622TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
7623 for (uint32_t channels = 2; channels < 10; channels++) {
7624 DWConvMicrokernelTester()
7625 .cr(1)
7626 .kr(9)
7627 .channels(channels)
7628 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007629 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007630 }
7631}
7632
7633TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
7634 for (size_t channels = 1; channels <= 5; channels += 1) {
7635 DWConvMicrokernelTester()
7636 .cr(1)
7637 .kr(9)
7638 .channels(channels)
7639 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007640 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007641 }
7642}
7643
7644TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
7645 for (size_t channels = 1; channels <= 5; channels += 1) {
7646 for (size_t step = 2; step <= 9; step++) {
7647 DWConvMicrokernelTester()
7648 .cr(1)
7649 .kr(9)
7650 .channels(channels)
7651 .width(3)
7652 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007653 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007654 }
7655 }
7656}
7657
7658TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
7659 for (size_t channels = 1; channels <= 5; channels += 1) {
7660 DWConvMicrokernelTester()
7661 .cr(1)
7662 .kr(9)
7663 .channels(1)
7664 .width(5)
7665 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007666 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007667 }
7668}
7669
7670TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
7671 for (size_t channels = 1; channels <= 5; channels += 1) {
7672 DWConvMicrokernelTester()
7673 .cr(1)
7674 .kr(9)
7675 .channels(channels)
7676 .width(3)
7677 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007678 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007679 }
7680}
7681
7682TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
7683 for (size_t channels = 1; channels <= 5; channels += 1) {
7684 DWConvMicrokernelTester()
7685 .cr(1)
7686 .kr(9)
7687 .channels(channels)
7688 .width(3)
7689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007690 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007691 }
7692}
7693
7694TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_zero_point_only) {
7695 for (size_t channels = 1; channels <= 5; channels += 1) {
7696 DWConvMicrokernelTester()
7697 .cr(1)
7698 .kr(9)
7699 .channels(channels)
7700 .width(3)
7701 .input_zero_point(255)
7702 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007703 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007704 }
7705}
7706
7707TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, kernel_zero_point_only) {
7708 for (size_t channels = 1; channels <= 5; channels += 1) {
7709 DWConvMicrokernelTester()
7710 .cr(1)
7711 .kr(9)
7712 .channels(channels)
7713 .width(3)
7714 .input_zero_point(0)
7715 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007716 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007717 }
7718}
7719
7720TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
7721 for (uint32_t channels = 2; channels < 16; channels += 3) {
7722 DWConvMicrokernelTester()
7723 .cr(1)
7724 .kr(9)
7725 .channels(channels)
7726 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -08007727 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007728 }
7729}
7730
7731TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
7732 for (uint32_t mz = 0; mz < 9; mz++) {
7733 for (uint32_t channels = 2; channels < 16; channels += 3) {
7734 DWConvMicrokernelTester()
7735 .cr(1)
7736 .kr(9)
7737 .channels(channels)
7738 .input_offset(48)
7739 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007740 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007741 }
7742 }
7743}
7744
7745TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
7746 DWConvMicrokernelTester()
7747 .cr(2)
7748 .kr(9)
7749 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08007750 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007751}
7752
7753TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
7754 for (uint32_t channels = 4; channels < 32; channels += 6) {
7755 DWConvMicrokernelTester()
7756 .cr(2)
7757 .kr(9)
7758 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007759 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007760 }
7761}
7762
7763TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
7764 for (uint32_t channels = 4; channels < 32; channels += 6) {
7765 DWConvMicrokernelTester()
7766 .cr(2)
7767 .kr(9)
7768 .channels(channels)
7769 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007770 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007771 }
7772}
7773
7774TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
7775 for (uint32_t channels = 4; channels < 32; channels += 6) {
7776 DWConvMicrokernelTester()
7777 .cr(2)
7778 .kr(9)
7779 .channels(channels)
7780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007781 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007782 }
7783}
7784
7785TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
7786 for (uint32_t channels = 1; channels < 2; channels++) {
7787 DWConvMicrokernelTester()
7788 .cr(2)
7789 .kr(9)
7790 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007791 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007792 }
7793}
7794
7795TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
7796 for (uint32_t channels = 3; channels < 4; channels++) {
7797 DWConvMicrokernelTester()
7798 .cr(2)
7799 .kr(9)
7800 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007801 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007802 }
7803}
7804
7805TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
7806 for (uint32_t channels = 3; channels < 4; channels++) {
7807 DWConvMicrokernelTester()
7808 .cr(2)
7809 .kr(9)
7810 .channels(channels)
7811 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007812 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007813 }
7814}
7815
7816TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
7817 for (uint32_t channels = 3; channels < 4; channels++) {
7818 DWConvMicrokernelTester()
7819 .cr(2)
7820 .kr(9)
7821 .channels(channels)
7822 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007823 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007824 }
7825}
7826
7827TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
7828 for (size_t channels = 1; channels <= 10; channels += 1) {
7829 DWConvMicrokernelTester()
7830 .cr(2)
7831 .kr(9)
7832 .channels(channels)
7833 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007834 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007835 }
7836}
7837
7838TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
7839 for (size_t channels = 1; channels <= 10; channels += 1) {
7840 for (size_t step = 2; step <= 9; step++) {
7841 DWConvMicrokernelTester()
7842 .cr(2)
7843 .kr(9)
7844 .channels(channels)
7845 .width(3)
7846 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007847 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007848 }
7849 }
7850}
7851
7852TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
7853 for (size_t channels = 1; channels <= 10; channels += 1) {
7854 DWConvMicrokernelTester()
7855 .cr(2)
7856 .kr(9)
7857 .channels(2)
7858 .width(5)
7859 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -08007860 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007861 }
7862}
7863
7864TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
7865 for (size_t channels = 1; channels <= 10; channels += 1) {
7866 DWConvMicrokernelTester()
7867 .cr(2)
7868 .kr(9)
7869 .channels(channels)
7870 .width(3)
7871 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007872 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007873 }
7874}
7875
7876TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
7877 for (size_t channels = 1; channels <= 10; channels += 1) {
7878 DWConvMicrokernelTester()
7879 .cr(2)
7880 .kr(9)
7881 .channels(channels)
7882 .width(3)
7883 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007884 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007885 }
7886}
7887
7888TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_zero_point_only) {
7889 for (size_t channels = 1; channels <= 10; channels += 1) {
7890 DWConvMicrokernelTester()
7891 .cr(2)
7892 .kr(9)
7893 .channels(channels)
7894 .width(3)
7895 .input_zero_point(255)
7896 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007897 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007898 }
7899}
7900
7901TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, kernel_zero_point_only) {
7902 for (size_t channels = 1; channels <= 10; channels += 1) {
7903 DWConvMicrokernelTester()
7904 .cr(2)
7905 .kr(9)
7906 .channels(channels)
7907 .width(3)
7908 .input_zero_point(0)
7909 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08007910 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007911 }
7912}
7913
7914TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
7915 for (uint32_t channels = 4; channels < 32; channels += 6) {
7916 DWConvMicrokernelTester()
7917 .cr(2)
7918 .kr(9)
7919 .channels(channels)
7920 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -08007921 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007922 }
7923}
7924
7925TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
7926 for (uint32_t mz = 0; mz < 9; mz++) {
7927 for (uint32_t channels = 4; channels < 32; channels += 6) {
7928 DWConvMicrokernelTester()
7929 .cr(2)
7930 .kr(9)
7931 .channels(channels)
7932 .input_offset(80)
7933 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007934 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007935 }
7936 }
7937}
7938
7939TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
7940 DWConvMicrokernelTester()
7941 .cr(4)
7942 .kr(9)
7943 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08007944 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007945}
7946
7947TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
7948 for (uint32_t channels = 8; channels < 64; channels += 12) {
7949 DWConvMicrokernelTester()
7950 .cr(4)
7951 .kr(9)
7952 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007953 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007954 }
7955}
7956
7957TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
7958 for (uint32_t channels = 8; channels < 64; channels += 12) {
7959 DWConvMicrokernelTester()
7960 .cr(4)
7961 .kr(9)
7962 .channels(channels)
7963 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007964 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007965 }
7966}
7967
7968TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
7969 for (uint32_t channels = 8; channels < 64; channels += 12) {
7970 DWConvMicrokernelTester()
7971 .cr(4)
7972 .kr(9)
7973 .channels(channels)
7974 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007975 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007976 }
7977}
7978
7979TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
7980 for (uint32_t channels = 1; channels < 4; channels++) {
7981 DWConvMicrokernelTester()
7982 .cr(4)
7983 .kr(9)
7984 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007985 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007986 }
7987}
7988
7989TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
7990 for (uint32_t channels = 5; channels < 8; channels++) {
7991 DWConvMicrokernelTester()
7992 .cr(4)
7993 .kr(9)
7994 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007995 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08007996 }
7997}
7998
7999TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
8000 for (uint32_t channels = 5; channels < 8; channels++) {
8001 DWConvMicrokernelTester()
8002 .cr(4)
8003 .kr(9)
8004 .channels(channels)
8005 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008006 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008007 }
8008}
8009
8010TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
8011 for (uint32_t channels = 5; channels < 8; channels++) {
8012 DWConvMicrokernelTester()
8013 .cr(4)
8014 .kr(9)
8015 .channels(channels)
8016 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008017 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008018 }
8019}
8020
8021TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
8022 for (size_t channels = 1; channels <= 20; channels += 3) {
8023 DWConvMicrokernelTester()
8024 .cr(4)
8025 .kr(9)
8026 .channels(channels)
8027 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008028 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008029 }
8030}
8031
8032TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
8033 for (size_t channels = 1; channels <= 20; channels += 3) {
8034 for (size_t step = 2; step <= 9; step++) {
8035 DWConvMicrokernelTester()
8036 .cr(4)
8037 .kr(9)
8038 .channels(channels)
8039 .width(3)
8040 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008041 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008042 }
8043 }
8044}
8045
8046TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
8047 for (size_t channels = 1; channels <= 20; channels += 3) {
8048 DWConvMicrokernelTester()
8049 .cr(4)
8050 .kr(9)
8051 .channels(4)
8052 .width(5)
8053 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08008054 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008055 }
8056}
8057
8058TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
8059 for (size_t channels = 1; channels <= 20; channels += 3) {
8060 DWConvMicrokernelTester()
8061 .cr(4)
8062 .kr(9)
8063 .channels(channels)
8064 .width(3)
8065 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008066 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008067 }
8068}
8069
8070TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
8071 for (size_t channels = 1; channels <= 20; channels += 3) {
8072 DWConvMicrokernelTester()
8073 .cr(4)
8074 .kr(9)
8075 .channels(channels)
8076 .width(3)
8077 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008078 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008079 }
8080}
8081
8082TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_zero_point_only) {
8083 for (size_t channels = 1; channels <= 20; channels += 3) {
8084 DWConvMicrokernelTester()
8085 .cr(4)
8086 .kr(9)
8087 .channels(channels)
8088 .width(3)
8089 .input_zero_point(255)
8090 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008091 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008092 }
8093}
8094
8095TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, kernel_zero_point_only) {
8096 for (size_t channels = 1; channels <= 20; channels += 3) {
8097 DWConvMicrokernelTester()
8098 .cr(4)
8099 .kr(9)
8100 .channels(channels)
8101 .width(3)
8102 .input_zero_point(0)
8103 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08008104 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008105 }
8106}
8107
8108TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
8109 for (uint32_t channels = 8; channels < 64; channels += 12) {
8110 DWConvMicrokernelTester()
8111 .cr(4)
8112 .kr(9)
8113 .channels(channels)
8114 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -08008115 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008116 }
8117}
8118
8119TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
8120 for (uint32_t mz = 0; mz < 9; mz++) {
8121 for (uint32_t channels = 8; channels < 64; channels += 12) {
8122 DWConvMicrokernelTester()
8123 .cr(4)
8124 .kr(9)
8125 .channels(channels)
8126 .input_offset(112)
8127 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008128 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -08008129 }
8130 }
8131}
8132
Marat Dukhan605696a2021-07-15 18:01:30 -07008133#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8134 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
8135 TEST_REQUIRES_ARM_NEON;
8136 DWConvMicrokernelTester()
8137 .cr(8)
8138 .kr(25)
8139 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008140 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008141 }
8142
8143 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
8144 TEST_REQUIRES_ARM_NEON;
8145 for (uint32_t channels = 16; channels < 128; channels += 24) {
8146 DWConvMicrokernelTester()
8147 .cr(8)
8148 .kr(25)
8149 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008150 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008151 }
8152 }
8153
8154 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
8155 TEST_REQUIRES_ARM_NEON;
8156 for (uint32_t channels = 16; channels < 128; channels += 24) {
8157 DWConvMicrokernelTester()
8158 .cr(8)
8159 .kr(25)
8160 .channels(channels)
8161 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008162 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008163 }
8164 }
8165
8166 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
8167 TEST_REQUIRES_ARM_NEON;
8168 for (uint32_t channels = 16; channels < 128; channels += 24) {
8169 DWConvMicrokernelTester()
8170 .cr(8)
8171 .kr(25)
8172 .channels(channels)
8173 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008174 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008175 }
8176 }
8177
8178 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
8179 TEST_REQUIRES_ARM_NEON;
8180 for (uint32_t channels = 1; channels < 8; channels++) {
8181 DWConvMicrokernelTester()
8182 .cr(8)
8183 .kr(25)
8184 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008185 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008186 }
8187 }
8188
8189 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
8190 TEST_REQUIRES_ARM_NEON;
8191 for (uint32_t channels = 9; channels < 16; channels++) {
8192 DWConvMicrokernelTester()
8193 .cr(8)
8194 .kr(25)
8195 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008196 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008197 }
8198 }
8199
8200 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
8201 TEST_REQUIRES_ARM_NEON;
8202 for (uint32_t channels = 9; channels < 16; channels++) {
8203 DWConvMicrokernelTester()
8204 .cr(8)
8205 .kr(25)
8206 .channels(channels)
8207 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008208 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008209 }
8210 }
8211
8212 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
8213 TEST_REQUIRES_ARM_NEON;
8214 for (uint32_t channels = 9; channels < 16; channels++) {
8215 DWConvMicrokernelTester()
8216 .cr(8)
8217 .kr(25)
8218 .channels(channels)
8219 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008220 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008221 }
8222 }
8223
8224 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
8225 TEST_REQUIRES_ARM_NEON;
8226 for (size_t channels = 1; channels <= 40; channels += 7) {
8227 DWConvMicrokernelTester()
8228 .cr(8)
8229 .kr(25)
8230 .channels(channels)
8231 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008232 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008233 }
8234 }
8235
8236 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
8237 TEST_REQUIRES_ARM_NEON;
8238 for (size_t channels = 1; channels <= 40; channels += 7) {
8239 for (size_t step = 2; step <= 25; step++) {
8240 DWConvMicrokernelTester()
8241 .cr(8)
8242 .kr(25)
8243 .channels(channels)
8244 .width(3)
8245 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008246 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008247 }
8248 }
8249 }
8250
8251 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
8252 TEST_REQUIRES_ARM_NEON;
8253 for (size_t channels = 1; channels <= 40; channels += 7) {
8254 DWConvMicrokernelTester()
8255 .cr(8)
8256 .kr(25)
8257 .channels(8)
8258 .width(5)
8259 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008260 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008261 }
8262 }
8263
8264 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
8265 TEST_REQUIRES_ARM_NEON;
8266 for (size_t channels = 1; channels <= 40; channels += 7) {
8267 DWConvMicrokernelTester()
8268 .cr(8)
8269 .kr(25)
8270 .channels(channels)
8271 .width(3)
8272 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008273 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008274 }
8275 }
8276
8277 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
8278 TEST_REQUIRES_ARM_NEON;
8279 for (size_t channels = 1; channels <= 40; channels += 7) {
8280 DWConvMicrokernelTester()
8281 .cr(8)
8282 .kr(25)
8283 .channels(channels)
8284 .width(3)
8285 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008287 }
8288 }
8289
8290 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_zero_point_only) {
8291 TEST_REQUIRES_ARM_NEON;
8292 for (size_t channels = 1; channels <= 40; channels += 7) {
8293 DWConvMicrokernelTester()
8294 .cr(8)
8295 .kr(25)
8296 .channels(channels)
8297 .width(3)
8298 .input_zero_point(255)
8299 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008300 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008301 }
8302 }
8303
8304 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, kernel_zero_point_only) {
8305 TEST_REQUIRES_ARM_NEON;
8306 for (size_t channels = 1; channels <= 40; channels += 7) {
8307 DWConvMicrokernelTester()
8308 .cr(8)
8309 .kr(25)
8310 .channels(channels)
8311 .width(3)
8312 .input_zero_point(0)
8313 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08008314 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008315 }
8316 }
8317
8318 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
8319 TEST_REQUIRES_ARM_NEON;
8320 for (uint32_t channels = 16; channels < 128; channels += 24) {
8321 DWConvMicrokernelTester()
8322 .cr(8)
8323 .kr(25)
8324 .channels(channels)
8325 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08008326 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008327 }
8328 }
8329
8330 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
8331 TEST_REQUIRES_ARM_NEON;
8332 for (uint32_t mz = 0; mz < 25; mz++) {
8333 for (uint32_t channels = 16; channels < 128; channels += 24) {
8334 DWConvMicrokernelTester()
8335 .cr(8)
8336 .kr(25)
8337 .channels(channels)
8338 .input_offset(176)
8339 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008340 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008341 }
8342 }
8343 }
8344#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8345
8346
8347#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8348 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
8349 TEST_REQUIRES_ARM_NEON;
8350 DWConvMicrokernelTester()
8351 .cr(16)
8352 .kr(25)
8353 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008354 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008355 }
8356
8357 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
8358 TEST_REQUIRES_ARM_NEON;
8359 for (uint32_t channels = 32; channels < 256; channels += 48) {
8360 DWConvMicrokernelTester()
8361 .cr(16)
8362 .kr(25)
8363 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008364 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008365 }
8366 }
8367
8368 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
8369 TEST_REQUIRES_ARM_NEON;
8370 for (uint32_t channels = 32; channels < 256; channels += 48) {
8371 DWConvMicrokernelTester()
8372 .cr(16)
8373 .kr(25)
8374 .channels(channels)
8375 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008376 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008377 }
8378 }
8379
8380 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
8381 TEST_REQUIRES_ARM_NEON;
8382 for (uint32_t channels = 32; channels < 256; channels += 48) {
8383 DWConvMicrokernelTester()
8384 .cr(16)
8385 .kr(25)
8386 .channels(channels)
8387 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008388 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008389 }
8390 }
8391
8392 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
8393 TEST_REQUIRES_ARM_NEON;
8394 for (uint32_t channels = 1; channels < 16; channels++) {
8395 DWConvMicrokernelTester()
8396 .cr(16)
8397 .kr(25)
8398 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008399 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008400 }
8401 }
8402
8403 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
8404 TEST_REQUIRES_ARM_NEON;
8405 for (uint32_t channels = 17; channels < 32; channels++) {
8406 DWConvMicrokernelTester()
8407 .cr(16)
8408 .kr(25)
8409 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008410 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008411 }
8412 }
8413
8414 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
8415 TEST_REQUIRES_ARM_NEON;
8416 for (uint32_t channels = 17; channels < 32; channels++) {
8417 DWConvMicrokernelTester()
8418 .cr(16)
8419 .kr(25)
8420 .channels(channels)
8421 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008423 }
8424 }
8425
8426 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
8427 TEST_REQUIRES_ARM_NEON;
8428 for (uint32_t channels = 17; channels < 32; channels++) {
8429 DWConvMicrokernelTester()
8430 .cr(16)
8431 .kr(25)
8432 .channels(channels)
8433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008434 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008435 }
8436 }
8437
8438 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
8439 TEST_REQUIRES_ARM_NEON;
8440 for (size_t channels = 1; channels <= 80; channels += 15) {
8441 DWConvMicrokernelTester()
8442 .cr(16)
8443 .kr(25)
8444 .channels(channels)
8445 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008446 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008447 }
8448 }
8449
8450 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
8451 TEST_REQUIRES_ARM_NEON;
8452 for (size_t channels = 1; channels <= 80; channels += 15) {
8453 for (size_t step = 2; step <= 25; step++) {
8454 DWConvMicrokernelTester()
8455 .cr(16)
8456 .kr(25)
8457 .channels(channels)
8458 .width(3)
8459 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008460 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008461 }
8462 }
8463 }
8464
8465 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
8466 TEST_REQUIRES_ARM_NEON;
8467 for (size_t channels = 1; channels <= 80; channels += 15) {
8468 DWConvMicrokernelTester()
8469 .cr(16)
8470 .kr(25)
8471 .channels(16)
8472 .width(5)
8473 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008474 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008475 }
8476 }
8477
8478 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
8479 TEST_REQUIRES_ARM_NEON;
8480 for (size_t channels = 1; channels <= 80; channels += 15) {
8481 DWConvMicrokernelTester()
8482 .cr(16)
8483 .kr(25)
8484 .channels(channels)
8485 .width(3)
8486 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008487 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008488 }
8489 }
8490
8491 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
8492 TEST_REQUIRES_ARM_NEON;
8493 for (size_t channels = 1; channels <= 80; channels += 15) {
8494 DWConvMicrokernelTester()
8495 .cr(16)
8496 .kr(25)
8497 .channels(channels)
8498 .width(3)
8499 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008500 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008501 }
8502 }
8503
8504 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_zero_point_only) {
8505 TEST_REQUIRES_ARM_NEON;
8506 for (size_t channels = 1; channels <= 80; channels += 15) {
8507 DWConvMicrokernelTester()
8508 .cr(16)
8509 .kr(25)
8510 .channels(channels)
8511 .width(3)
8512 .input_zero_point(255)
8513 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008514 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008515 }
8516 }
8517
8518 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, kernel_zero_point_only) {
8519 TEST_REQUIRES_ARM_NEON;
8520 for (size_t channels = 1; channels <= 80; channels += 15) {
8521 DWConvMicrokernelTester()
8522 .cr(16)
8523 .kr(25)
8524 .channels(channels)
8525 .width(3)
8526 .input_zero_point(0)
8527 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08008528 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008529 }
8530 }
8531
8532 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
8533 TEST_REQUIRES_ARM_NEON;
8534 for (uint32_t channels = 32; channels < 256; channels += 48) {
8535 DWConvMicrokernelTester()
8536 .cr(16)
8537 .kr(25)
8538 .channels(channels)
8539 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08008540 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008541 }
8542 }
8543
8544 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
8545 TEST_REQUIRES_ARM_NEON;
8546 for (uint32_t mz = 0; mz < 25; mz++) {
8547 for (uint32_t channels = 32; channels < 256; channels += 48) {
8548 DWConvMicrokernelTester()
8549 .cr(16)
8550 .kr(25)
8551 .channels(channels)
8552 .input_offset(304)
8553 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008554 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008555 }
8556 }
8557 }
8558#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8559
8560
8561#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8562 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
8563 TEST_REQUIRES_ARM_NEON;
8564 DWConvMicrokernelTester()
8565 .cr(24)
8566 .kr(25)
8567 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08008568 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008569 }
8570
8571 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
8572 TEST_REQUIRES_ARM_NEON;
8573 for (uint32_t channels = 48; channels < 384; channels += 72) {
8574 DWConvMicrokernelTester()
8575 .cr(24)
8576 .kr(25)
8577 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008578 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008579 }
8580 }
8581
8582 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
8583 TEST_REQUIRES_ARM_NEON;
8584 for (uint32_t channels = 48; channels < 384; channels += 72) {
8585 DWConvMicrokernelTester()
8586 .cr(24)
8587 .kr(25)
8588 .channels(channels)
8589 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008590 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008591 }
8592 }
8593
8594 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
8595 TEST_REQUIRES_ARM_NEON;
8596 for (uint32_t channels = 48; channels < 384; channels += 72) {
8597 DWConvMicrokernelTester()
8598 .cr(24)
8599 .kr(25)
8600 .channels(channels)
8601 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008602 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008603 }
8604 }
8605
8606 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
8607 TEST_REQUIRES_ARM_NEON;
8608 for (uint32_t channels = 1; channels < 24; channels++) {
8609 DWConvMicrokernelTester()
8610 .cr(24)
8611 .kr(25)
8612 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008613 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008614 }
8615 }
8616
8617 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
8618 TEST_REQUIRES_ARM_NEON;
8619 for (uint32_t channels = 25; channels < 48; channels++) {
8620 DWConvMicrokernelTester()
8621 .cr(24)
8622 .kr(25)
8623 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008624 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008625 }
8626 }
8627
8628 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
8629 TEST_REQUIRES_ARM_NEON;
8630 for (uint32_t channels = 25; channels < 48; channels++) {
8631 DWConvMicrokernelTester()
8632 .cr(24)
8633 .kr(25)
8634 .channels(channels)
8635 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008636 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008637 }
8638 }
8639
8640 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
8641 TEST_REQUIRES_ARM_NEON;
8642 for (uint32_t channels = 25; channels < 48; channels++) {
8643 DWConvMicrokernelTester()
8644 .cr(24)
8645 .kr(25)
8646 .channels(channels)
8647 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008648 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008649 }
8650 }
8651
8652 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
8653 TEST_REQUIRES_ARM_NEON;
8654 for (size_t channels = 1; channels <= 120; channels += 23) {
8655 DWConvMicrokernelTester()
8656 .cr(24)
8657 .kr(25)
8658 .channels(channels)
8659 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008660 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008661 }
8662 }
8663
8664 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
8665 TEST_REQUIRES_ARM_NEON;
8666 for (size_t channels = 1; channels <= 120; channels += 23) {
8667 for (size_t step = 2; step <= 25; step++) {
8668 DWConvMicrokernelTester()
8669 .cr(24)
8670 .kr(25)
8671 .channels(channels)
8672 .width(3)
8673 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008674 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008675 }
8676 }
8677 }
8678
8679 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
8680 TEST_REQUIRES_ARM_NEON;
8681 for (size_t channels = 1; channels <= 120; channels += 23) {
8682 DWConvMicrokernelTester()
8683 .cr(24)
8684 .kr(25)
8685 .channels(24)
8686 .width(5)
8687 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08008688 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008689 }
8690 }
8691
8692 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
8693 TEST_REQUIRES_ARM_NEON;
8694 for (size_t channels = 1; channels <= 120; channels += 23) {
8695 DWConvMicrokernelTester()
8696 .cr(24)
8697 .kr(25)
8698 .channels(channels)
8699 .width(3)
8700 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008701 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008702 }
8703 }
8704
8705 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
8706 TEST_REQUIRES_ARM_NEON;
8707 for (size_t channels = 1; channels <= 120; channels += 23) {
8708 DWConvMicrokernelTester()
8709 .cr(24)
8710 .kr(25)
8711 .channels(channels)
8712 .width(3)
8713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008714 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008715 }
8716 }
8717
8718 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_zero_point_only) {
8719 TEST_REQUIRES_ARM_NEON;
8720 for (size_t channels = 1; channels <= 120; channels += 23) {
8721 DWConvMicrokernelTester()
8722 .cr(24)
8723 .kr(25)
8724 .channels(channels)
8725 .width(3)
8726 .input_zero_point(255)
8727 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008728 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008729 }
8730 }
8731
8732 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, kernel_zero_point_only) {
8733 TEST_REQUIRES_ARM_NEON;
8734 for (size_t channels = 1; channels <= 120; channels += 23) {
8735 DWConvMicrokernelTester()
8736 .cr(24)
8737 .kr(25)
8738 .channels(channels)
8739 .width(3)
8740 .input_zero_point(0)
8741 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08008742 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008743 }
8744 }
8745
8746 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
8747 TEST_REQUIRES_ARM_NEON;
8748 for (uint32_t channels = 48; channels < 384; channels += 72) {
8749 DWConvMicrokernelTester()
8750 .cr(24)
8751 .kr(25)
8752 .channels(channels)
8753 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08008754 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008755 }
8756 }
8757
8758 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
8759 TEST_REQUIRES_ARM_NEON;
8760 for (uint32_t mz = 0; mz < 25; mz++) {
8761 for (uint32_t channels = 48; channels < 384; channels += 72) {
8762 DWConvMicrokernelTester()
8763 .cr(24)
8764 .kr(25)
8765 .channels(channels)
8766 .input_offset(464)
8767 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008768 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008769 }
8770 }
8771 }
8772#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8773
8774
8775#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8776 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
8777 TEST_REQUIRES_ARM_NEON;
8778 DWConvMicrokernelTester()
8779 .cr(32)
8780 .kr(25)
8781 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08008782 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008783 }
8784
8785 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
8786 TEST_REQUIRES_ARM_NEON;
8787 for (uint32_t channels = 64; channels < 512; channels += 96) {
8788 DWConvMicrokernelTester()
8789 .cr(32)
8790 .kr(25)
8791 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008792 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008793 }
8794 }
8795
8796 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
8797 TEST_REQUIRES_ARM_NEON;
8798 for (uint32_t channels = 64; channels < 512; channels += 96) {
8799 DWConvMicrokernelTester()
8800 .cr(32)
8801 .kr(25)
8802 .channels(channels)
8803 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008804 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008805 }
8806 }
8807
8808 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
8809 TEST_REQUIRES_ARM_NEON;
8810 for (uint32_t channels = 64; channels < 512; channels += 96) {
8811 DWConvMicrokernelTester()
8812 .cr(32)
8813 .kr(25)
8814 .channels(channels)
8815 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008816 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008817 }
8818 }
8819
8820 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
8821 TEST_REQUIRES_ARM_NEON;
8822 for (uint32_t channels = 1; channels < 32; channels++) {
8823 DWConvMicrokernelTester()
8824 .cr(32)
8825 .kr(25)
8826 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008827 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008828 }
8829 }
8830
8831 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
8832 TEST_REQUIRES_ARM_NEON;
8833 for (uint32_t channels = 33; channels < 64; channels++) {
8834 DWConvMicrokernelTester()
8835 .cr(32)
8836 .kr(25)
8837 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008838 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008839 }
8840 }
8841
8842 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
8843 TEST_REQUIRES_ARM_NEON;
8844 for (uint32_t channels = 33; channels < 64; channels++) {
8845 DWConvMicrokernelTester()
8846 .cr(32)
8847 .kr(25)
8848 .channels(channels)
8849 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008850 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008851 }
8852 }
8853
8854 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
8855 TEST_REQUIRES_ARM_NEON;
8856 for (uint32_t channels = 33; channels < 64; channels++) {
8857 DWConvMicrokernelTester()
8858 .cr(32)
8859 .kr(25)
8860 .channels(channels)
8861 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008862 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008863 }
8864 }
8865
8866 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
8867 TEST_REQUIRES_ARM_NEON;
8868 for (size_t channels = 1; channels <= 160; channels += 31) {
8869 DWConvMicrokernelTester()
8870 .cr(32)
8871 .kr(25)
8872 .channels(channels)
8873 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008874 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008875 }
8876 }
8877
8878 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
8879 TEST_REQUIRES_ARM_NEON;
8880 for (size_t channels = 1; channels <= 160; channels += 31) {
8881 for (size_t step = 2; step <= 25; step++) {
8882 DWConvMicrokernelTester()
8883 .cr(32)
8884 .kr(25)
8885 .channels(channels)
8886 .width(3)
8887 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008888 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008889 }
8890 }
8891 }
8892
8893 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
8894 TEST_REQUIRES_ARM_NEON;
8895 for (size_t channels = 1; channels <= 160; channels += 31) {
8896 DWConvMicrokernelTester()
8897 .cr(32)
8898 .kr(25)
8899 .channels(32)
8900 .width(5)
8901 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008902 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008903 }
8904 }
8905
8906 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
8907 TEST_REQUIRES_ARM_NEON;
8908 for (size_t channels = 1; channels <= 160; channels += 31) {
8909 DWConvMicrokernelTester()
8910 .cr(32)
8911 .kr(25)
8912 .channels(channels)
8913 .width(3)
8914 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008915 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008916 }
8917 }
8918
8919 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
8920 TEST_REQUIRES_ARM_NEON;
8921 for (size_t channels = 1; channels <= 160; channels += 31) {
8922 DWConvMicrokernelTester()
8923 .cr(32)
8924 .kr(25)
8925 .channels(channels)
8926 .width(3)
8927 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008928 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008929 }
8930 }
8931
8932 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_zero_point_only) {
8933 TEST_REQUIRES_ARM_NEON;
8934 for (size_t channels = 1; channels <= 160; channels += 31) {
8935 DWConvMicrokernelTester()
8936 .cr(32)
8937 .kr(25)
8938 .channels(channels)
8939 .width(3)
8940 .input_zero_point(255)
8941 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008942 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008943 }
8944 }
8945
8946 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, kernel_zero_point_only) {
8947 TEST_REQUIRES_ARM_NEON;
8948 for (size_t channels = 1; channels <= 160; channels += 31) {
8949 DWConvMicrokernelTester()
8950 .cr(32)
8951 .kr(25)
8952 .channels(channels)
8953 .width(3)
8954 .input_zero_point(0)
8955 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08008956 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008957 }
8958 }
8959
8960 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
8961 TEST_REQUIRES_ARM_NEON;
8962 for (uint32_t channels = 64; channels < 512; channels += 96) {
8963 DWConvMicrokernelTester()
8964 .cr(32)
8965 .kr(25)
8966 .channels(channels)
8967 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08008968 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008969 }
8970 }
8971
8972 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
8973 TEST_REQUIRES_ARM_NEON;
8974 for (uint32_t mz = 0; mz < 25; mz++) {
8975 for (uint32_t channels = 64; channels < 512; channels += 96) {
8976 DWConvMicrokernelTester()
8977 .cr(32)
8978 .kr(25)
8979 .channels(channels)
8980 .input_offset(592)
8981 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008982 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008983 }
8984 }
8985 }
8986#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8987
8988
8989#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8990 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
8991 TEST_REQUIRES_ARM_NEON_V8;
8992 DWConvMicrokernelTester()
8993 .cr(8)
8994 .kr(25)
8995 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008996 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07008997 }
8998
8999 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
9000 TEST_REQUIRES_ARM_NEON_V8;
9001 for (uint32_t channels = 16; channels < 128; channels += 24) {
9002 DWConvMicrokernelTester()
9003 .cr(8)
9004 .kr(25)
9005 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009006 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009007 }
9008 }
9009
9010 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
9011 TEST_REQUIRES_ARM_NEON_V8;
9012 for (uint32_t channels = 16; channels < 128; channels += 24) {
9013 DWConvMicrokernelTester()
9014 .cr(8)
9015 .kr(25)
9016 .channels(channels)
9017 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009018 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009019 }
9020 }
9021
9022 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
9023 TEST_REQUIRES_ARM_NEON_V8;
9024 for (uint32_t channels = 16; channels < 128; channels += 24) {
9025 DWConvMicrokernelTester()
9026 .cr(8)
9027 .kr(25)
9028 .channels(channels)
9029 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009030 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009031 }
9032 }
9033
9034 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
9035 TEST_REQUIRES_ARM_NEON_V8;
9036 for (uint32_t channels = 1; channels < 8; channels++) {
9037 DWConvMicrokernelTester()
9038 .cr(8)
9039 .kr(25)
9040 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009041 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009042 }
9043 }
9044
9045 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
9046 TEST_REQUIRES_ARM_NEON_V8;
9047 for (uint32_t channels = 9; channels < 16; channels++) {
9048 DWConvMicrokernelTester()
9049 .cr(8)
9050 .kr(25)
9051 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009052 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009053 }
9054 }
9055
9056 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
9057 TEST_REQUIRES_ARM_NEON_V8;
9058 for (uint32_t channels = 9; channels < 16; channels++) {
9059 DWConvMicrokernelTester()
9060 .cr(8)
9061 .kr(25)
9062 .channels(channels)
9063 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009064 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009065 }
9066 }
9067
9068 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
9069 TEST_REQUIRES_ARM_NEON_V8;
9070 for (uint32_t channels = 9; channels < 16; channels++) {
9071 DWConvMicrokernelTester()
9072 .cr(8)
9073 .kr(25)
9074 .channels(channels)
9075 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009076 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009077 }
9078 }
9079
9080 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
9081 TEST_REQUIRES_ARM_NEON_V8;
9082 for (size_t channels = 1; channels <= 40; channels += 7) {
9083 DWConvMicrokernelTester()
9084 .cr(8)
9085 .kr(25)
9086 .channels(channels)
9087 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009088 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009089 }
9090 }
9091
9092 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
9093 TEST_REQUIRES_ARM_NEON_V8;
9094 for (size_t channels = 1; channels <= 40; channels += 7) {
9095 for (size_t step = 2; step <= 25; step++) {
9096 DWConvMicrokernelTester()
9097 .cr(8)
9098 .kr(25)
9099 .channels(channels)
9100 .width(3)
9101 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009102 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009103 }
9104 }
9105 }
9106
9107 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
9108 TEST_REQUIRES_ARM_NEON_V8;
9109 for (size_t channels = 1; channels <= 40; channels += 7) {
9110 DWConvMicrokernelTester()
9111 .cr(8)
9112 .kr(25)
9113 .channels(8)
9114 .width(5)
9115 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009116 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009117 }
9118 }
9119
9120 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
9121 TEST_REQUIRES_ARM_NEON_V8;
9122 for (size_t channels = 1; channels <= 40; channels += 7) {
9123 DWConvMicrokernelTester()
9124 .cr(8)
9125 .kr(25)
9126 .channels(channels)
9127 .width(3)
9128 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009129 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009130 }
9131 }
9132
9133 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
9134 TEST_REQUIRES_ARM_NEON_V8;
9135 for (size_t channels = 1; channels <= 40; channels += 7) {
9136 DWConvMicrokernelTester()
9137 .cr(8)
9138 .kr(25)
9139 .channels(channels)
9140 .width(3)
9141 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009142 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009143 }
9144 }
9145
9146 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_zero_point_only) {
9147 TEST_REQUIRES_ARM_NEON_V8;
9148 for (size_t channels = 1; channels <= 40; channels += 7) {
9149 DWConvMicrokernelTester()
9150 .cr(8)
9151 .kr(25)
9152 .channels(channels)
9153 .width(3)
9154 .input_zero_point(255)
9155 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009156 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009157 }
9158 }
9159
9160 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, kernel_zero_point_only) {
9161 TEST_REQUIRES_ARM_NEON_V8;
9162 for (size_t channels = 1; channels <= 40; channels += 7) {
9163 DWConvMicrokernelTester()
9164 .cr(8)
9165 .kr(25)
9166 .channels(channels)
9167 .width(3)
9168 .input_zero_point(0)
9169 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08009170 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009171 }
9172 }
9173
9174 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
9175 TEST_REQUIRES_ARM_NEON_V8;
9176 for (uint32_t channels = 16; channels < 128; channels += 24) {
9177 DWConvMicrokernelTester()
9178 .cr(8)
9179 .kr(25)
9180 .channels(channels)
9181 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08009182 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009183 }
9184 }
9185
9186 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
9187 TEST_REQUIRES_ARM_NEON_V8;
9188 for (uint32_t mz = 0; mz < 25; mz++) {
9189 for (uint32_t channels = 16; channels < 128; channels += 24) {
9190 DWConvMicrokernelTester()
9191 .cr(8)
9192 .kr(25)
9193 .channels(channels)
9194 .input_offset(176)
9195 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009196 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009197 }
9198 }
9199 }
9200#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9201
9202
9203#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9204 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
9205 TEST_REQUIRES_ARM_NEON_V8;
9206 DWConvMicrokernelTester()
9207 .cr(16)
9208 .kr(25)
9209 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009210 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009211 }
9212
9213 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
9214 TEST_REQUIRES_ARM_NEON_V8;
9215 for (uint32_t channels = 32; channels < 256; channels += 48) {
9216 DWConvMicrokernelTester()
9217 .cr(16)
9218 .kr(25)
9219 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009220 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009221 }
9222 }
9223
9224 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
9225 TEST_REQUIRES_ARM_NEON_V8;
9226 for (uint32_t channels = 32; channels < 256; channels += 48) {
9227 DWConvMicrokernelTester()
9228 .cr(16)
9229 .kr(25)
9230 .channels(channels)
9231 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009232 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009233 }
9234 }
9235
9236 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
9237 TEST_REQUIRES_ARM_NEON_V8;
9238 for (uint32_t channels = 32; channels < 256; channels += 48) {
9239 DWConvMicrokernelTester()
9240 .cr(16)
9241 .kr(25)
9242 .channels(channels)
9243 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009244 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009245 }
9246 }
9247
9248 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
9249 TEST_REQUIRES_ARM_NEON_V8;
9250 for (uint32_t channels = 1; channels < 16; channels++) {
9251 DWConvMicrokernelTester()
9252 .cr(16)
9253 .kr(25)
9254 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009255 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009256 }
9257 }
9258
9259 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
9260 TEST_REQUIRES_ARM_NEON_V8;
9261 for (uint32_t channels = 17; channels < 32; channels++) {
9262 DWConvMicrokernelTester()
9263 .cr(16)
9264 .kr(25)
9265 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009266 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009267 }
9268 }
9269
9270 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
9271 TEST_REQUIRES_ARM_NEON_V8;
9272 for (uint32_t channels = 17; channels < 32; channels++) {
9273 DWConvMicrokernelTester()
9274 .cr(16)
9275 .kr(25)
9276 .channels(channels)
9277 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009278 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009279 }
9280 }
9281
9282 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
9283 TEST_REQUIRES_ARM_NEON_V8;
9284 for (uint32_t channels = 17; channels < 32; channels++) {
9285 DWConvMicrokernelTester()
9286 .cr(16)
9287 .kr(25)
9288 .channels(channels)
9289 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009290 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009291 }
9292 }
9293
9294 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
9295 TEST_REQUIRES_ARM_NEON_V8;
9296 for (size_t channels = 1; channels <= 80; channels += 15) {
9297 DWConvMicrokernelTester()
9298 .cr(16)
9299 .kr(25)
9300 .channels(channels)
9301 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009302 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009303 }
9304 }
9305
9306 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
9307 TEST_REQUIRES_ARM_NEON_V8;
9308 for (size_t channels = 1; channels <= 80; channels += 15) {
9309 for (size_t step = 2; step <= 25; step++) {
9310 DWConvMicrokernelTester()
9311 .cr(16)
9312 .kr(25)
9313 .channels(channels)
9314 .width(3)
9315 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009316 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009317 }
9318 }
9319 }
9320
9321 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
9322 TEST_REQUIRES_ARM_NEON_V8;
9323 for (size_t channels = 1; channels <= 80; channels += 15) {
9324 DWConvMicrokernelTester()
9325 .cr(16)
9326 .kr(25)
9327 .channels(16)
9328 .width(5)
9329 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009330 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009331 }
9332 }
9333
9334 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
9335 TEST_REQUIRES_ARM_NEON_V8;
9336 for (size_t channels = 1; channels <= 80; channels += 15) {
9337 DWConvMicrokernelTester()
9338 .cr(16)
9339 .kr(25)
9340 .channels(channels)
9341 .width(3)
9342 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009343 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009344 }
9345 }
9346
9347 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
9348 TEST_REQUIRES_ARM_NEON_V8;
9349 for (size_t channels = 1; channels <= 80; channels += 15) {
9350 DWConvMicrokernelTester()
9351 .cr(16)
9352 .kr(25)
9353 .channels(channels)
9354 .width(3)
9355 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009356 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009357 }
9358 }
9359
9360 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_zero_point_only) {
9361 TEST_REQUIRES_ARM_NEON_V8;
9362 for (size_t channels = 1; channels <= 80; channels += 15) {
9363 DWConvMicrokernelTester()
9364 .cr(16)
9365 .kr(25)
9366 .channels(channels)
9367 .width(3)
9368 .input_zero_point(255)
9369 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009370 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009371 }
9372 }
9373
9374 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, kernel_zero_point_only) {
9375 TEST_REQUIRES_ARM_NEON_V8;
9376 for (size_t channels = 1; channels <= 80; channels += 15) {
9377 DWConvMicrokernelTester()
9378 .cr(16)
9379 .kr(25)
9380 .channels(channels)
9381 .width(3)
9382 .input_zero_point(0)
9383 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08009384 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009385 }
9386 }
9387
9388 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
9389 TEST_REQUIRES_ARM_NEON_V8;
9390 for (uint32_t channels = 32; channels < 256; channels += 48) {
9391 DWConvMicrokernelTester()
9392 .cr(16)
9393 .kr(25)
9394 .channels(channels)
9395 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08009396 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009397 }
9398 }
9399
9400 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
9401 TEST_REQUIRES_ARM_NEON_V8;
9402 for (uint32_t mz = 0; mz < 25; mz++) {
9403 for (uint32_t channels = 32; channels < 256; channels += 48) {
9404 DWConvMicrokernelTester()
9405 .cr(16)
9406 .kr(25)
9407 .channels(channels)
9408 .input_offset(304)
9409 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009410 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009411 }
9412 }
9413 }
9414#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9415
9416
9417#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9418 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
9419 TEST_REQUIRES_ARM_NEON_V8;
9420 DWConvMicrokernelTester()
9421 .cr(24)
9422 .kr(25)
9423 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08009424 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009425 }
9426
9427 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
9428 TEST_REQUIRES_ARM_NEON_V8;
9429 for (uint32_t channels = 48; channels < 384; channels += 72) {
9430 DWConvMicrokernelTester()
9431 .cr(24)
9432 .kr(25)
9433 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009434 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009435 }
9436 }
9437
9438 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
9439 TEST_REQUIRES_ARM_NEON_V8;
9440 for (uint32_t channels = 48; channels < 384; channels += 72) {
9441 DWConvMicrokernelTester()
9442 .cr(24)
9443 .kr(25)
9444 .channels(channels)
9445 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009446 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009447 }
9448 }
9449
9450 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
9451 TEST_REQUIRES_ARM_NEON_V8;
9452 for (uint32_t channels = 48; channels < 384; channels += 72) {
9453 DWConvMicrokernelTester()
9454 .cr(24)
9455 .kr(25)
9456 .channels(channels)
9457 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009458 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009459 }
9460 }
9461
9462 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
9463 TEST_REQUIRES_ARM_NEON_V8;
9464 for (uint32_t channels = 1; channels < 24; channels++) {
9465 DWConvMicrokernelTester()
9466 .cr(24)
9467 .kr(25)
9468 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009469 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009470 }
9471 }
9472
9473 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
9474 TEST_REQUIRES_ARM_NEON_V8;
9475 for (uint32_t channels = 25; channels < 48; channels++) {
9476 DWConvMicrokernelTester()
9477 .cr(24)
9478 .kr(25)
9479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009480 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009481 }
9482 }
9483
9484 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
9485 TEST_REQUIRES_ARM_NEON_V8;
9486 for (uint32_t channels = 25; channels < 48; channels++) {
9487 DWConvMicrokernelTester()
9488 .cr(24)
9489 .kr(25)
9490 .channels(channels)
9491 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009492 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009493 }
9494 }
9495
9496 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
9497 TEST_REQUIRES_ARM_NEON_V8;
9498 for (uint32_t channels = 25; channels < 48; channels++) {
9499 DWConvMicrokernelTester()
9500 .cr(24)
9501 .kr(25)
9502 .channels(channels)
9503 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009504 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009505 }
9506 }
9507
9508 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
9509 TEST_REQUIRES_ARM_NEON_V8;
9510 for (size_t channels = 1; channels <= 120; channels += 23) {
9511 DWConvMicrokernelTester()
9512 .cr(24)
9513 .kr(25)
9514 .channels(channels)
9515 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009516 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009517 }
9518 }
9519
9520 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
9521 TEST_REQUIRES_ARM_NEON_V8;
9522 for (size_t channels = 1; channels <= 120; channels += 23) {
9523 for (size_t step = 2; step <= 25; step++) {
9524 DWConvMicrokernelTester()
9525 .cr(24)
9526 .kr(25)
9527 .channels(channels)
9528 .width(3)
9529 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009530 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009531 }
9532 }
9533 }
9534
9535 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
9536 TEST_REQUIRES_ARM_NEON_V8;
9537 for (size_t channels = 1; channels <= 120; channels += 23) {
9538 DWConvMicrokernelTester()
9539 .cr(24)
9540 .kr(25)
9541 .channels(24)
9542 .width(5)
9543 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009544 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009545 }
9546 }
9547
9548 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
9549 TEST_REQUIRES_ARM_NEON_V8;
9550 for (size_t channels = 1; channels <= 120; channels += 23) {
9551 DWConvMicrokernelTester()
9552 .cr(24)
9553 .kr(25)
9554 .channels(channels)
9555 .width(3)
9556 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009557 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009558 }
9559 }
9560
9561 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
9562 TEST_REQUIRES_ARM_NEON_V8;
9563 for (size_t channels = 1; channels <= 120; channels += 23) {
9564 DWConvMicrokernelTester()
9565 .cr(24)
9566 .kr(25)
9567 .channels(channels)
9568 .width(3)
9569 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009570 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009571 }
9572 }
9573
9574 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_zero_point_only) {
9575 TEST_REQUIRES_ARM_NEON_V8;
9576 for (size_t channels = 1; channels <= 120; channels += 23) {
9577 DWConvMicrokernelTester()
9578 .cr(24)
9579 .kr(25)
9580 .channels(channels)
9581 .width(3)
9582 .input_zero_point(255)
9583 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009584 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009585 }
9586 }
9587
9588 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, kernel_zero_point_only) {
9589 TEST_REQUIRES_ARM_NEON_V8;
9590 for (size_t channels = 1; channels <= 120; channels += 23) {
9591 DWConvMicrokernelTester()
9592 .cr(24)
9593 .kr(25)
9594 .channels(channels)
9595 .width(3)
9596 .input_zero_point(0)
9597 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08009598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009599 }
9600 }
9601
9602 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
9603 TEST_REQUIRES_ARM_NEON_V8;
9604 for (uint32_t channels = 48; channels < 384; channels += 72) {
9605 DWConvMicrokernelTester()
9606 .cr(24)
9607 .kr(25)
9608 .channels(channels)
9609 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08009610 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009611 }
9612 }
9613
9614 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
9615 TEST_REQUIRES_ARM_NEON_V8;
9616 for (uint32_t mz = 0; mz < 25; mz++) {
9617 for (uint32_t channels = 48; channels < 384; channels += 72) {
9618 DWConvMicrokernelTester()
9619 .cr(24)
9620 .kr(25)
9621 .channels(channels)
9622 .input_offset(464)
9623 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009624 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009625 }
9626 }
9627 }
9628#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9629
9630
9631#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9632 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
9633 TEST_REQUIRES_ARM_NEON_V8;
9634 DWConvMicrokernelTester()
9635 .cr(32)
9636 .kr(25)
9637 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08009638 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009639 }
9640
9641 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
9642 TEST_REQUIRES_ARM_NEON_V8;
9643 for (uint32_t channels = 64; channels < 512; channels += 96) {
9644 DWConvMicrokernelTester()
9645 .cr(32)
9646 .kr(25)
9647 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009648 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009649 }
9650 }
9651
9652 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
9653 TEST_REQUIRES_ARM_NEON_V8;
9654 for (uint32_t channels = 64; channels < 512; channels += 96) {
9655 DWConvMicrokernelTester()
9656 .cr(32)
9657 .kr(25)
9658 .channels(channels)
9659 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009660 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009661 }
9662 }
9663
9664 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
9665 TEST_REQUIRES_ARM_NEON_V8;
9666 for (uint32_t channels = 64; channels < 512; channels += 96) {
9667 DWConvMicrokernelTester()
9668 .cr(32)
9669 .kr(25)
9670 .channels(channels)
9671 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009672 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009673 }
9674 }
9675
9676 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
9677 TEST_REQUIRES_ARM_NEON_V8;
9678 for (uint32_t channels = 1; channels < 32; channels++) {
9679 DWConvMicrokernelTester()
9680 .cr(32)
9681 .kr(25)
9682 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009683 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009684 }
9685 }
9686
9687 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
9688 TEST_REQUIRES_ARM_NEON_V8;
9689 for (uint32_t channels = 33; channels < 64; channels++) {
9690 DWConvMicrokernelTester()
9691 .cr(32)
9692 .kr(25)
9693 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009694 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009695 }
9696 }
9697
9698 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
9699 TEST_REQUIRES_ARM_NEON_V8;
9700 for (uint32_t channels = 33; channels < 64; channels++) {
9701 DWConvMicrokernelTester()
9702 .cr(32)
9703 .kr(25)
9704 .channels(channels)
9705 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009706 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009707 }
9708 }
9709
9710 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
9711 TEST_REQUIRES_ARM_NEON_V8;
9712 for (uint32_t channels = 33; channels < 64; channels++) {
9713 DWConvMicrokernelTester()
9714 .cr(32)
9715 .kr(25)
9716 .channels(channels)
9717 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009718 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009719 }
9720 }
9721
9722 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
9723 TEST_REQUIRES_ARM_NEON_V8;
9724 for (size_t channels = 1; channels <= 160; channels += 31) {
9725 DWConvMicrokernelTester()
9726 .cr(32)
9727 .kr(25)
9728 .channels(channels)
9729 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009730 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009731 }
9732 }
9733
9734 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
9735 TEST_REQUIRES_ARM_NEON_V8;
9736 for (size_t channels = 1; channels <= 160; channels += 31) {
9737 for (size_t step = 2; step <= 25; step++) {
9738 DWConvMicrokernelTester()
9739 .cr(32)
9740 .kr(25)
9741 .channels(channels)
9742 .width(3)
9743 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009744 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009745 }
9746 }
9747 }
9748
9749 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
9750 TEST_REQUIRES_ARM_NEON_V8;
9751 for (size_t channels = 1; channels <= 160; channels += 31) {
9752 DWConvMicrokernelTester()
9753 .cr(32)
9754 .kr(25)
9755 .channels(32)
9756 .width(5)
9757 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08009758 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009759 }
9760 }
9761
9762 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
9763 TEST_REQUIRES_ARM_NEON_V8;
9764 for (size_t channels = 1; channels <= 160; channels += 31) {
9765 DWConvMicrokernelTester()
9766 .cr(32)
9767 .kr(25)
9768 .channels(channels)
9769 .width(3)
9770 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009771 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009772 }
9773 }
9774
9775 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
9776 TEST_REQUIRES_ARM_NEON_V8;
9777 for (size_t channels = 1; channels <= 160; channels += 31) {
9778 DWConvMicrokernelTester()
9779 .cr(32)
9780 .kr(25)
9781 .channels(channels)
9782 .width(3)
9783 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009784 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009785 }
9786 }
9787
9788 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_zero_point_only) {
9789 TEST_REQUIRES_ARM_NEON_V8;
9790 for (size_t channels = 1; channels <= 160; channels += 31) {
9791 DWConvMicrokernelTester()
9792 .cr(32)
9793 .kr(25)
9794 .channels(channels)
9795 .width(3)
9796 .input_zero_point(255)
9797 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009798 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009799 }
9800 }
9801
9802 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, kernel_zero_point_only) {
9803 TEST_REQUIRES_ARM_NEON_V8;
9804 for (size_t channels = 1; channels <= 160; channels += 31) {
9805 DWConvMicrokernelTester()
9806 .cr(32)
9807 .kr(25)
9808 .channels(channels)
9809 .width(3)
9810 .input_zero_point(0)
9811 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -08009812 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009813 }
9814 }
9815
9816 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
9817 TEST_REQUIRES_ARM_NEON_V8;
9818 for (uint32_t channels = 64; channels < 512; channels += 96) {
9819 DWConvMicrokernelTester()
9820 .cr(32)
9821 .kr(25)
9822 .channels(channels)
9823 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08009824 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009825 }
9826 }
9827
9828 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
9829 TEST_REQUIRES_ARM_NEON_V8;
9830 for (uint32_t mz = 0; mz < 25; mz++) {
9831 for (uint32_t channels = 64; channels < 512; channels += 96) {
9832 DWConvMicrokernelTester()
9833 .cr(32)
9834 .kr(25)
9835 .channels(channels)
9836 .input_offset(592)
9837 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009838 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan605696a2021-07-15 18:01:30 -07009839 }
9840 }
9841 }
9842#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9843
9844
Marat Dukhancfd606b2021-07-09 01:18:45 -07009845#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanf0f28812021-07-08 22:34:20 -07009846 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
9847 TEST_REQUIRES_X86_SSE2;
9848 DWConvMicrokernelTester()
9849 .cr(8)
9850 .kr(25)
9851 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009852 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009853 }
9854
9855 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
9856 TEST_REQUIRES_X86_SSE2;
9857 for (uint32_t channels = 16; channels < 128; channels += 24) {
9858 DWConvMicrokernelTester()
9859 .cr(8)
9860 .kr(25)
9861 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009862 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009863 }
9864 }
9865
9866 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
9867 TEST_REQUIRES_X86_SSE2;
9868 for (uint32_t channels = 16; channels < 128; channels += 24) {
9869 DWConvMicrokernelTester()
9870 .cr(8)
9871 .kr(25)
9872 .channels(channels)
9873 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009874 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009875 }
9876 }
9877
9878 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
9879 TEST_REQUIRES_X86_SSE2;
9880 for (uint32_t channels = 16; channels < 128; channels += 24) {
9881 DWConvMicrokernelTester()
9882 .cr(8)
9883 .kr(25)
9884 .channels(channels)
9885 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009886 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009887 }
9888 }
9889
9890 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
9891 TEST_REQUIRES_X86_SSE2;
9892 for (uint32_t channels = 1; channels < 8; channels++) {
9893 DWConvMicrokernelTester()
9894 .cr(8)
9895 .kr(25)
9896 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009897 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009898 }
9899 }
9900
9901 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
9902 TEST_REQUIRES_X86_SSE2;
9903 for (uint32_t channels = 9; channels < 16; channels++) {
9904 DWConvMicrokernelTester()
9905 .cr(8)
9906 .kr(25)
9907 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009908 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009909 }
9910 }
9911
9912 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
9913 TEST_REQUIRES_X86_SSE2;
9914 for (uint32_t channels = 9; channels < 16; channels++) {
9915 DWConvMicrokernelTester()
9916 .cr(8)
9917 .kr(25)
9918 .channels(channels)
9919 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009920 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009921 }
9922 }
9923
9924 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
9925 TEST_REQUIRES_X86_SSE2;
9926 for (uint32_t channels = 9; channels < 16; channels++) {
9927 DWConvMicrokernelTester()
9928 .cr(8)
9929 .kr(25)
9930 .channels(channels)
9931 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009932 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009933 }
9934 }
9935
9936 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
9937 TEST_REQUIRES_X86_SSE2;
9938 for (size_t channels = 1; channels <= 40; channels += 7) {
9939 DWConvMicrokernelTester()
9940 .cr(8)
9941 .kr(25)
9942 .channels(channels)
9943 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009944 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009945 }
9946 }
9947
9948 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
9949 TEST_REQUIRES_X86_SSE2;
9950 for (size_t channels = 1; channels <= 40; channels += 7) {
9951 for (size_t step = 2; step <= 25; step++) {
9952 DWConvMicrokernelTester()
9953 .cr(8)
9954 .kr(25)
9955 .channels(channels)
9956 .width(3)
9957 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009958 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009959 }
9960 }
9961 }
9962
9963 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
9964 TEST_REQUIRES_X86_SSE2;
9965 for (size_t channels = 1; channels <= 40; channels += 7) {
9966 DWConvMicrokernelTester()
9967 .cr(8)
9968 .kr(25)
9969 .channels(8)
9970 .width(5)
9971 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009972 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009973 }
9974 }
9975
9976 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
9977 TEST_REQUIRES_X86_SSE2;
9978 for (size_t channels = 1; channels <= 40; channels += 7) {
9979 DWConvMicrokernelTester()
9980 .cr(8)
9981 .kr(25)
9982 .channels(channels)
9983 .width(3)
9984 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009985 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009986 }
9987 }
9988
9989 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
9990 TEST_REQUIRES_X86_SSE2;
9991 for (size_t channels = 1; channels <= 40; channels += 7) {
9992 DWConvMicrokernelTester()
9993 .cr(8)
9994 .kr(25)
9995 .channels(channels)
9996 .width(3)
9997 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009998 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -07009999 }
10000 }
10001
10002 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_zero_point_only) {
10003 TEST_REQUIRES_X86_SSE2;
10004 for (size_t channels = 1; channels <= 40; channels += 7) {
10005 DWConvMicrokernelTester()
10006 .cr(8)
10007 .kr(25)
10008 .channels(channels)
10009 .width(3)
10010 .input_zero_point(255)
10011 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010012 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010013 }
10014 }
10015
10016 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, kernel_zero_point_only) {
10017 TEST_REQUIRES_X86_SSE2;
10018 for (size_t channels = 1; channels <= 40; channels += 7) {
10019 DWConvMicrokernelTester()
10020 .cr(8)
10021 .kr(25)
10022 .channels(channels)
10023 .width(3)
10024 .input_zero_point(0)
10025 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080010026 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010027 }
10028 }
10029
10030 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
10031 TEST_REQUIRES_X86_SSE2;
10032 for (uint32_t channels = 16; channels < 128; channels += 24) {
10033 DWConvMicrokernelTester()
10034 .cr(8)
10035 .kr(25)
10036 .channels(channels)
10037 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080010038 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010039 }
10040 }
10041
10042 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
10043 TEST_REQUIRES_X86_SSE2;
10044 for (uint32_t mz = 0; mz < 25; mz++) {
10045 for (uint32_t channels = 16; channels < 128; channels += 24) {
10046 DWConvMicrokernelTester()
10047 .cr(8)
10048 .kr(25)
10049 .channels(channels)
10050 .input_offset(176)
10051 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010052 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010053 }
10054 }
10055 }
10056#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10057
10058
10059#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10060 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
10061 TEST_REQUIRES_X86_SSE2;
10062 DWConvMicrokernelTester()
10063 .cr(16)
10064 .kr(25)
10065 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010066 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010067 }
10068
10069 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
10070 TEST_REQUIRES_X86_SSE2;
10071 for (uint32_t channels = 32; channels < 256; channels += 48) {
10072 DWConvMicrokernelTester()
10073 .cr(16)
10074 .kr(25)
10075 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010076 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010077 }
10078 }
10079
10080 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
10081 TEST_REQUIRES_X86_SSE2;
10082 for (uint32_t channels = 32; channels < 256; channels += 48) {
10083 DWConvMicrokernelTester()
10084 .cr(16)
10085 .kr(25)
10086 .channels(channels)
10087 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010088 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010089 }
10090 }
10091
10092 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
10093 TEST_REQUIRES_X86_SSE2;
10094 for (uint32_t channels = 32; channels < 256; channels += 48) {
10095 DWConvMicrokernelTester()
10096 .cr(16)
10097 .kr(25)
10098 .channels(channels)
10099 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010100 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010101 }
10102 }
10103
10104 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
10105 TEST_REQUIRES_X86_SSE2;
10106 for (uint32_t channels = 1; channels < 16; channels++) {
10107 DWConvMicrokernelTester()
10108 .cr(16)
10109 .kr(25)
10110 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010111 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010112 }
10113 }
10114
10115 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
10116 TEST_REQUIRES_X86_SSE2;
10117 for (uint32_t channels = 17; channels < 32; channels++) {
10118 DWConvMicrokernelTester()
10119 .cr(16)
10120 .kr(25)
10121 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010122 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010123 }
10124 }
10125
10126 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
10127 TEST_REQUIRES_X86_SSE2;
10128 for (uint32_t channels = 17; channels < 32; channels++) {
10129 DWConvMicrokernelTester()
10130 .cr(16)
10131 .kr(25)
10132 .channels(channels)
10133 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010134 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010135 }
10136 }
10137
10138 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
10139 TEST_REQUIRES_X86_SSE2;
10140 for (uint32_t channels = 17; channels < 32; channels++) {
10141 DWConvMicrokernelTester()
10142 .cr(16)
10143 .kr(25)
10144 .channels(channels)
10145 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010146 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010147 }
10148 }
10149
10150 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
10151 TEST_REQUIRES_X86_SSE2;
10152 for (size_t channels = 1; channels <= 80; channels += 15) {
10153 DWConvMicrokernelTester()
10154 .cr(16)
10155 .kr(25)
10156 .channels(channels)
10157 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010158 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010159 }
10160 }
10161
10162 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
10163 TEST_REQUIRES_X86_SSE2;
10164 for (size_t channels = 1; channels <= 80; channels += 15) {
10165 for (size_t step = 2; step <= 25; step++) {
10166 DWConvMicrokernelTester()
10167 .cr(16)
10168 .kr(25)
10169 .channels(channels)
10170 .width(3)
10171 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010172 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010173 }
10174 }
10175 }
10176
10177 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
10178 TEST_REQUIRES_X86_SSE2;
10179 for (size_t channels = 1; channels <= 80; channels += 15) {
10180 DWConvMicrokernelTester()
10181 .cr(16)
10182 .kr(25)
10183 .channels(16)
10184 .width(5)
10185 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010186 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010187 }
10188 }
10189
10190 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
10191 TEST_REQUIRES_X86_SSE2;
10192 for (size_t channels = 1; channels <= 80; channels += 15) {
10193 DWConvMicrokernelTester()
10194 .cr(16)
10195 .kr(25)
10196 .channels(channels)
10197 .width(3)
10198 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010199 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010200 }
10201 }
10202
10203 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
10204 TEST_REQUIRES_X86_SSE2;
10205 for (size_t channels = 1; channels <= 80; channels += 15) {
10206 DWConvMicrokernelTester()
10207 .cr(16)
10208 .kr(25)
10209 .channels(channels)
10210 .width(3)
10211 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010212 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010213 }
10214 }
10215
10216 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_zero_point_only) {
10217 TEST_REQUIRES_X86_SSE2;
10218 for (size_t channels = 1; channels <= 80; channels += 15) {
10219 DWConvMicrokernelTester()
10220 .cr(16)
10221 .kr(25)
10222 .channels(channels)
10223 .width(3)
10224 .input_zero_point(255)
10225 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010226 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010227 }
10228 }
10229
10230 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, kernel_zero_point_only) {
10231 TEST_REQUIRES_X86_SSE2;
10232 for (size_t channels = 1; channels <= 80; channels += 15) {
10233 DWConvMicrokernelTester()
10234 .cr(16)
10235 .kr(25)
10236 .channels(channels)
10237 .width(3)
10238 .input_zero_point(0)
10239 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080010240 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010241 }
10242 }
10243
10244 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
10245 TEST_REQUIRES_X86_SSE2;
10246 for (uint32_t channels = 32; channels < 256; channels += 48) {
10247 DWConvMicrokernelTester()
10248 .cr(16)
10249 .kr(25)
10250 .channels(channels)
10251 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080010252 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010253 }
10254 }
10255
10256 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
10257 TEST_REQUIRES_X86_SSE2;
10258 for (uint32_t mz = 0; mz < 25; mz++) {
10259 for (uint32_t channels = 32; channels < 256; channels += 48) {
10260 DWConvMicrokernelTester()
10261 .cr(16)
10262 .kr(25)
10263 .channels(channels)
10264 .input_offset(304)
10265 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010266 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010267 }
10268 }
10269 }
10270#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10271
10272
10273#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10274 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
10275 TEST_REQUIRES_X86_SSE41;
10276 DWConvMicrokernelTester()
10277 .cr(8)
10278 .kr(25)
10279 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010280 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010281 }
10282
10283 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
10284 TEST_REQUIRES_X86_SSE41;
10285 for (uint32_t channels = 16; channels < 128; channels += 24) {
10286 DWConvMicrokernelTester()
10287 .cr(8)
10288 .kr(25)
10289 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010290 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010291 }
10292 }
10293
10294 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
10295 TEST_REQUIRES_X86_SSE41;
10296 for (uint32_t channels = 16; channels < 128; channels += 24) {
10297 DWConvMicrokernelTester()
10298 .cr(8)
10299 .kr(25)
10300 .channels(channels)
10301 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010302 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010303 }
10304 }
10305
10306 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
10307 TEST_REQUIRES_X86_SSE41;
10308 for (uint32_t channels = 16; channels < 128; channels += 24) {
10309 DWConvMicrokernelTester()
10310 .cr(8)
10311 .kr(25)
10312 .channels(channels)
10313 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010314 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010315 }
10316 }
10317
10318 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
10319 TEST_REQUIRES_X86_SSE41;
10320 for (uint32_t channels = 1; channels < 8; channels++) {
10321 DWConvMicrokernelTester()
10322 .cr(8)
10323 .kr(25)
10324 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010325 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010326 }
10327 }
10328
10329 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
10330 TEST_REQUIRES_X86_SSE41;
10331 for (uint32_t channels = 9; channels < 16; channels++) {
10332 DWConvMicrokernelTester()
10333 .cr(8)
10334 .kr(25)
10335 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010336 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010337 }
10338 }
10339
10340 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
10341 TEST_REQUIRES_X86_SSE41;
10342 for (uint32_t channels = 9; channels < 16; channels++) {
10343 DWConvMicrokernelTester()
10344 .cr(8)
10345 .kr(25)
10346 .channels(channels)
10347 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010348 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010349 }
10350 }
10351
10352 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
10353 TEST_REQUIRES_X86_SSE41;
10354 for (uint32_t channels = 9; channels < 16; channels++) {
10355 DWConvMicrokernelTester()
10356 .cr(8)
10357 .kr(25)
10358 .channels(channels)
10359 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010360 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010361 }
10362 }
10363
10364 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
10365 TEST_REQUIRES_X86_SSE41;
10366 for (size_t channels = 1; channels <= 40; channels += 7) {
10367 DWConvMicrokernelTester()
10368 .cr(8)
10369 .kr(25)
10370 .channels(channels)
10371 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010372 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010373 }
10374 }
10375
10376 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
10377 TEST_REQUIRES_X86_SSE41;
10378 for (size_t channels = 1; channels <= 40; channels += 7) {
10379 for (size_t step = 2; step <= 25; step++) {
10380 DWConvMicrokernelTester()
10381 .cr(8)
10382 .kr(25)
10383 .channels(channels)
10384 .width(3)
10385 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010386 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010387 }
10388 }
10389 }
10390
10391 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
10392 TEST_REQUIRES_X86_SSE41;
10393 for (size_t channels = 1; channels <= 40; channels += 7) {
10394 DWConvMicrokernelTester()
10395 .cr(8)
10396 .kr(25)
10397 .channels(8)
10398 .width(5)
10399 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010400 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010401 }
10402 }
10403
10404 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
10405 TEST_REQUIRES_X86_SSE41;
10406 for (size_t channels = 1; channels <= 40; channels += 7) {
10407 DWConvMicrokernelTester()
10408 .cr(8)
10409 .kr(25)
10410 .channels(channels)
10411 .width(3)
10412 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010413 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010414 }
10415 }
10416
10417 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
10418 TEST_REQUIRES_X86_SSE41;
10419 for (size_t channels = 1; channels <= 40; channels += 7) {
10420 DWConvMicrokernelTester()
10421 .cr(8)
10422 .kr(25)
10423 .channels(channels)
10424 .width(3)
10425 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010426 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010427 }
10428 }
10429
10430 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_zero_point_only) {
10431 TEST_REQUIRES_X86_SSE41;
10432 for (size_t channels = 1; channels <= 40; channels += 7) {
10433 DWConvMicrokernelTester()
10434 .cr(8)
10435 .kr(25)
10436 .channels(channels)
10437 .width(3)
10438 .input_zero_point(255)
10439 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010440 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010441 }
10442 }
10443
10444 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, kernel_zero_point_only) {
10445 TEST_REQUIRES_X86_SSE41;
10446 for (size_t channels = 1; channels <= 40; channels += 7) {
10447 DWConvMicrokernelTester()
10448 .cr(8)
10449 .kr(25)
10450 .channels(channels)
10451 .width(3)
10452 .input_zero_point(0)
10453 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080010454 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010455 }
10456 }
10457
10458 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
10459 TEST_REQUIRES_X86_SSE41;
10460 for (uint32_t channels = 16; channels < 128; channels += 24) {
10461 DWConvMicrokernelTester()
10462 .cr(8)
10463 .kr(25)
10464 .channels(channels)
10465 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080010466 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010467 }
10468 }
10469
10470 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
10471 TEST_REQUIRES_X86_SSE41;
10472 for (uint32_t mz = 0; mz < 25; mz++) {
10473 for (uint32_t channels = 16; channels < 128; channels += 24) {
10474 DWConvMicrokernelTester()
10475 .cr(8)
10476 .kr(25)
10477 .channels(channels)
10478 .input_offset(176)
10479 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010480 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010481 }
10482 }
10483 }
10484#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10485
10486
10487#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10488 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
10489 TEST_REQUIRES_X86_SSE41;
10490 DWConvMicrokernelTester()
10491 .cr(16)
10492 .kr(25)
10493 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010494 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010495 }
10496
10497 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
10498 TEST_REQUIRES_X86_SSE41;
10499 for (uint32_t channels = 32; channels < 256; channels += 48) {
10500 DWConvMicrokernelTester()
10501 .cr(16)
10502 .kr(25)
10503 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010504 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010505 }
10506 }
10507
10508 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
10509 TEST_REQUIRES_X86_SSE41;
10510 for (uint32_t channels = 32; channels < 256; channels += 48) {
10511 DWConvMicrokernelTester()
10512 .cr(16)
10513 .kr(25)
10514 .channels(channels)
10515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010516 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010517 }
10518 }
10519
10520 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
10521 TEST_REQUIRES_X86_SSE41;
10522 for (uint32_t channels = 32; channels < 256; channels += 48) {
10523 DWConvMicrokernelTester()
10524 .cr(16)
10525 .kr(25)
10526 .channels(channels)
10527 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010528 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010529 }
10530 }
10531
10532 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
10533 TEST_REQUIRES_X86_SSE41;
10534 for (uint32_t channels = 1; channels < 16; channels++) {
10535 DWConvMicrokernelTester()
10536 .cr(16)
10537 .kr(25)
10538 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010539 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010540 }
10541 }
10542
10543 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
10544 TEST_REQUIRES_X86_SSE41;
10545 for (uint32_t channels = 17; channels < 32; channels++) {
10546 DWConvMicrokernelTester()
10547 .cr(16)
10548 .kr(25)
10549 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010550 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010551 }
10552 }
10553
10554 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
10555 TEST_REQUIRES_X86_SSE41;
10556 for (uint32_t channels = 17; channels < 32; channels++) {
10557 DWConvMicrokernelTester()
10558 .cr(16)
10559 .kr(25)
10560 .channels(channels)
10561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010562 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010563 }
10564 }
10565
10566 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
10567 TEST_REQUIRES_X86_SSE41;
10568 for (uint32_t channels = 17; channels < 32; channels++) {
10569 DWConvMicrokernelTester()
10570 .cr(16)
10571 .kr(25)
10572 .channels(channels)
10573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010574 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010575 }
10576 }
10577
10578 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
10579 TEST_REQUIRES_X86_SSE41;
10580 for (size_t channels = 1; channels <= 80; channels += 15) {
10581 DWConvMicrokernelTester()
10582 .cr(16)
10583 .kr(25)
10584 .channels(channels)
10585 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010586 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010587 }
10588 }
10589
10590 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
10591 TEST_REQUIRES_X86_SSE41;
10592 for (size_t channels = 1; channels <= 80; channels += 15) {
10593 for (size_t step = 2; step <= 25; step++) {
10594 DWConvMicrokernelTester()
10595 .cr(16)
10596 .kr(25)
10597 .channels(channels)
10598 .width(3)
10599 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010600 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010601 }
10602 }
10603 }
10604
10605 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
10606 TEST_REQUIRES_X86_SSE41;
10607 for (size_t channels = 1; channels <= 80; channels += 15) {
10608 DWConvMicrokernelTester()
10609 .cr(16)
10610 .kr(25)
10611 .channels(16)
10612 .width(5)
10613 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010614 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010615 }
10616 }
10617
10618 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
10619 TEST_REQUIRES_X86_SSE41;
10620 for (size_t channels = 1; channels <= 80; channels += 15) {
10621 DWConvMicrokernelTester()
10622 .cr(16)
10623 .kr(25)
10624 .channels(channels)
10625 .width(3)
10626 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010627 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010628 }
10629 }
10630
10631 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
10632 TEST_REQUIRES_X86_SSE41;
10633 for (size_t channels = 1; channels <= 80; channels += 15) {
10634 DWConvMicrokernelTester()
10635 .cr(16)
10636 .kr(25)
10637 .channels(channels)
10638 .width(3)
10639 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010640 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010641 }
10642 }
10643
10644 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_zero_point_only) {
10645 TEST_REQUIRES_X86_SSE41;
10646 for (size_t channels = 1; channels <= 80; channels += 15) {
10647 DWConvMicrokernelTester()
10648 .cr(16)
10649 .kr(25)
10650 .channels(channels)
10651 .width(3)
10652 .input_zero_point(255)
10653 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010654 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010655 }
10656 }
10657
10658 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, kernel_zero_point_only) {
10659 TEST_REQUIRES_X86_SSE41;
10660 for (size_t channels = 1; channels <= 80; channels += 15) {
10661 DWConvMicrokernelTester()
10662 .cr(16)
10663 .kr(25)
10664 .channels(channels)
10665 .width(3)
10666 .input_zero_point(0)
10667 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080010668 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010669 }
10670 }
10671
10672 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
10673 TEST_REQUIRES_X86_SSE41;
10674 for (uint32_t channels = 32; channels < 256; channels += 48) {
10675 DWConvMicrokernelTester()
10676 .cr(16)
10677 .kr(25)
10678 .channels(channels)
10679 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080010680 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010681 }
10682 }
10683
10684 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
10685 TEST_REQUIRES_X86_SSE41;
10686 for (uint32_t mz = 0; mz < 25; mz++) {
10687 for (uint32_t channels = 32; channels < 256; channels += 48) {
10688 DWConvMicrokernelTester()
10689 .cr(16)
10690 .kr(25)
10691 .channels(channels)
10692 .input_offset(304)
10693 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010694 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010695 }
10696 }
10697 }
10698#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10699
10700
10701#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10702 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
10703 TEST_REQUIRES_X86_AVX;
10704 DWConvMicrokernelTester()
10705 .cr(8)
10706 .kr(25)
10707 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010708 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010709 }
10710
10711 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
10712 TEST_REQUIRES_X86_AVX;
10713 for (uint32_t channels = 16; channels < 128; channels += 24) {
10714 DWConvMicrokernelTester()
10715 .cr(8)
10716 .kr(25)
10717 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010718 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010719 }
10720 }
10721
10722 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
10723 TEST_REQUIRES_X86_AVX;
10724 for (uint32_t channels = 16; channels < 128; channels += 24) {
10725 DWConvMicrokernelTester()
10726 .cr(8)
10727 .kr(25)
10728 .channels(channels)
10729 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010730 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010731 }
10732 }
10733
10734 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
10735 TEST_REQUIRES_X86_AVX;
10736 for (uint32_t channels = 16; channels < 128; channels += 24) {
10737 DWConvMicrokernelTester()
10738 .cr(8)
10739 .kr(25)
10740 .channels(channels)
10741 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010742 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010743 }
10744 }
10745
10746 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
10747 TEST_REQUIRES_X86_AVX;
10748 for (uint32_t channels = 1; channels < 8; channels++) {
10749 DWConvMicrokernelTester()
10750 .cr(8)
10751 .kr(25)
10752 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010753 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010754 }
10755 }
10756
10757 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
10758 TEST_REQUIRES_X86_AVX;
10759 for (uint32_t channels = 9; channels < 16; channels++) {
10760 DWConvMicrokernelTester()
10761 .cr(8)
10762 .kr(25)
10763 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010764 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010765 }
10766 }
10767
10768 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
10769 TEST_REQUIRES_X86_AVX;
10770 for (uint32_t channels = 9; channels < 16; channels++) {
10771 DWConvMicrokernelTester()
10772 .cr(8)
10773 .kr(25)
10774 .channels(channels)
10775 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010776 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010777 }
10778 }
10779
10780 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
10781 TEST_REQUIRES_X86_AVX;
10782 for (uint32_t channels = 9; channels < 16; channels++) {
10783 DWConvMicrokernelTester()
10784 .cr(8)
10785 .kr(25)
10786 .channels(channels)
10787 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010788 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010789 }
10790 }
10791
10792 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
10793 TEST_REQUIRES_X86_AVX;
10794 for (size_t channels = 1; channels <= 40; channels += 7) {
10795 DWConvMicrokernelTester()
10796 .cr(8)
10797 .kr(25)
10798 .channels(channels)
10799 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010800 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010801 }
10802 }
10803
10804 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
10805 TEST_REQUIRES_X86_AVX;
10806 for (size_t channels = 1; channels <= 40; channels += 7) {
10807 for (size_t step = 2; step <= 25; step++) {
10808 DWConvMicrokernelTester()
10809 .cr(8)
10810 .kr(25)
10811 .channels(channels)
10812 .width(3)
10813 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010814 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010815 }
10816 }
10817 }
10818
10819 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
10820 TEST_REQUIRES_X86_AVX;
10821 for (size_t channels = 1; channels <= 40; channels += 7) {
10822 DWConvMicrokernelTester()
10823 .cr(8)
10824 .kr(25)
10825 .channels(8)
10826 .width(5)
10827 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010828 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010829 }
10830 }
10831
10832 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
10833 TEST_REQUIRES_X86_AVX;
10834 for (size_t channels = 1; channels <= 40; channels += 7) {
10835 DWConvMicrokernelTester()
10836 .cr(8)
10837 .kr(25)
10838 .channels(channels)
10839 .width(3)
10840 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010841 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010842 }
10843 }
10844
10845 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
10846 TEST_REQUIRES_X86_AVX;
10847 for (size_t channels = 1; channels <= 40; channels += 7) {
10848 DWConvMicrokernelTester()
10849 .cr(8)
10850 .kr(25)
10851 .channels(channels)
10852 .width(3)
10853 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010854 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010855 }
10856 }
10857
10858 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_zero_point_only) {
10859 TEST_REQUIRES_X86_AVX;
10860 for (size_t channels = 1; channels <= 40; channels += 7) {
10861 DWConvMicrokernelTester()
10862 .cr(8)
10863 .kr(25)
10864 .channels(channels)
10865 .width(3)
10866 .input_zero_point(255)
10867 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010868 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010869 }
10870 }
10871
10872 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, kernel_zero_point_only) {
10873 TEST_REQUIRES_X86_AVX;
10874 for (size_t channels = 1; channels <= 40; channels += 7) {
10875 DWConvMicrokernelTester()
10876 .cr(8)
10877 .kr(25)
10878 .channels(channels)
10879 .width(3)
10880 .input_zero_point(0)
10881 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080010882 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010883 }
10884 }
10885
10886 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
10887 TEST_REQUIRES_X86_AVX;
10888 for (uint32_t channels = 16; channels < 128; channels += 24) {
10889 DWConvMicrokernelTester()
10890 .cr(8)
10891 .kr(25)
10892 .channels(channels)
10893 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080010894 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010895 }
10896 }
10897
10898 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
10899 TEST_REQUIRES_X86_AVX;
10900 for (uint32_t mz = 0; mz < 25; mz++) {
10901 for (uint32_t channels = 16; channels < 128; channels += 24) {
10902 DWConvMicrokernelTester()
10903 .cr(8)
10904 .kr(25)
10905 .channels(channels)
10906 .input_offset(176)
10907 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010908 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010909 }
10910 }
10911 }
10912#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10913
10914
10915#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10916 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
10917 TEST_REQUIRES_X86_AVX;
10918 DWConvMicrokernelTester()
10919 .cr(16)
10920 .kr(25)
10921 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010922 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010923 }
10924
10925 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
10926 TEST_REQUIRES_X86_AVX;
10927 for (uint32_t channels = 32; channels < 256; channels += 48) {
10928 DWConvMicrokernelTester()
10929 .cr(16)
10930 .kr(25)
10931 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010932 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010933 }
10934 }
10935
10936 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
10937 TEST_REQUIRES_X86_AVX;
10938 for (uint32_t channels = 32; channels < 256; channels += 48) {
10939 DWConvMicrokernelTester()
10940 .cr(16)
10941 .kr(25)
10942 .channels(channels)
10943 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010944 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010945 }
10946 }
10947
10948 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
10949 TEST_REQUIRES_X86_AVX;
10950 for (uint32_t channels = 32; channels < 256; channels += 48) {
10951 DWConvMicrokernelTester()
10952 .cr(16)
10953 .kr(25)
10954 .channels(channels)
10955 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010956 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010957 }
10958 }
10959
10960 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
10961 TEST_REQUIRES_X86_AVX;
10962 for (uint32_t channels = 1; channels < 16; channels++) {
10963 DWConvMicrokernelTester()
10964 .cr(16)
10965 .kr(25)
10966 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010967 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010968 }
10969 }
10970
10971 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
10972 TEST_REQUIRES_X86_AVX;
10973 for (uint32_t channels = 17; channels < 32; channels++) {
10974 DWConvMicrokernelTester()
10975 .cr(16)
10976 .kr(25)
10977 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010978 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010979 }
10980 }
10981
10982 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
10983 TEST_REQUIRES_X86_AVX;
10984 for (uint32_t channels = 17; channels < 32; channels++) {
10985 DWConvMicrokernelTester()
10986 .cr(16)
10987 .kr(25)
10988 .channels(channels)
10989 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010990 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070010991 }
10992 }
10993
10994 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
10995 TEST_REQUIRES_X86_AVX;
10996 for (uint32_t channels = 17; channels < 32; channels++) {
10997 DWConvMicrokernelTester()
10998 .cr(16)
10999 .kr(25)
11000 .channels(channels)
11001 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011002 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011003 }
11004 }
11005
11006 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
11007 TEST_REQUIRES_X86_AVX;
11008 for (size_t channels = 1; channels <= 80; channels += 15) {
11009 DWConvMicrokernelTester()
11010 .cr(16)
11011 .kr(25)
11012 .channels(channels)
11013 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011014 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011015 }
11016 }
11017
11018 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
11019 TEST_REQUIRES_X86_AVX;
11020 for (size_t channels = 1; channels <= 80; channels += 15) {
11021 for (size_t step = 2; step <= 25; step++) {
11022 DWConvMicrokernelTester()
11023 .cr(16)
11024 .kr(25)
11025 .channels(channels)
11026 .width(3)
11027 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011028 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011029 }
11030 }
11031 }
11032
11033 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
11034 TEST_REQUIRES_X86_AVX;
11035 for (size_t channels = 1; channels <= 80; channels += 15) {
11036 DWConvMicrokernelTester()
11037 .cr(16)
11038 .kr(25)
11039 .channels(16)
11040 .width(5)
11041 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011042 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011043 }
11044 }
11045
11046 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
11047 TEST_REQUIRES_X86_AVX;
11048 for (size_t channels = 1; channels <= 80; channels += 15) {
11049 DWConvMicrokernelTester()
11050 .cr(16)
11051 .kr(25)
11052 .channels(channels)
11053 .width(3)
11054 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011055 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011056 }
11057 }
11058
11059 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
11060 TEST_REQUIRES_X86_AVX;
11061 for (size_t channels = 1; channels <= 80; channels += 15) {
11062 DWConvMicrokernelTester()
11063 .cr(16)
11064 .kr(25)
11065 .channels(channels)
11066 .width(3)
11067 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011068 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011069 }
11070 }
11071
11072 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_zero_point_only) {
11073 TEST_REQUIRES_X86_AVX;
11074 for (size_t channels = 1; channels <= 80; channels += 15) {
11075 DWConvMicrokernelTester()
11076 .cr(16)
11077 .kr(25)
11078 .channels(channels)
11079 .width(3)
11080 .input_zero_point(255)
11081 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011082 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011083 }
11084 }
11085
11086 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, kernel_zero_point_only) {
11087 TEST_REQUIRES_X86_AVX;
11088 for (size_t channels = 1; channels <= 80; channels += 15) {
11089 DWConvMicrokernelTester()
11090 .cr(16)
11091 .kr(25)
11092 .channels(channels)
11093 .width(3)
11094 .input_zero_point(0)
11095 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080011096 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011097 }
11098 }
11099
11100 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
11101 TEST_REQUIRES_X86_AVX;
11102 for (uint32_t channels = 32; channels < 256; channels += 48) {
11103 DWConvMicrokernelTester()
11104 .cr(16)
11105 .kr(25)
11106 .channels(channels)
11107 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011108 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011109 }
11110 }
11111
11112 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
11113 TEST_REQUIRES_X86_AVX;
11114 for (uint32_t mz = 0; mz < 25; mz++) {
11115 for (uint32_t channels = 32; channels < 256; channels += 48) {
11116 DWConvMicrokernelTester()
11117 .cr(16)
11118 .kr(25)
11119 .channels(channels)
11120 .input_offset(304)
11121 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011122 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011123 }
11124 }
11125 }
11126#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11127
11128
11129#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11130 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
11131 TEST_REQUIRES_X86_SSE41;
11132 DWConvMicrokernelTester()
11133 .cr(8)
11134 .kr(25)
11135 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011136 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011137 }
11138
11139 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
11140 TEST_REQUIRES_X86_SSE41;
11141 for (uint32_t channels = 16; channels < 128; channels += 24) {
11142 DWConvMicrokernelTester()
11143 .cr(8)
11144 .kr(25)
11145 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011146 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011147 }
11148 }
11149
11150 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
11151 TEST_REQUIRES_X86_SSE41;
11152 for (uint32_t channels = 16; channels < 128; channels += 24) {
11153 DWConvMicrokernelTester()
11154 .cr(8)
11155 .kr(25)
11156 .channels(channels)
11157 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011158 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011159 }
11160 }
11161
11162 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
11163 TEST_REQUIRES_X86_SSE41;
11164 for (uint32_t channels = 16; channels < 128; channels += 24) {
11165 DWConvMicrokernelTester()
11166 .cr(8)
11167 .kr(25)
11168 .channels(channels)
11169 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011170 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011171 }
11172 }
11173
11174 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
11175 TEST_REQUIRES_X86_SSE41;
11176 for (uint32_t channels = 1; channels < 8; channels++) {
11177 DWConvMicrokernelTester()
11178 .cr(8)
11179 .kr(25)
11180 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011181 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011182 }
11183 }
11184
11185 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
11186 TEST_REQUIRES_X86_SSE41;
11187 for (uint32_t channels = 9; channels < 16; channels++) {
11188 DWConvMicrokernelTester()
11189 .cr(8)
11190 .kr(25)
11191 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011192 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011193 }
11194 }
11195
11196 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
11197 TEST_REQUIRES_X86_SSE41;
11198 for (uint32_t channels = 9; channels < 16; channels++) {
11199 DWConvMicrokernelTester()
11200 .cr(8)
11201 .kr(25)
11202 .channels(channels)
11203 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011204 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011205 }
11206 }
11207
11208 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
11209 TEST_REQUIRES_X86_SSE41;
11210 for (uint32_t channels = 9; channels < 16; channels++) {
11211 DWConvMicrokernelTester()
11212 .cr(8)
11213 .kr(25)
11214 .channels(channels)
11215 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011216 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011217 }
11218 }
11219
11220 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
11221 TEST_REQUIRES_X86_SSE41;
11222 for (size_t channels = 1; channels <= 40; channels += 7) {
11223 DWConvMicrokernelTester()
11224 .cr(8)
11225 .kr(25)
11226 .channels(channels)
11227 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011228 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011229 }
11230 }
11231
11232 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
11233 TEST_REQUIRES_X86_SSE41;
11234 for (size_t channels = 1; channels <= 40; channels += 7) {
11235 for (size_t step = 2; step <= 25; step++) {
11236 DWConvMicrokernelTester()
11237 .cr(8)
11238 .kr(25)
11239 .channels(channels)
11240 .width(3)
11241 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011242 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011243 }
11244 }
11245 }
11246
11247 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
11248 TEST_REQUIRES_X86_SSE41;
11249 for (size_t channels = 1; channels <= 40; channels += 7) {
11250 DWConvMicrokernelTester()
11251 .cr(8)
11252 .kr(25)
11253 .channels(8)
11254 .width(5)
11255 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011256 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011257 }
11258 }
11259
11260 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
11261 TEST_REQUIRES_X86_SSE41;
11262 for (size_t channels = 1; channels <= 40; channels += 7) {
11263 DWConvMicrokernelTester()
11264 .cr(8)
11265 .kr(25)
11266 .channels(channels)
11267 .width(3)
11268 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011269 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011270 }
11271 }
11272
11273 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
11274 TEST_REQUIRES_X86_SSE41;
11275 for (size_t channels = 1; channels <= 40; channels += 7) {
11276 DWConvMicrokernelTester()
11277 .cr(8)
11278 .kr(25)
11279 .channels(channels)
11280 .width(3)
11281 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011282 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011283 }
11284 }
11285
11286 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_zero_point_only) {
11287 TEST_REQUIRES_X86_SSE41;
11288 for (size_t channels = 1; channels <= 40; channels += 7) {
11289 DWConvMicrokernelTester()
11290 .cr(8)
11291 .kr(25)
11292 .channels(channels)
11293 .width(3)
11294 .input_zero_point(255)
11295 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011296 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011297 }
11298 }
11299
11300 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, kernel_zero_point_only) {
11301 TEST_REQUIRES_X86_SSE41;
11302 for (size_t channels = 1; channels <= 40; channels += 7) {
11303 DWConvMicrokernelTester()
11304 .cr(8)
11305 .kr(25)
11306 .channels(channels)
11307 .width(3)
11308 .input_zero_point(0)
11309 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080011310 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011311 }
11312 }
11313
11314 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
11315 TEST_REQUIRES_X86_SSE41;
11316 for (uint32_t channels = 16; channels < 128; channels += 24) {
11317 DWConvMicrokernelTester()
11318 .cr(8)
11319 .kr(25)
11320 .channels(channels)
11321 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080011322 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011323 }
11324 }
11325
11326 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
11327 TEST_REQUIRES_X86_SSE41;
11328 for (uint32_t mz = 0; mz < 25; mz++) {
11329 for (uint32_t channels = 16; channels < 128; channels += 24) {
11330 DWConvMicrokernelTester()
11331 .cr(8)
11332 .kr(25)
11333 .channels(channels)
11334 .input_offset(176)
11335 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011336 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011337 }
11338 }
11339 }
11340#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11341
11342
11343#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11344 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
11345 TEST_REQUIRES_X86_SSE41;
11346 DWConvMicrokernelTester()
11347 .cr(16)
11348 .kr(25)
11349 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011350 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011351 }
11352
11353 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
11354 TEST_REQUIRES_X86_SSE41;
11355 for (uint32_t channels = 32; channels < 256; channels += 48) {
11356 DWConvMicrokernelTester()
11357 .cr(16)
11358 .kr(25)
11359 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011360 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011361 }
11362 }
11363
11364 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
11365 TEST_REQUIRES_X86_SSE41;
11366 for (uint32_t channels = 32; channels < 256; channels += 48) {
11367 DWConvMicrokernelTester()
11368 .cr(16)
11369 .kr(25)
11370 .channels(channels)
11371 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011372 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011373 }
11374 }
11375
11376 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
11377 TEST_REQUIRES_X86_SSE41;
11378 for (uint32_t channels = 32; channels < 256; channels += 48) {
11379 DWConvMicrokernelTester()
11380 .cr(16)
11381 .kr(25)
11382 .channels(channels)
11383 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011384 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011385 }
11386 }
11387
11388 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
11389 TEST_REQUIRES_X86_SSE41;
11390 for (uint32_t channels = 1; channels < 16; channels++) {
11391 DWConvMicrokernelTester()
11392 .cr(16)
11393 .kr(25)
11394 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011395 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011396 }
11397 }
11398
11399 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
11400 TEST_REQUIRES_X86_SSE41;
11401 for (uint32_t channels = 17; channels < 32; channels++) {
11402 DWConvMicrokernelTester()
11403 .cr(16)
11404 .kr(25)
11405 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011407 }
11408 }
11409
11410 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
11411 TEST_REQUIRES_X86_SSE41;
11412 for (uint32_t channels = 17; channels < 32; channels++) {
11413 DWConvMicrokernelTester()
11414 .cr(16)
11415 .kr(25)
11416 .channels(channels)
11417 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011418 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011419 }
11420 }
11421
11422 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
11423 TEST_REQUIRES_X86_SSE41;
11424 for (uint32_t channels = 17; channels < 32; channels++) {
11425 DWConvMicrokernelTester()
11426 .cr(16)
11427 .kr(25)
11428 .channels(channels)
11429 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011431 }
11432 }
11433
11434 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
11435 TEST_REQUIRES_X86_SSE41;
11436 for (size_t channels = 1; channels <= 80; channels += 15) {
11437 DWConvMicrokernelTester()
11438 .cr(16)
11439 .kr(25)
11440 .channels(channels)
11441 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011442 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011443 }
11444 }
11445
11446 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
11447 TEST_REQUIRES_X86_SSE41;
11448 for (size_t channels = 1; channels <= 80; channels += 15) {
11449 for (size_t step = 2; step <= 25; step++) {
11450 DWConvMicrokernelTester()
11451 .cr(16)
11452 .kr(25)
11453 .channels(channels)
11454 .width(3)
11455 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011456 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011457 }
11458 }
11459 }
11460
11461 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
11462 TEST_REQUIRES_X86_SSE41;
11463 for (size_t channels = 1; channels <= 80; channels += 15) {
11464 DWConvMicrokernelTester()
11465 .cr(16)
11466 .kr(25)
11467 .channels(16)
11468 .width(5)
11469 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011470 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011471 }
11472 }
11473
11474 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
11475 TEST_REQUIRES_X86_SSE41;
11476 for (size_t channels = 1; channels <= 80; channels += 15) {
11477 DWConvMicrokernelTester()
11478 .cr(16)
11479 .kr(25)
11480 .channels(channels)
11481 .width(3)
11482 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011483 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011484 }
11485 }
11486
11487 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
11488 TEST_REQUIRES_X86_SSE41;
11489 for (size_t channels = 1; channels <= 80; channels += 15) {
11490 DWConvMicrokernelTester()
11491 .cr(16)
11492 .kr(25)
11493 .channels(channels)
11494 .width(3)
11495 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011496 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011497 }
11498 }
11499
11500 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_zero_point_only) {
11501 TEST_REQUIRES_X86_SSE41;
11502 for (size_t channels = 1; channels <= 80; channels += 15) {
11503 DWConvMicrokernelTester()
11504 .cr(16)
11505 .kr(25)
11506 .channels(channels)
11507 .width(3)
11508 .input_zero_point(255)
11509 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011510 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011511 }
11512 }
11513
11514 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, kernel_zero_point_only) {
11515 TEST_REQUIRES_X86_SSE41;
11516 for (size_t channels = 1; channels <= 80; channels += 15) {
11517 DWConvMicrokernelTester()
11518 .cr(16)
11519 .kr(25)
11520 .channels(channels)
11521 .width(3)
11522 .input_zero_point(0)
11523 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080011524 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011525 }
11526 }
11527
11528 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
11529 TEST_REQUIRES_X86_SSE41;
11530 for (uint32_t channels = 32; channels < 256; channels += 48) {
11531 DWConvMicrokernelTester()
11532 .cr(16)
11533 .kr(25)
11534 .channels(channels)
11535 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011536 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011537 }
11538 }
11539
11540 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
11541 TEST_REQUIRES_X86_SSE41;
11542 for (uint32_t mz = 0; mz < 25; mz++) {
11543 for (uint32_t channels = 32; channels < 256; channels += 48) {
11544 DWConvMicrokernelTester()
11545 .cr(16)
11546 .kr(25)
11547 .channels(channels)
11548 .input_offset(304)
11549 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011550 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011551 }
11552 }
11553 }
11554#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11555
11556
11557#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11558 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
11559 TEST_REQUIRES_X86_AVX;
11560 DWConvMicrokernelTester()
11561 .cr(8)
11562 .kr(25)
11563 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011564 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011565 }
11566
11567 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
11568 TEST_REQUIRES_X86_AVX;
11569 for (uint32_t channels = 16; channels < 128; channels += 24) {
11570 DWConvMicrokernelTester()
11571 .cr(8)
11572 .kr(25)
11573 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011574 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011575 }
11576 }
11577
11578 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
11579 TEST_REQUIRES_X86_AVX;
11580 for (uint32_t channels = 16; channels < 128; channels += 24) {
11581 DWConvMicrokernelTester()
11582 .cr(8)
11583 .kr(25)
11584 .channels(channels)
11585 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011586 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011587 }
11588 }
11589
11590 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
11591 TEST_REQUIRES_X86_AVX;
11592 for (uint32_t channels = 16; channels < 128; channels += 24) {
11593 DWConvMicrokernelTester()
11594 .cr(8)
11595 .kr(25)
11596 .channels(channels)
11597 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011599 }
11600 }
11601
11602 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
11603 TEST_REQUIRES_X86_AVX;
11604 for (uint32_t channels = 1; channels < 8; channels++) {
11605 DWConvMicrokernelTester()
11606 .cr(8)
11607 .kr(25)
11608 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011609 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011610 }
11611 }
11612
11613 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
11614 TEST_REQUIRES_X86_AVX;
11615 for (uint32_t channels = 9; channels < 16; channels++) {
11616 DWConvMicrokernelTester()
11617 .cr(8)
11618 .kr(25)
11619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011620 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011621 }
11622 }
11623
11624 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
11625 TEST_REQUIRES_X86_AVX;
11626 for (uint32_t channels = 9; channels < 16; channels++) {
11627 DWConvMicrokernelTester()
11628 .cr(8)
11629 .kr(25)
11630 .channels(channels)
11631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011632 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011633 }
11634 }
11635
11636 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
11637 TEST_REQUIRES_X86_AVX;
11638 for (uint32_t channels = 9; channels < 16; channels++) {
11639 DWConvMicrokernelTester()
11640 .cr(8)
11641 .kr(25)
11642 .channels(channels)
11643 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011644 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011645 }
11646 }
11647
11648 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
11649 TEST_REQUIRES_X86_AVX;
11650 for (size_t channels = 1; channels <= 40; channels += 7) {
11651 DWConvMicrokernelTester()
11652 .cr(8)
11653 .kr(25)
11654 .channels(channels)
11655 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011656 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011657 }
11658 }
11659
11660 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
11661 TEST_REQUIRES_X86_AVX;
11662 for (size_t channels = 1; channels <= 40; channels += 7) {
11663 for (size_t step = 2; step <= 25; step++) {
11664 DWConvMicrokernelTester()
11665 .cr(8)
11666 .kr(25)
11667 .channels(channels)
11668 .width(3)
11669 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011670 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011671 }
11672 }
11673 }
11674
11675 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
11676 TEST_REQUIRES_X86_AVX;
11677 for (size_t channels = 1; channels <= 40; channels += 7) {
11678 DWConvMicrokernelTester()
11679 .cr(8)
11680 .kr(25)
11681 .channels(8)
11682 .width(5)
11683 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011684 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011685 }
11686 }
11687
11688 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
11689 TEST_REQUIRES_X86_AVX;
11690 for (size_t channels = 1; channels <= 40; channels += 7) {
11691 DWConvMicrokernelTester()
11692 .cr(8)
11693 .kr(25)
11694 .channels(channels)
11695 .width(3)
11696 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011697 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011698 }
11699 }
11700
11701 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
11702 TEST_REQUIRES_X86_AVX;
11703 for (size_t channels = 1; channels <= 40; channels += 7) {
11704 DWConvMicrokernelTester()
11705 .cr(8)
11706 .kr(25)
11707 .channels(channels)
11708 .width(3)
11709 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011710 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011711 }
11712 }
11713
11714 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_zero_point_only) {
11715 TEST_REQUIRES_X86_AVX;
11716 for (size_t channels = 1; channels <= 40; channels += 7) {
11717 DWConvMicrokernelTester()
11718 .cr(8)
11719 .kr(25)
11720 .channels(channels)
11721 .width(3)
11722 .input_zero_point(255)
11723 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011724 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011725 }
11726 }
11727
11728 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, kernel_zero_point_only) {
11729 TEST_REQUIRES_X86_AVX;
11730 for (size_t channels = 1; channels <= 40; channels += 7) {
11731 DWConvMicrokernelTester()
11732 .cr(8)
11733 .kr(25)
11734 .channels(channels)
11735 .width(3)
11736 .input_zero_point(0)
11737 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080011738 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011739 }
11740 }
11741
11742 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
11743 TEST_REQUIRES_X86_AVX;
11744 for (uint32_t channels = 16; channels < 128; channels += 24) {
11745 DWConvMicrokernelTester()
11746 .cr(8)
11747 .kr(25)
11748 .channels(channels)
11749 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080011750 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011751 }
11752 }
11753
11754 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
11755 TEST_REQUIRES_X86_AVX;
11756 for (uint32_t mz = 0; mz < 25; mz++) {
11757 for (uint32_t channels = 16; channels < 128; channels += 24) {
11758 DWConvMicrokernelTester()
11759 .cr(8)
11760 .kr(25)
11761 .channels(channels)
11762 .input_offset(176)
11763 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011764 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011765 }
11766 }
11767 }
11768#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11769
11770
11771#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11772 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
11773 TEST_REQUIRES_X86_AVX;
11774 DWConvMicrokernelTester()
11775 .cr(16)
11776 .kr(25)
11777 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011778 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011779 }
11780
11781 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
11782 TEST_REQUIRES_X86_AVX;
11783 for (uint32_t channels = 32; channels < 256; channels += 48) {
11784 DWConvMicrokernelTester()
11785 .cr(16)
11786 .kr(25)
11787 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011788 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011789 }
11790 }
11791
11792 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
11793 TEST_REQUIRES_X86_AVX;
11794 for (uint32_t channels = 32; channels < 256; channels += 48) {
11795 DWConvMicrokernelTester()
11796 .cr(16)
11797 .kr(25)
11798 .channels(channels)
11799 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011800 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011801 }
11802 }
11803
11804 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
11805 TEST_REQUIRES_X86_AVX;
11806 for (uint32_t channels = 32; channels < 256; channels += 48) {
11807 DWConvMicrokernelTester()
11808 .cr(16)
11809 .kr(25)
11810 .channels(channels)
11811 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011812 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011813 }
11814 }
11815
11816 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
11817 TEST_REQUIRES_X86_AVX;
11818 for (uint32_t channels = 1; channels < 16; channels++) {
11819 DWConvMicrokernelTester()
11820 .cr(16)
11821 .kr(25)
11822 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011823 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011824 }
11825 }
11826
11827 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
11828 TEST_REQUIRES_X86_AVX;
11829 for (uint32_t channels = 17; channels < 32; channels++) {
11830 DWConvMicrokernelTester()
11831 .cr(16)
11832 .kr(25)
11833 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011834 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011835 }
11836 }
11837
11838 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
11839 TEST_REQUIRES_X86_AVX;
11840 for (uint32_t channels = 17; channels < 32; channels++) {
11841 DWConvMicrokernelTester()
11842 .cr(16)
11843 .kr(25)
11844 .channels(channels)
11845 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011846 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011847 }
11848 }
11849
11850 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
11851 TEST_REQUIRES_X86_AVX;
11852 for (uint32_t channels = 17; channels < 32; channels++) {
11853 DWConvMicrokernelTester()
11854 .cr(16)
11855 .kr(25)
11856 .channels(channels)
11857 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011858 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011859 }
11860 }
11861
11862 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
11863 TEST_REQUIRES_X86_AVX;
11864 for (size_t channels = 1; channels <= 80; channels += 15) {
11865 DWConvMicrokernelTester()
11866 .cr(16)
11867 .kr(25)
11868 .channels(channels)
11869 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011870 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011871 }
11872 }
11873
11874 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
11875 TEST_REQUIRES_X86_AVX;
11876 for (size_t channels = 1; channels <= 80; channels += 15) {
11877 for (size_t step = 2; step <= 25; step++) {
11878 DWConvMicrokernelTester()
11879 .cr(16)
11880 .kr(25)
11881 .channels(channels)
11882 .width(3)
11883 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011884 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011885 }
11886 }
11887 }
11888
11889 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
11890 TEST_REQUIRES_X86_AVX;
11891 for (size_t channels = 1; channels <= 80; channels += 15) {
11892 DWConvMicrokernelTester()
11893 .cr(16)
11894 .kr(25)
11895 .channels(16)
11896 .width(5)
11897 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011898 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011899 }
11900 }
11901
11902 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
11903 TEST_REQUIRES_X86_AVX;
11904 for (size_t channels = 1; channels <= 80; channels += 15) {
11905 DWConvMicrokernelTester()
11906 .cr(16)
11907 .kr(25)
11908 .channels(channels)
11909 .width(3)
11910 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011911 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011912 }
11913 }
11914
11915 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
11916 TEST_REQUIRES_X86_AVX;
11917 for (size_t channels = 1; channels <= 80; channels += 15) {
11918 DWConvMicrokernelTester()
11919 .cr(16)
11920 .kr(25)
11921 .channels(channels)
11922 .width(3)
11923 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011924 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011925 }
11926 }
11927
11928 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_zero_point_only) {
11929 TEST_REQUIRES_X86_AVX;
11930 for (size_t channels = 1; channels <= 80; channels += 15) {
11931 DWConvMicrokernelTester()
11932 .cr(16)
11933 .kr(25)
11934 .channels(channels)
11935 .width(3)
11936 .input_zero_point(255)
11937 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011938 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011939 }
11940 }
11941
11942 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, kernel_zero_point_only) {
11943 TEST_REQUIRES_X86_AVX;
11944 for (size_t channels = 1; channels <= 80; channels += 15) {
11945 DWConvMicrokernelTester()
11946 .cr(16)
11947 .kr(25)
11948 .channels(channels)
11949 .width(3)
11950 .input_zero_point(0)
11951 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080011952 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011953 }
11954 }
11955
11956 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
11957 TEST_REQUIRES_X86_AVX;
11958 for (uint32_t channels = 32; channels < 256; channels += 48) {
11959 DWConvMicrokernelTester()
11960 .cr(16)
11961 .kr(25)
11962 .channels(channels)
11963 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011964 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011965 }
11966 }
11967
11968 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
11969 TEST_REQUIRES_X86_AVX;
11970 for (uint32_t mz = 0; mz < 25; mz++) {
11971 for (uint32_t channels = 32; channels < 256; channels += 48) {
11972 DWConvMicrokernelTester()
11973 .cr(16)
11974 .kr(25)
11975 .channels(channels)
11976 .input_offset(304)
11977 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011978 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanf0f28812021-07-08 22:34:20 -070011979 }
11980 }
11981 }
11982#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11983
11984
11985#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070011986 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
11987 TEST_REQUIRES_X86_XOP;
11988 DWConvMicrokernelTester()
11989 .cr(8)
11990 .kr(25)
11991 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011992 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070011993 }
11994
11995 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
11996 TEST_REQUIRES_X86_XOP;
11997 for (uint32_t channels = 16; channels < 128; channels += 24) {
11998 DWConvMicrokernelTester()
11999 .cr(8)
12000 .kr(25)
12001 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012002 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012003 }
12004 }
12005
12006 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
12007 TEST_REQUIRES_X86_XOP;
12008 for (uint32_t channels = 16; channels < 128; channels += 24) {
12009 DWConvMicrokernelTester()
12010 .cr(8)
12011 .kr(25)
12012 .channels(channels)
12013 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012014 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012015 }
12016 }
12017
12018 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
12019 TEST_REQUIRES_X86_XOP;
12020 for (uint32_t channels = 16; channels < 128; channels += 24) {
12021 DWConvMicrokernelTester()
12022 .cr(8)
12023 .kr(25)
12024 .channels(channels)
12025 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012026 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012027 }
12028 }
12029
12030 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
12031 TEST_REQUIRES_X86_XOP;
12032 for (uint32_t channels = 1; channels < 8; channels++) {
12033 DWConvMicrokernelTester()
12034 .cr(8)
12035 .kr(25)
12036 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012037 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012038 }
12039 }
12040
12041 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
12042 TEST_REQUIRES_X86_XOP;
12043 for (uint32_t channels = 9; channels < 16; channels++) {
12044 DWConvMicrokernelTester()
12045 .cr(8)
12046 .kr(25)
12047 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012048 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012049 }
12050 }
12051
12052 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
12053 TEST_REQUIRES_X86_XOP;
12054 for (uint32_t channels = 9; channels < 16; channels++) {
12055 DWConvMicrokernelTester()
12056 .cr(8)
12057 .kr(25)
12058 .channels(channels)
12059 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012060 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012061 }
12062 }
12063
12064 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
12065 TEST_REQUIRES_X86_XOP;
12066 for (uint32_t channels = 9; channels < 16; channels++) {
12067 DWConvMicrokernelTester()
12068 .cr(8)
12069 .kr(25)
12070 .channels(channels)
12071 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012072 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012073 }
12074 }
12075
12076 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
12077 TEST_REQUIRES_X86_XOP;
12078 for (size_t channels = 1; channels <= 40; channels += 7) {
12079 DWConvMicrokernelTester()
12080 .cr(8)
12081 .kr(25)
12082 .channels(channels)
12083 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012084 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012085 }
12086 }
12087
12088 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
12089 TEST_REQUIRES_X86_XOP;
12090 for (size_t channels = 1; channels <= 40; channels += 7) {
12091 for (size_t step = 2; step <= 25; step++) {
12092 DWConvMicrokernelTester()
12093 .cr(8)
12094 .kr(25)
12095 .channels(channels)
12096 .width(3)
12097 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012098 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012099 }
12100 }
12101 }
12102
12103 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
12104 TEST_REQUIRES_X86_XOP;
12105 for (size_t channels = 1; channels <= 40; channels += 7) {
12106 DWConvMicrokernelTester()
12107 .cr(8)
12108 .kr(25)
12109 .channels(8)
12110 .width(5)
12111 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012112 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012113 }
12114 }
12115
12116 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
12117 TEST_REQUIRES_X86_XOP;
12118 for (size_t channels = 1; channels <= 40; channels += 7) {
12119 DWConvMicrokernelTester()
12120 .cr(8)
12121 .kr(25)
12122 .channels(channels)
12123 .width(3)
12124 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012125 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012126 }
12127 }
12128
12129 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
12130 TEST_REQUIRES_X86_XOP;
12131 for (size_t channels = 1; channels <= 40; channels += 7) {
12132 DWConvMicrokernelTester()
12133 .cr(8)
12134 .kr(25)
12135 .channels(channels)
12136 .width(3)
12137 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012138 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012139 }
12140 }
12141
12142 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_zero_point_only) {
12143 TEST_REQUIRES_X86_XOP;
12144 for (size_t channels = 1; channels <= 40; channels += 7) {
12145 DWConvMicrokernelTester()
12146 .cr(8)
12147 .kr(25)
12148 .channels(channels)
12149 .width(3)
12150 .input_zero_point(255)
12151 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012152 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012153 }
12154 }
12155
12156 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, kernel_zero_point_only) {
12157 TEST_REQUIRES_X86_XOP;
12158 for (size_t channels = 1; channels <= 40; channels += 7) {
12159 DWConvMicrokernelTester()
12160 .cr(8)
12161 .kr(25)
12162 .channels(channels)
12163 .width(3)
12164 .input_zero_point(0)
12165 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080012166 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012167 }
12168 }
12169
12170 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
12171 TEST_REQUIRES_X86_XOP;
12172 for (uint32_t channels = 16; channels < 128; channels += 24) {
12173 DWConvMicrokernelTester()
12174 .cr(8)
12175 .kr(25)
12176 .channels(channels)
12177 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080012178 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012179 }
12180 }
12181
12182 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
12183 TEST_REQUIRES_X86_XOP;
12184 for (uint32_t mz = 0; mz < 25; mz++) {
12185 for (uint32_t channels = 16; channels < 128; channels += 24) {
12186 DWConvMicrokernelTester()
12187 .cr(8)
12188 .kr(25)
12189 .channels(channels)
12190 .input_offset(176)
12191 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012192 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012193 }
12194 }
12195 }
12196#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12197
12198
12199#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12200 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
12201 TEST_REQUIRES_X86_XOP;
12202 DWConvMicrokernelTester()
12203 .cr(16)
12204 .kr(25)
12205 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012206 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012207 }
12208
12209 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
12210 TEST_REQUIRES_X86_XOP;
12211 for (uint32_t channels = 32; channels < 256; channels += 48) {
12212 DWConvMicrokernelTester()
12213 .cr(16)
12214 .kr(25)
12215 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012216 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012217 }
12218 }
12219
12220 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
12221 TEST_REQUIRES_X86_XOP;
12222 for (uint32_t channels = 32; channels < 256; channels += 48) {
12223 DWConvMicrokernelTester()
12224 .cr(16)
12225 .kr(25)
12226 .channels(channels)
12227 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012228 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012229 }
12230 }
12231
12232 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
12233 TEST_REQUIRES_X86_XOP;
12234 for (uint32_t channels = 32; channels < 256; channels += 48) {
12235 DWConvMicrokernelTester()
12236 .cr(16)
12237 .kr(25)
12238 .channels(channels)
12239 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012240 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012241 }
12242 }
12243
12244 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
12245 TEST_REQUIRES_X86_XOP;
12246 for (uint32_t channels = 1; channels < 16; channels++) {
12247 DWConvMicrokernelTester()
12248 .cr(16)
12249 .kr(25)
12250 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012251 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012252 }
12253 }
12254
12255 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
12256 TEST_REQUIRES_X86_XOP;
12257 for (uint32_t channels = 17; channels < 32; channels++) {
12258 DWConvMicrokernelTester()
12259 .cr(16)
12260 .kr(25)
12261 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012262 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012263 }
12264 }
12265
12266 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
12267 TEST_REQUIRES_X86_XOP;
12268 for (uint32_t channels = 17; channels < 32; channels++) {
12269 DWConvMicrokernelTester()
12270 .cr(16)
12271 .kr(25)
12272 .channels(channels)
12273 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012274 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012275 }
12276 }
12277
12278 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
12279 TEST_REQUIRES_X86_XOP;
12280 for (uint32_t channels = 17; channels < 32; channels++) {
12281 DWConvMicrokernelTester()
12282 .cr(16)
12283 .kr(25)
12284 .channels(channels)
12285 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012287 }
12288 }
12289
12290 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
12291 TEST_REQUIRES_X86_XOP;
12292 for (size_t channels = 1; channels <= 80; channels += 15) {
12293 DWConvMicrokernelTester()
12294 .cr(16)
12295 .kr(25)
12296 .channels(channels)
12297 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012298 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012299 }
12300 }
12301
12302 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
12303 TEST_REQUIRES_X86_XOP;
12304 for (size_t channels = 1; channels <= 80; channels += 15) {
12305 for (size_t step = 2; step <= 25; step++) {
12306 DWConvMicrokernelTester()
12307 .cr(16)
12308 .kr(25)
12309 .channels(channels)
12310 .width(3)
12311 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012312 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012313 }
12314 }
12315 }
12316
12317 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
12318 TEST_REQUIRES_X86_XOP;
12319 for (size_t channels = 1; channels <= 80; channels += 15) {
12320 DWConvMicrokernelTester()
12321 .cr(16)
12322 .kr(25)
12323 .channels(16)
12324 .width(5)
12325 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012326 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012327 }
12328 }
12329
12330 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
12331 TEST_REQUIRES_X86_XOP;
12332 for (size_t channels = 1; channels <= 80; channels += 15) {
12333 DWConvMicrokernelTester()
12334 .cr(16)
12335 .kr(25)
12336 .channels(channels)
12337 .width(3)
12338 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012339 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012340 }
12341 }
12342
12343 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
12344 TEST_REQUIRES_X86_XOP;
12345 for (size_t channels = 1; channels <= 80; channels += 15) {
12346 DWConvMicrokernelTester()
12347 .cr(16)
12348 .kr(25)
12349 .channels(channels)
12350 .width(3)
12351 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012352 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012353 }
12354 }
12355
12356 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_zero_point_only) {
12357 TEST_REQUIRES_X86_XOP;
12358 for (size_t channels = 1; channels <= 80; channels += 15) {
12359 DWConvMicrokernelTester()
12360 .cr(16)
12361 .kr(25)
12362 .channels(channels)
12363 .width(3)
12364 .input_zero_point(255)
12365 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012366 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012367 }
12368 }
12369
12370 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, kernel_zero_point_only) {
12371 TEST_REQUIRES_X86_XOP;
12372 for (size_t channels = 1; channels <= 80; channels += 15) {
12373 DWConvMicrokernelTester()
12374 .cr(16)
12375 .kr(25)
12376 .channels(channels)
12377 .width(3)
12378 .input_zero_point(0)
12379 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080012380 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012381 }
12382 }
12383
12384 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
12385 TEST_REQUIRES_X86_XOP;
12386 for (uint32_t channels = 32; channels < 256; channels += 48) {
12387 DWConvMicrokernelTester()
12388 .cr(16)
12389 .kr(25)
12390 .channels(channels)
12391 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080012392 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012393 }
12394 }
12395
12396 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
12397 TEST_REQUIRES_X86_XOP;
12398 for (uint32_t mz = 0; mz < 25; mz++) {
12399 for (uint32_t channels = 32; channels < 256; channels += 48) {
12400 DWConvMicrokernelTester()
12401 .cr(16)
12402 .kr(25)
12403 .channels(channels)
12404 .input_offset(304)
12405 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhan3c35f7a2021-07-08 18:55:42 -070012407 }
12408 }
12409 }
12410#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09c312b2021-07-09 00:45:04 -070012411
12412
12413#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12414 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
12415 TEST_REQUIRES_X86_AVX2;
12416 DWConvMicrokernelTester()
12417 .cr(8)
12418 .kr(25)
12419 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012420 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012421 }
12422
12423 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
12424 TEST_REQUIRES_X86_AVX2;
12425 for (uint32_t channels = 16; channels < 128; channels += 24) {
12426 DWConvMicrokernelTester()
12427 .cr(8)
12428 .kr(25)
12429 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012431 }
12432 }
12433
12434 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
12435 TEST_REQUIRES_X86_AVX2;
12436 for (uint32_t channels = 16; channels < 128; channels += 24) {
12437 DWConvMicrokernelTester()
12438 .cr(8)
12439 .kr(25)
12440 .channels(channels)
12441 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012442 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012443 }
12444 }
12445
12446 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
12447 TEST_REQUIRES_X86_AVX2;
12448 for (uint32_t channels = 16; channels < 128; channels += 24) {
12449 DWConvMicrokernelTester()
12450 .cr(8)
12451 .kr(25)
12452 .channels(channels)
12453 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012454 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012455 }
12456 }
12457
12458 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
12459 TEST_REQUIRES_X86_AVX2;
12460 for (uint32_t channels = 1; channels < 8; channels++) {
12461 DWConvMicrokernelTester()
12462 .cr(8)
12463 .kr(25)
12464 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012465 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012466 }
12467 }
12468
12469 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
12470 TEST_REQUIRES_X86_AVX2;
12471 for (uint32_t channels = 9; channels < 16; channels++) {
12472 DWConvMicrokernelTester()
12473 .cr(8)
12474 .kr(25)
12475 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012476 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012477 }
12478 }
12479
12480 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
12481 TEST_REQUIRES_X86_AVX2;
12482 for (uint32_t channels = 9; channels < 16; channels++) {
12483 DWConvMicrokernelTester()
12484 .cr(8)
12485 .kr(25)
12486 .channels(channels)
12487 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012488 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012489 }
12490 }
12491
12492 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
12493 TEST_REQUIRES_X86_AVX2;
12494 for (uint32_t channels = 9; channels < 16; channels++) {
12495 DWConvMicrokernelTester()
12496 .cr(8)
12497 .kr(25)
12498 .channels(channels)
12499 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012500 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012501 }
12502 }
12503
12504 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
12505 TEST_REQUIRES_X86_AVX2;
12506 for (size_t channels = 1; channels <= 40; channels += 7) {
12507 DWConvMicrokernelTester()
12508 .cr(8)
12509 .kr(25)
12510 .channels(channels)
12511 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012512 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012513 }
12514 }
12515
12516 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
12517 TEST_REQUIRES_X86_AVX2;
12518 for (size_t channels = 1; channels <= 40; channels += 7) {
12519 for (size_t step = 2; step <= 25; step++) {
12520 DWConvMicrokernelTester()
12521 .cr(8)
12522 .kr(25)
12523 .channels(channels)
12524 .width(3)
12525 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012526 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012527 }
12528 }
12529 }
12530
12531 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
12532 TEST_REQUIRES_X86_AVX2;
12533 for (size_t channels = 1; channels <= 40; channels += 7) {
12534 DWConvMicrokernelTester()
12535 .cr(8)
12536 .kr(25)
12537 .channels(8)
12538 .width(5)
12539 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012540 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012541 }
12542 }
12543
12544 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
12545 TEST_REQUIRES_X86_AVX2;
12546 for (size_t channels = 1; channels <= 40; channels += 7) {
12547 DWConvMicrokernelTester()
12548 .cr(8)
12549 .kr(25)
12550 .channels(channels)
12551 .width(3)
12552 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012553 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012554 }
12555 }
12556
12557 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
12558 TEST_REQUIRES_X86_AVX2;
12559 for (size_t channels = 1; channels <= 40; channels += 7) {
12560 DWConvMicrokernelTester()
12561 .cr(8)
12562 .kr(25)
12563 .channels(channels)
12564 .width(3)
12565 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012566 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012567 }
12568 }
12569
12570 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_zero_point_only) {
12571 TEST_REQUIRES_X86_AVX2;
12572 for (size_t channels = 1; channels <= 40; channels += 7) {
12573 DWConvMicrokernelTester()
12574 .cr(8)
12575 .kr(25)
12576 .channels(channels)
12577 .width(3)
12578 .input_zero_point(255)
12579 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012580 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012581 }
12582 }
12583
12584 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, kernel_zero_point_only) {
12585 TEST_REQUIRES_X86_AVX2;
12586 for (size_t channels = 1; channels <= 40; channels += 7) {
12587 DWConvMicrokernelTester()
12588 .cr(8)
12589 .kr(25)
12590 .channels(channels)
12591 .width(3)
12592 .input_zero_point(0)
12593 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080012594 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012595 }
12596 }
12597
12598 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
12599 TEST_REQUIRES_X86_AVX2;
12600 for (uint32_t channels = 16; channels < 128; channels += 24) {
12601 DWConvMicrokernelTester()
12602 .cr(8)
12603 .kr(25)
12604 .channels(channels)
12605 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080012606 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012607 }
12608 }
12609
12610 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
12611 TEST_REQUIRES_X86_AVX2;
12612 for (uint32_t mz = 0; mz < 25; mz++) {
12613 for (uint32_t channels = 16; channels < 128; channels += 24) {
12614 DWConvMicrokernelTester()
12615 .cr(8)
12616 .kr(25)
12617 .channels(channels)
12618 .input_offset(176)
12619 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012620 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012621 }
12622 }
12623 }
12624#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12625
12626
12627#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12628 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
12629 TEST_REQUIRES_X86_AVX2;
12630 DWConvMicrokernelTester()
12631 .cr(16)
12632 .kr(25)
12633 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012634 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012635 }
12636
12637 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
12638 TEST_REQUIRES_X86_AVX2;
12639 for (uint32_t channels = 32; channels < 256; channels += 48) {
12640 DWConvMicrokernelTester()
12641 .cr(16)
12642 .kr(25)
12643 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012644 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012645 }
12646 }
12647
12648 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
12649 TEST_REQUIRES_X86_AVX2;
12650 for (uint32_t channels = 32; channels < 256; channels += 48) {
12651 DWConvMicrokernelTester()
12652 .cr(16)
12653 .kr(25)
12654 .channels(channels)
12655 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012656 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012657 }
12658 }
12659
12660 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
12661 TEST_REQUIRES_X86_AVX2;
12662 for (uint32_t channels = 32; channels < 256; channels += 48) {
12663 DWConvMicrokernelTester()
12664 .cr(16)
12665 .kr(25)
12666 .channels(channels)
12667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012668 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012669 }
12670 }
12671
12672 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
12673 TEST_REQUIRES_X86_AVX2;
12674 for (uint32_t channels = 1; channels < 16; channels++) {
12675 DWConvMicrokernelTester()
12676 .cr(16)
12677 .kr(25)
12678 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012679 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012680 }
12681 }
12682
12683 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
12684 TEST_REQUIRES_X86_AVX2;
12685 for (uint32_t channels = 17; channels < 32; channels++) {
12686 DWConvMicrokernelTester()
12687 .cr(16)
12688 .kr(25)
12689 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012690 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012691 }
12692 }
12693
12694 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
12695 TEST_REQUIRES_X86_AVX2;
12696 for (uint32_t channels = 17; channels < 32; channels++) {
12697 DWConvMicrokernelTester()
12698 .cr(16)
12699 .kr(25)
12700 .channels(channels)
12701 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012702 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012703 }
12704 }
12705
12706 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
12707 TEST_REQUIRES_X86_AVX2;
12708 for (uint32_t channels = 17; channels < 32; channels++) {
12709 DWConvMicrokernelTester()
12710 .cr(16)
12711 .kr(25)
12712 .channels(channels)
12713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012714 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012715 }
12716 }
12717
12718 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
12719 TEST_REQUIRES_X86_AVX2;
12720 for (size_t channels = 1; channels <= 80; channels += 15) {
12721 DWConvMicrokernelTester()
12722 .cr(16)
12723 .kr(25)
12724 .channels(channels)
12725 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012726 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012727 }
12728 }
12729
12730 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
12731 TEST_REQUIRES_X86_AVX2;
12732 for (size_t channels = 1; channels <= 80; channels += 15) {
12733 for (size_t step = 2; step <= 25; step++) {
12734 DWConvMicrokernelTester()
12735 .cr(16)
12736 .kr(25)
12737 .channels(channels)
12738 .width(3)
12739 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012740 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012741 }
12742 }
12743 }
12744
12745 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
12746 TEST_REQUIRES_X86_AVX2;
12747 for (size_t channels = 1; channels <= 80; channels += 15) {
12748 DWConvMicrokernelTester()
12749 .cr(16)
12750 .kr(25)
12751 .channels(16)
12752 .width(5)
12753 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012754 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012755 }
12756 }
12757
12758 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
12759 TEST_REQUIRES_X86_AVX2;
12760 for (size_t channels = 1; channels <= 80; channels += 15) {
12761 DWConvMicrokernelTester()
12762 .cr(16)
12763 .kr(25)
12764 .channels(channels)
12765 .width(3)
12766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012767 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012768 }
12769 }
12770
12771 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
12772 TEST_REQUIRES_X86_AVX2;
12773 for (size_t channels = 1; channels <= 80; channels += 15) {
12774 DWConvMicrokernelTester()
12775 .cr(16)
12776 .kr(25)
12777 .channels(channels)
12778 .width(3)
12779 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012780 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012781 }
12782 }
12783
12784 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_zero_point_only) {
12785 TEST_REQUIRES_X86_AVX2;
12786 for (size_t channels = 1; channels <= 80; channels += 15) {
12787 DWConvMicrokernelTester()
12788 .cr(16)
12789 .kr(25)
12790 .channels(channels)
12791 .width(3)
12792 .input_zero_point(255)
12793 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012794 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012795 }
12796 }
12797
12798 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, kernel_zero_point_only) {
12799 TEST_REQUIRES_X86_AVX2;
12800 for (size_t channels = 1; channels <= 80; channels += 15) {
12801 DWConvMicrokernelTester()
12802 .cr(16)
12803 .kr(25)
12804 .channels(channels)
12805 .width(3)
12806 .input_zero_point(0)
12807 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080012808 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012809 }
12810 }
12811
12812 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
12813 TEST_REQUIRES_X86_AVX2;
12814 for (uint32_t channels = 32; channels < 256; channels += 48) {
12815 DWConvMicrokernelTester()
12816 .cr(16)
12817 .kr(25)
12818 .channels(channels)
12819 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080012820 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012821 }
12822 }
12823
12824 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
12825 TEST_REQUIRES_X86_AVX2;
12826 for (uint32_t mz = 0; mz < 25; mz++) {
12827 for (uint32_t channels = 32; channels < 256; channels += 48) {
12828 DWConvMicrokernelTester()
12829 .cr(16)
12830 .kr(25)
12831 .channels(channels)
12832 .input_offset(304)
12833 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012834 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012835 }
12836 }
12837 }
12838#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12839
12840
12841#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12842 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
12843 TEST_REQUIRES_X86_AVX2;
12844 DWConvMicrokernelTester()
12845 .cr(32)
12846 .kr(25)
12847 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080012848 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012849 }
12850
12851 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
12852 TEST_REQUIRES_X86_AVX2;
12853 for (uint32_t channels = 64; channels < 512; channels += 96) {
12854 DWConvMicrokernelTester()
12855 .cr(32)
12856 .kr(25)
12857 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012858 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012859 }
12860 }
12861
12862 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
12863 TEST_REQUIRES_X86_AVX2;
12864 for (uint32_t channels = 64; channels < 512; channels += 96) {
12865 DWConvMicrokernelTester()
12866 .cr(32)
12867 .kr(25)
12868 .channels(channels)
12869 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012870 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012871 }
12872 }
12873
12874 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
12875 TEST_REQUIRES_X86_AVX2;
12876 for (uint32_t channels = 64; channels < 512; channels += 96) {
12877 DWConvMicrokernelTester()
12878 .cr(32)
12879 .kr(25)
12880 .channels(channels)
12881 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012882 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012883 }
12884 }
12885
12886 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
12887 TEST_REQUIRES_X86_AVX2;
12888 for (uint32_t channels = 1; channels < 32; channels++) {
12889 DWConvMicrokernelTester()
12890 .cr(32)
12891 .kr(25)
12892 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012893 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012894 }
12895 }
12896
12897 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
12898 TEST_REQUIRES_X86_AVX2;
12899 for (uint32_t channels = 33; channels < 64; channels++) {
12900 DWConvMicrokernelTester()
12901 .cr(32)
12902 .kr(25)
12903 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012904 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012905 }
12906 }
12907
12908 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
12909 TEST_REQUIRES_X86_AVX2;
12910 for (uint32_t channels = 33; channels < 64; channels++) {
12911 DWConvMicrokernelTester()
12912 .cr(32)
12913 .kr(25)
12914 .channels(channels)
12915 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012916 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012917 }
12918 }
12919
12920 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
12921 TEST_REQUIRES_X86_AVX2;
12922 for (uint32_t channels = 33; channels < 64; channels++) {
12923 DWConvMicrokernelTester()
12924 .cr(32)
12925 .kr(25)
12926 .channels(channels)
12927 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012928 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012929 }
12930 }
12931
12932 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
12933 TEST_REQUIRES_X86_AVX2;
12934 for (size_t channels = 1; channels <= 160; channels += 31) {
12935 DWConvMicrokernelTester()
12936 .cr(32)
12937 .kr(25)
12938 .channels(channels)
12939 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012940 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012941 }
12942 }
12943
12944 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
12945 TEST_REQUIRES_X86_AVX2;
12946 for (size_t channels = 1; channels <= 160; channels += 31) {
12947 for (size_t step = 2; step <= 25; step++) {
12948 DWConvMicrokernelTester()
12949 .cr(32)
12950 .kr(25)
12951 .channels(channels)
12952 .width(3)
12953 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012954 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012955 }
12956 }
12957 }
12958
12959 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
12960 TEST_REQUIRES_X86_AVX2;
12961 for (size_t channels = 1; channels <= 160; channels += 31) {
12962 DWConvMicrokernelTester()
12963 .cr(32)
12964 .kr(25)
12965 .channels(32)
12966 .width(5)
12967 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012968 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012969 }
12970 }
12971
12972 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
12973 TEST_REQUIRES_X86_AVX2;
12974 for (size_t channels = 1; channels <= 160; channels += 31) {
12975 DWConvMicrokernelTester()
12976 .cr(32)
12977 .kr(25)
12978 .channels(channels)
12979 .width(3)
12980 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012981 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012982 }
12983 }
12984
12985 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
12986 TEST_REQUIRES_X86_AVX2;
12987 for (size_t channels = 1; channels <= 160; channels += 31) {
12988 DWConvMicrokernelTester()
12989 .cr(32)
12990 .kr(25)
12991 .channels(channels)
12992 .width(3)
12993 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012994 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070012995 }
12996 }
12997
12998 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_zero_point_only) {
12999 TEST_REQUIRES_X86_AVX2;
13000 for (size_t channels = 1; channels <= 160; channels += 31) {
13001 DWConvMicrokernelTester()
13002 .cr(32)
13003 .kr(25)
13004 .channels(channels)
13005 .width(3)
13006 .input_zero_point(255)
13007 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013008 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070013009 }
13010 }
13011
13012 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, kernel_zero_point_only) {
13013 TEST_REQUIRES_X86_AVX2;
13014 for (size_t channels = 1; channels <= 160; channels += 31) {
13015 DWConvMicrokernelTester()
13016 .cr(32)
13017 .kr(25)
13018 .channels(channels)
13019 .width(3)
13020 .input_zero_point(0)
13021 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080013022 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070013023 }
13024 }
13025
13026 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
13027 TEST_REQUIRES_X86_AVX2;
13028 for (uint32_t channels = 64; channels < 512; channels += 96) {
13029 DWConvMicrokernelTester()
13030 .cr(32)
13031 .kr(25)
13032 .channels(channels)
13033 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080013034 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070013035 }
13036 }
13037
13038 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
13039 TEST_REQUIRES_X86_AVX2;
13040 for (uint32_t mz = 0; mz < 25; mz++) {
13041 for (uint32_t channels = 64; channels < 512; channels += 96) {
13042 DWConvMicrokernelTester()
13043 .cr(32)
13044 .kr(25)
13045 .channels(channels)
13046 .input_offset(592)
13047 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013048 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan09c312b2021-07-09 00:45:04 -070013049 }
13050 }
13051 }
13052#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhancfd606b2021-07-09 01:18:45 -070013053
13054
13055#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13056 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
13057 TEST_REQUIRES_X86_AVX512SKX;
13058 DWConvMicrokernelTester()
13059 .cr(16)
13060 .kr(25)
13061 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013062 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013063 }
13064
13065 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
13066 TEST_REQUIRES_X86_AVX512SKX;
13067 for (uint32_t channels = 32; channels < 256; channels += 48) {
13068 DWConvMicrokernelTester()
13069 .cr(16)
13070 .kr(25)
13071 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013072 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013073 }
13074 }
13075
13076 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
13077 TEST_REQUIRES_X86_AVX512SKX;
13078 for (uint32_t channels = 32; channels < 256; channels += 48) {
13079 DWConvMicrokernelTester()
13080 .cr(16)
13081 .kr(25)
13082 .channels(channels)
13083 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013084 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013085 }
13086 }
13087
13088 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
13089 TEST_REQUIRES_X86_AVX512SKX;
13090 for (uint32_t channels = 32; channels < 256; channels += 48) {
13091 DWConvMicrokernelTester()
13092 .cr(16)
13093 .kr(25)
13094 .channels(channels)
13095 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013096 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013097 }
13098 }
13099
13100 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
13101 TEST_REQUIRES_X86_AVX512SKX;
13102 for (uint32_t channels = 1; channels < 16; channels++) {
13103 DWConvMicrokernelTester()
13104 .cr(16)
13105 .kr(25)
13106 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013107 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013108 }
13109 }
13110
13111 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
13112 TEST_REQUIRES_X86_AVX512SKX;
13113 for (uint32_t channels = 17; channels < 32; channels++) {
13114 DWConvMicrokernelTester()
13115 .cr(16)
13116 .kr(25)
13117 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013118 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013119 }
13120 }
13121
13122 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
13123 TEST_REQUIRES_X86_AVX512SKX;
13124 for (uint32_t channels = 17; channels < 32; channels++) {
13125 DWConvMicrokernelTester()
13126 .cr(16)
13127 .kr(25)
13128 .channels(channels)
13129 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013130 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013131 }
13132 }
13133
13134 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
13135 TEST_REQUIRES_X86_AVX512SKX;
13136 for (uint32_t channels = 17; channels < 32; channels++) {
13137 DWConvMicrokernelTester()
13138 .cr(16)
13139 .kr(25)
13140 .channels(channels)
13141 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013142 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013143 }
13144 }
13145
13146 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
13147 TEST_REQUIRES_X86_AVX512SKX;
13148 for (size_t channels = 1; channels <= 80; channels += 15) {
13149 DWConvMicrokernelTester()
13150 .cr(16)
13151 .kr(25)
13152 .channels(channels)
13153 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013154 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013155 }
13156 }
13157
13158 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
13159 TEST_REQUIRES_X86_AVX512SKX;
13160 for (size_t channels = 1; channels <= 80; channels += 15) {
13161 for (size_t step = 2; step <= 25; step++) {
13162 DWConvMicrokernelTester()
13163 .cr(16)
13164 .kr(25)
13165 .channels(channels)
13166 .width(3)
13167 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013168 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013169 }
13170 }
13171 }
13172
13173 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
13174 TEST_REQUIRES_X86_AVX512SKX;
13175 for (size_t channels = 1; channels <= 80; channels += 15) {
13176 DWConvMicrokernelTester()
13177 .cr(16)
13178 .kr(25)
13179 .channels(16)
13180 .width(5)
13181 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013182 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013183 }
13184 }
13185
13186 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
13187 TEST_REQUIRES_X86_AVX512SKX;
13188 for (size_t channels = 1; channels <= 80; channels += 15) {
13189 DWConvMicrokernelTester()
13190 .cr(16)
13191 .kr(25)
13192 .channels(channels)
13193 .width(3)
13194 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013195 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013196 }
13197 }
13198
13199 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
13200 TEST_REQUIRES_X86_AVX512SKX;
13201 for (size_t channels = 1; channels <= 80; channels += 15) {
13202 DWConvMicrokernelTester()
13203 .cr(16)
13204 .kr(25)
13205 .channels(channels)
13206 .width(3)
13207 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013208 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013209 }
13210 }
13211
13212 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_zero_point_only) {
13213 TEST_REQUIRES_X86_AVX512SKX;
13214 for (size_t channels = 1; channels <= 80; channels += 15) {
13215 DWConvMicrokernelTester()
13216 .cr(16)
13217 .kr(25)
13218 .channels(channels)
13219 .width(3)
13220 .input_zero_point(255)
13221 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013222 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013223 }
13224 }
13225
13226 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, kernel_zero_point_only) {
13227 TEST_REQUIRES_X86_AVX512SKX;
13228 for (size_t channels = 1; channels <= 80; channels += 15) {
13229 DWConvMicrokernelTester()
13230 .cr(16)
13231 .kr(25)
13232 .channels(channels)
13233 .width(3)
13234 .input_zero_point(0)
13235 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080013236 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013237 }
13238 }
13239
13240 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
13241 TEST_REQUIRES_X86_AVX512SKX;
13242 for (uint32_t channels = 32; channels < 256; channels += 48) {
13243 DWConvMicrokernelTester()
13244 .cr(16)
13245 .kr(25)
13246 .channels(channels)
13247 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080013248 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013249 }
13250 }
13251
13252 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
13253 TEST_REQUIRES_X86_AVX512SKX;
13254 for (uint32_t mz = 0; mz < 25; mz++) {
13255 for (uint32_t channels = 32; channels < 256; channels += 48) {
13256 DWConvMicrokernelTester()
13257 .cr(16)
13258 .kr(25)
13259 .channels(channels)
13260 .input_offset(304)
13261 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013262 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013263 }
13264 }
13265 }
13266#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13267
13268
13269#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13270 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
13271 TEST_REQUIRES_X86_AVX512SKX;
13272 DWConvMicrokernelTester()
13273 .cr(32)
13274 .kr(25)
13275 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080013276 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013277 }
13278
13279 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
13280 TEST_REQUIRES_X86_AVX512SKX;
13281 for (uint32_t channels = 64; channels < 512; channels += 96) {
13282 DWConvMicrokernelTester()
13283 .cr(32)
13284 .kr(25)
13285 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013287 }
13288 }
13289
13290 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
13291 TEST_REQUIRES_X86_AVX512SKX;
13292 for (uint32_t channels = 64; channels < 512; channels += 96) {
13293 DWConvMicrokernelTester()
13294 .cr(32)
13295 .kr(25)
13296 .channels(channels)
13297 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013298 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013299 }
13300 }
13301
13302 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
13303 TEST_REQUIRES_X86_AVX512SKX;
13304 for (uint32_t channels = 64; channels < 512; channels += 96) {
13305 DWConvMicrokernelTester()
13306 .cr(32)
13307 .kr(25)
13308 .channels(channels)
13309 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013310 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013311 }
13312 }
13313
13314 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
13315 TEST_REQUIRES_X86_AVX512SKX;
13316 for (uint32_t channels = 1; channels < 32; channels++) {
13317 DWConvMicrokernelTester()
13318 .cr(32)
13319 .kr(25)
13320 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013321 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013322 }
13323 }
13324
13325 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
13326 TEST_REQUIRES_X86_AVX512SKX;
13327 for (uint32_t channels = 33; channels < 64; channels++) {
13328 DWConvMicrokernelTester()
13329 .cr(32)
13330 .kr(25)
13331 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013332 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013333 }
13334 }
13335
13336 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
13337 TEST_REQUIRES_X86_AVX512SKX;
13338 for (uint32_t channels = 33; channels < 64; channels++) {
13339 DWConvMicrokernelTester()
13340 .cr(32)
13341 .kr(25)
13342 .channels(channels)
13343 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013344 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013345 }
13346 }
13347
13348 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
13349 TEST_REQUIRES_X86_AVX512SKX;
13350 for (uint32_t channels = 33; channels < 64; channels++) {
13351 DWConvMicrokernelTester()
13352 .cr(32)
13353 .kr(25)
13354 .channels(channels)
13355 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013356 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013357 }
13358 }
13359
13360 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
13361 TEST_REQUIRES_X86_AVX512SKX;
13362 for (size_t channels = 1; channels <= 160; channels += 31) {
13363 DWConvMicrokernelTester()
13364 .cr(32)
13365 .kr(25)
13366 .channels(channels)
13367 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013368 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013369 }
13370 }
13371
13372 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
13373 TEST_REQUIRES_X86_AVX512SKX;
13374 for (size_t channels = 1; channels <= 160; channels += 31) {
13375 for (size_t step = 2; step <= 25; step++) {
13376 DWConvMicrokernelTester()
13377 .cr(32)
13378 .kr(25)
13379 .channels(channels)
13380 .width(3)
13381 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013382 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013383 }
13384 }
13385 }
13386
13387 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
13388 TEST_REQUIRES_X86_AVX512SKX;
13389 for (size_t channels = 1; channels <= 160; channels += 31) {
13390 DWConvMicrokernelTester()
13391 .cr(32)
13392 .kr(25)
13393 .channels(32)
13394 .width(5)
13395 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013396 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013397 }
13398 }
13399
13400 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
13401 TEST_REQUIRES_X86_AVX512SKX;
13402 for (size_t channels = 1; channels <= 160; channels += 31) {
13403 DWConvMicrokernelTester()
13404 .cr(32)
13405 .kr(25)
13406 .channels(channels)
13407 .width(3)
13408 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013409 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013410 }
13411 }
13412
13413 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
13414 TEST_REQUIRES_X86_AVX512SKX;
13415 for (size_t channels = 1; channels <= 160; channels += 31) {
13416 DWConvMicrokernelTester()
13417 .cr(32)
13418 .kr(25)
13419 .channels(channels)
13420 .width(3)
13421 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013422 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013423 }
13424 }
13425
13426 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_zero_point_only) {
13427 TEST_REQUIRES_X86_AVX512SKX;
13428 for (size_t channels = 1; channels <= 160; channels += 31) {
13429 DWConvMicrokernelTester()
13430 .cr(32)
13431 .kr(25)
13432 .channels(channels)
13433 .width(3)
13434 .input_zero_point(255)
13435 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013436 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013437 }
13438 }
13439
13440 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, kernel_zero_point_only) {
13441 TEST_REQUIRES_X86_AVX512SKX;
13442 for (size_t channels = 1; channels <= 160; channels += 31) {
13443 DWConvMicrokernelTester()
13444 .cr(32)
13445 .kr(25)
13446 .channels(channels)
13447 .width(3)
13448 .input_zero_point(0)
13449 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080013450 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013451 }
13452 }
13453
13454 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
13455 TEST_REQUIRES_X86_AVX512SKX;
13456 for (uint32_t channels = 64; channels < 512; channels += 96) {
13457 DWConvMicrokernelTester()
13458 .cr(32)
13459 .kr(25)
13460 .channels(channels)
13461 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080013462 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013463 }
13464 }
13465
13466 TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
13467 TEST_REQUIRES_X86_AVX512SKX;
13468 for (uint32_t mz = 0; mz < 25; mz++) {
13469 for (uint32_t channels = 64; channels < 512; channels += 96) {
13470 DWConvMicrokernelTester()
13471 .cr(32)
13472 .kr(25)
13473 .channels(channels)
13474 .input_offset(592)
13475 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013476 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhancfd606b2021-07-09 01:18:45 -070013477 }
13478 }
13479 }
13480#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanf6011352021-07-15 15:11:14 -070013481
13482
Marat Dukhan4c617792021-12-21 15:47:58 -080013483#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -070013484 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
13485 DWConvMicrokernelTester()
13486 .cr(8)
13487 .kr(25)
13488 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013489 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013490 }
13491
13492 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
13493 for (uint32_t channels = 16; channels < 128; channels += 24) {
13494 DWConvMicrokernelTester()
13495 .cr(8)
13496 .kr(25)
13497 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013498 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013499 }
13500 }
13501
13502 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
13503 for (uint32_t channels = 16; channels < 128; channels += 24) {
13504 DWConvMicrokernelTester()
13505 .cr(8)
13506 .kr(25)
13507 .channels(channels)
13508 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013509 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013510 }
13511 }
13512
13513 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
13514 for (uint32_t channels = 16; channels < 128; channels += 24) {
13515 DWConvMicrokernelTester()
13516 .cr(8)
13517 .kr(25)
13518 .channels(channels)
13519 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013520 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013521 }
13522 }
13523
13524 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
13525 for (uint32_t channels = 1; channels < 8; channels++) {
13526 DWConvMicrokernelTester()
13527 .cr(8)
13528 .kr(25)
13529 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013530 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013531 }
13532 }
13533
13534 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
13535 for (uint32_t channels = 9; channels < 16; channels++) {
13536 DWConvMicrokernelTester()
13537 .cr(8)
13538 .kr(25)
13539 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013540 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013541 }
13542 }
13543
13544 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
13545 for (uint32_t channels = 9; channels < 16; channels++) {
13546 DWConvMicrokernelTester()
13547 .cr(8)
13548 .kr(25)
13549 .channels(channels)
13550 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013551 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013552 }
13553 }
13554
13555 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
13556 for (uint32_t channels = 9; channels < 16; channels++) {
13557 DWConvMicrokernelTester()
13558 .cr(8)
13559 .kr(25)
13560 .channels(channels)
13561 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013563 }
13564 }
13565
13566 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
13567 for (size_t channels = 1; channels <= 40; channels += 7) {
13568 DWConvMicrokernelTester()
13569 .cr(8)
13570 .kr(25)
13571 .channels(channels)
13572 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013573 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013574 }
13575 }
13576
13577 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
13578 for (size_t channels = 1; channels <= 40; channels += 7) {
13579 for (size_t step = 2; step <= 25; step++) {
13580 DWConvMicrokernelTester()
13581 .cr(8)
13582 .kr(25)
13583 .channels(channels)
13584 .width(3)
13585 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013586 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013587 }
13588 }
13589 }
13590
13591 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
13592 for (size_t channels = 1; channels <= 40; channels += 7) {
13593 DWConvMicrokernelTester()
13594 .cr(8)
13595 .kr(25)
13596 .channels(8)
13597 .width(5)
13598 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013599 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013600 }
13601 }
13602
13603 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
13604 for (size_t channels = 1; channels <= 40; channels += 7) {
13605 DWConvMicrokernelTester()
13606 .cr(8)
13607 .kr(25)
13608 .channels(channels)
13609 .width(3)
13610 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013611 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013612 }
13613 }
13614
13615 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
13616 for (size_t channels = 1; channels <= 40; channels += 7) {
13617 DWConvMicrokernelTester()
13618 .cr(8)
13619 .kr(25)
13620 .channels(channels)
13621 .width(3)
13622 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013623 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013624 }
13625 }
13626
13627 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_zero_point_only) {
13628 for (size_t channels = 1; channels <= 40; channels += 7) {
13629 DWConvMicrokernelTester()
13630 .cr(8)
13631 .kr(25)
13632 .channels(channels)
13633 .width(3)
13634 .input_zero_point(255)
13635 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013636 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013637 }
13638 }
13639
13640 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, kernel_zero_point_only) {
13641 for (size_t channels = 1; channels <= 40; channels += 7) {
13642 DWConvMicrokernelTester()
13643 .cr(8)
13644 .kr(25)
13645 .channels(channels)
13646 .width(3)
13647 .input_zero_point(0)
13648 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080013649 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013650 }
13651 }
13652
13653 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
13654 for (uint32_t channels = 16; channels < 128; channels += 24) {
13655 DWConvMicrokernelTester()
13656 .cr(8)
13657 .kr(25)
13658 .channels(channels)
13659 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080013660 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013661 }
13662 }
13663
13664 TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
13665 for (uint32_t mz = 0; mz < 25; mz++) {
13666 for (uint32_t channels = 16; channels < 128; channels += 24) {
13667 DWConvMicrokernelTester()
13668 .cr(8)
13669 .kr(25)
13670 .channels(channels)
13671 .input_offset(176)
13672 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013673 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013674 }
13675 }
13676 }
Marat Dukhan4c617792021-12-21 15:47:58 -080013677#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -070013678
13679
Marat Dukhan4c617792021-12-21 15:47:58 -080013680#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -070013681 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
13682 DWConvMicrokernelTester()
13683 .cr(16)
13684 .kr(25)
13685 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013686 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013687 }
13688
13689 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
13690 for (uint32_t channels = 32; channels < 256; channels += 48) {
13691 DWConvMicrokernelTester()
13692 .cr(16)
13693 .kr(25)
13694 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013695 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013696 }
13697 }
13698
13699 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
13700 for (uint32_t channels = 32; channels < 256; channels += 48) {
13701 DWConvMicrokernelTester()
13702 .cr(16)
13703 .kr(25)
13704 .channels(channels)
13705 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013706 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013707 }
13708 }
13709
13710 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
13711 for (uint32_t channels = 32; channels < 256; channels += 48) {
13712 DWConvMicrokernelTester()
13713 .cr(16)
13714 .kr(25)
13715 .channels(channels)
13716 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013717 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013718 }
13719 }
13720
13721 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
13722 for (uint32_t channels = 1; channels < 16; channels++) {
13723 DWConvMicrokernelTester()
13724 .cr(16)
13725 .kr(25)
13726 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013727 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013728 }
13729 }
13730
13731 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
13732 for (uint32_t channels = 17; channels < 32; channels++) {
13733 DWConvMicrokernelTester()
13734 .cr(16)
13735 .kr(25)
13736 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013737 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013738 }
13739 }
13740
13741 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
13742 for (uint32_t channels = 17; channels < 32; channels++) {
13743 DWConvMicrokernelTester()
13744 .cr(16)
13745 .kr(25)
13746 .channels(channels)
13747 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013748 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013749 }
13750 }
13751
13752 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
13753 for (uint32_t channels = 17; channels < 32; channels++) {
13754 DWConvMicrokernelTester()
13755 .cr(16)
13756 .kr(25)
13757 .channels(channels)
13758 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013759 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013760 }
13761 }
13762
13763 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
13764 for (size_t channels = 1; channels <= 80; channels += 15) {
13765 DWConvMicrokernelTester()
13766 .cr(16)
13767 .kr(25)
13768 .channels(channels)
13769 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013770 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013771 }
13772 }
13773
13774 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
13775 for (size_t channels = 1; channels <= 80; channels += 15) {
13776 for (size_t step = 2; step <= 25; step++) {
13777 DWConvMicrokernelTester()
13778 .cr(16)
13779 .kr(25)
13780 .channels(channels)
13781 .width(3)
13782 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013783 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013784 }
13785 }
13786 }
13787
13788 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
13789 for (size_t channels = 1; channels <= 80; channels += 15) {
13790 DWConvMicrokernelTester()
13791 .cr(16)
13792 .kr(25)
13793 .channels(16)
13794 .width(5)
13795 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013796 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013797 }
13798 }
13799
13800 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
13801 for (size_t channels = 1; channels <= 80; channels += 15) {
13802 DWConvMicrokernelTester()
13803 .cr(16)
13804 .kr(25)
13805 .channels(channels)
13806 .width(3)
13807 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013808 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013809 }
13810 }
13811
13812 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
13813 for (size_t channels = 1; channels <= 80; channels += 15) {
13814 DWConvMicrokernelTester()
13815 .cr(16)
13816 .kr(25)
13817 .channels(channels)
13818 .width(3)
13819 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013820 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013821 }
13822 }
13823
13824 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_zero_point_only) {
13825 for (size_t channels = 1; channels <= 80; channels += 15) {
13826 DWConvMicrokernelTester()
13827 .cr(16)
13828 .kr(25)
13829 .channels(channels)
13830 .width(3)
13831 .input_zero_point(255)
13832 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013833 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013834 }
13835 }
13836
13837 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, kernel_zero_point_only) {
13838 for (size_t channels = 1; channels <= 80; channels += 15) {
13839 DWConvMicrokernelTester()
13840 .cr(16)
13841 .kr(25)
13842 .channels(channels)
13843 .width(3)
13844 .input_zero_point(0)
13845 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080013846 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013847 }
13848 }
13849
13850 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
13851 for (uint32_t channels = 32; channels < 256; channels += 48) {
13852 DWConvMicrokernelTester()
13853 .cr(16)
13854 .kr(25)
13855 .channels(channels)
13856 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080013857 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013858 }
13859 }
13860
13861 TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
13862 for (uint32_t mz = 0; mz < 25; mz++) {
13863 for (uint32_t channels = 32; channels < 256; channels += 48) {
13864 DWConvMicrokernelTester()
13865 .cr(16)
13866 .kr(25)
13867 .channels(channels)
13868 .input_offset(304)
13869 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013870 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013871 }
13872 }
13873 }
Marat Dukhan4c617792021-12-21 15:47:58 -080013874#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -070013875
13876
Marat Dukhan4c617792021-12-21 15:47:58 -080013877#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf6011352021-07-15 15:11:14 -070013878 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
13879 DWConvMicrokernelTester()
13880 .cr(24)
13881 .kr(25)
13882 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080013883 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013884 }
13885
13886 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
13887 for (uint32_t channels = 48; channels < 384; channels += 72) {
13888 DWConvMicrokernelTester()
13889 .cr(24)
13890 .kr(25)
13891 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013892 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013893 }
13894 }
13895
13896 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
13897 for (uint32_t channels = 48; channels < 384; channels += 72) {
13898 DWConvMicrokernelTester()
13899 .cr(24)
13900 .kr(25)
13901 .channels(channels)
13902 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013903 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013904 }
13905 }
13906
13907 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
13908 for (uint32_t channels = 48; channels < 384; channels += 72) {
13909 DWConvMicrokernelTester()
13910 .cr(24)
13911 .kr(25)
13912 .channels(channels)
13913 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013914 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013915 }
13916 }
13917
13918 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
13919 for (uint32_t channels = 1; channels < 24; channels++) {
13920 DWConvMicrokernelTester()
13921 .cr(24)
13922 .kr(25)
13923 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013924 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013925 }
13926 }
13927
13928 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
13929 for (uint32_t channels = 25; channels < 48; channels++) {
13930 DWConvMicrokernelTester()
13931 .cr(24)
13932 .kr(25)
13933 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013934 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013935 }
13936 }
13937
13938 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
13939 for (uint32_t channels = 25; channels < 48; channels++) {
13940 DWConvMicrokernelTester()
13941 .cr(24)
13942 .kr(25)
13943 .channels(channels)
13944 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013945 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013946 }
13947 }
13948
13949 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
13950 for (uint32_t channels = 25; channels < 48; channels++) {
13951 DWConvMicrokernelTester()
13952 .cr(24)
13953 .kr(25)
13954 .channels(channels)
13955 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013956 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013957 }
13958 }
13959
13960 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
13961 for (size_t channels = 1; channels <= 120; channels += 23) {
13962 DWConvMicrokernelTester()
13963 .cr(24)
13964 .kr(25)
13965 .channels(channels)
13966 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013967 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013968 }
13969 }
13970
13971 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
13972 for (size_t channels = 1; channels <= 120; channels += 23) {
13973 for (size_t step = 2; step <= 25; step++) {
13974 DWConvMicrokernelTester()
13975 .cr(24)
13976 .kr(25)
13977 .channels(channels)
13978 .width(3)
13979 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013980 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013981 }
13982 }
13983 }
13984
13985 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
13986 for (size_t channels = 1; channels <= 120; channels += 23) {
13987 DWConvMicrokernelTester()
13988 .cr(24)
13989 .kr(25)
13990 .channels(24)
13991 .width(5)
13992 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080013993 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070013994 }
13995 }
13996
13997 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
13998 for (size_t channels = 1; channels <= 120; channels += 23) {
13999 DWConvMicrokernelTester()
14000 .cr(24)
14001 .kr(25)
14002 .channels(channels)
14003 .width(3)
14004 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014005 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014006 }
14007 }
14008
14009 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
14010 for (size_t channels = 1; channels <= 120; channels += 23) {
14011 DWConvMicrokernelTester()
14012 .cr(24)
14013 .kr(25)
14014 .channels(channels)
14015 .width(3)
14016 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014017 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014018 }
14019 }
14020
14021 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_zero_point_only) {
14022 for (size_t channels = 1; channels <= 120; channels += 23) {
14023 DWConvMicrokernelTester()
14024 .cr(24)
14025 .kr(25)
14026 .channels(channels)
14027 .width(3)
14028 .input_zero_point(255)
14029 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014030 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014031 }
14032 }
14033
14034 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, kernel_zero_point_only) {
14035 for (size_t channels = 1; channels <= 120; channels += 23) {
14036 DWConvMicrokernelTester()
14037 .cr(24)
14038 .kr(25)
14039 .channels(channels)
14040 .width(3)
14041 .input_zero_point(0)
14042 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014043 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014044 }
14045 }
14046
14047 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
14048 for (uint32_t channels = 48; channels < 384; channels += 72) {
14049 DWConvMicrokernelTester()
14050 .cr(24)
14051 .kr(25)
14052 .channels(channels)
14053 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080014054 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014055 }
14056 }
14057
14058 TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
14059 for (uint32_t mz = 0; mz < 25; mz++) {
14060 for (uint32_t channels = 48; channels < 384; channels += 72) {
14061 DWConvMicrokernelTester()
14062 .cr(24)
14063 .kr(25)
14064 .channels(channels)
14065 .input_offset(464)
14066 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014067 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhanf6011352021-07-15 15:11:14 -070014068 }
14069 }
14070 }
Marat Dukhan4c617792021-12-21 15:47:58 -080014071#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1f714282021-07-15 15:41:32 -070014072
14073
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014074#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14075 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
14076 DWConvMicrokernelTester()
14077 .cr(1)
14078 .kr(25)
14079 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014080 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014081 }
14082
14083 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
14084 for (uint32_t channels = 2; channels < 10; channels++) {
14085 DWConvMicrokernelTester()
14086 .cr(1)
14087 .kr(25)
14088 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014089 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014090 }
14091 }
14092
14093 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
14094 for (uint32_t channels = 2; channels < 10; channels++) {
14095 DWConvMicrokernelTester()
14096 .cr(1)
14097 .kr(25)
14098 .channels(channels)
14099 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014100 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014101 }
14102 }
14103
14104 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
14105 for (uint32_t channels = 2; channels < 10; channels++) {
14106 DWConvMicrokernelTester()
14107 .cr(1)
14108 .kr(25)
14109 .channels(channels)
14110 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014111 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014112 }
14113 }
14114
14115 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
14116 for (size_t channels = 1; channels <= 5; channels += 1) {
14117 DWConvMicrokernelTester()
14118 .cr(1)
14119 .kr(25)
14120 .channels(channels)
14121 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014122 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014123 }
14124 }
14125
14126 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
14127 for (size_t channels = 1; channels <= 5; channels += 1) {
14128 for (size_t step = 2; step <= 25; step++) {
14129 DWConvMicrokernelTester()
14130 .cr(1)
14131 .kr(25)
14132 .channels(channels)
14133 .width(3)
14134 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014135 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014136 }
14137 }
14138 }
14139
14140 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
14141 for (size_t channels = 1; channels <= 5; channels += 1) {
14142 DWConvMicrokernelTester()
14143 .cr(1)
14144 .kr(25)
14145 .channels(1)
14146 .width(5)
14147 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014148 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014149 }
14150 }
14151
14152 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
14153 for (size_t channels = 1; channels <= 5; channels += 1) {
14154 DWConvMicrokernelTester()
14155 .cr(1)
14156 .kr(25)
14157 .channels(channels)
14158 .width(3)
14159 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014160 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014161 }
14162 }
14163
14164 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
14165 for (size_t channels = 1; channels <= 5; channels += 1) {
14166 DWConvMicrokernelTester()
14167 .cr(1)
14168 .kr(25)
14169 .channels(channels)
14170 .width(3)
14171 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014172 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014173 }
14174 }
14175
14176 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_zero_point_only) {
14177 for (size_t channels = 1; channels <= 5; channels += 1) {
14178 DWConvMicrokernelTester()
14179 .cr(1)
14180 .kr(25)
14181 .channels(channels)
14182 .width(3)
14183 .input_zero_point(255)
14184 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014185 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014186 }
14187 }
14188
14189 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, kernel_zero_point_only) {
14190 for (size_t channels = 1; channels <= 5; channels += 1) {
14191 DWConvMicrokernelTester()
14192 .cr(1)
14193 .kr(25)
14194 .channels(channels)
14195 .width(3)
14196 .input_zero_point(0)
14197 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014198 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014199 }
14200 }
14201
14202 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
14203 for (uint32_t channels = 2; channels < 16; channels += 3) {
14204 DWConvMicrokernelTester()
14205 .cr(1)
14206 .kr(25)
14207 .channels(channels)
14208 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080014209 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014210 }
14211 }
14212
14213 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
14214 for (uint32_t mz = 0; mz < 25; mz++) {
14215 for (uint32_t channels = 2; channels < 16; channels += 3) {
14216 DWConvMicrokernelTester()
14217 .cr(1)
14218 .kr(25)
14219 .channels(channels)
14220 .input_offset(48)
14221 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014222 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014223 }
14224 }
14225 }
14226#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14227
14228
14229#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14230 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
14231 DWConvMicrokernelTester()
14232 .cr(2)
14233 .kr(25)
14234 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080014235 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014236 }
14237
14238 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
14239 for (uint32_t channels = 4; channels < 32; channels += 6) {
14240 DWConvMicrokernelTester()
14241 .cr(2)
14242 .kr(25)
14243 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014244 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014245 }
14246 }
14247
14248 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
14249 for (uint32_t channels = 4; channels < 32; channels += 6) {
14250 DWConvMicrokernelTester()
14251 .cr(2)
14252 .kr(25)
14253 .channels(channels)
14254 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014255 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014256 }
14257 }
14258
14259 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
14260 for (uint32_t channels = 4; channels < 32; channels += 6) {
14261 DWConvMicrokernelTester()
14262 .cr(2)
14263 .kr(25)
14264 .channels(channels)
14265 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014266 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014267 }
14268 }
14269
14270 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
14271 for (uint32_t channels = 1; channels < 2; channels++) {
14272 DWConvMicrokernelTester()
14273 .cr(2)
14274 .kr(25)
14275 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014276 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014277 }
14278 }
14279
14280 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
14281 for (uint32_t channels = 3; channels < 4; channels++) {
14282 DWConvMicrokernelTester()
14283 .cr(2)
14284 .kr(25)
14285 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014287 }
14288 }
14289
14290 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
14291 for (uint32_t channels = 3; channels < 4; channels++) {
14292 DWConvMicrokernelTester()
14293 .cr(2)
14294 .kr(25)
14295 .channels(channels)
14296 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014297 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014298 }
14299 }
14300
14301 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
14302 for (uint32_t channels = 3; channels < 4; channels++) {
14303 DWConvMicrokernelTester()
14304 .cr(2)
14305 .kr(25)
14306 .channels(channels)
14307 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014308 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014309 }
14310 }
14311
14312 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
14313 for (size_t channels = 1; channels <= 10; channels += 1) {
14314 DWConvMicrokernelTester()
14315 .cr(2)
14316 .kr(25)
14317 .channels(channels)
14318 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014319 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014320 }
14321 }
14322
14323 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
14324 for (size_t channels = 1; channels <= 10; channels += 1) {
14325 for (size_t step = 2; step <= 25; step++) {
14326 DWConvMicrokernelTester()
14327 .cr(2)
14328 .kr(25)
14329 .channels(channels)
14330 .width(3)
14331 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014332 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014333 }
14334 }
14335 }
14336
14337 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
14338 for (size_t channels = 1; channels <= 10; channels += 1) {
14339 DWConvMicrokernelTester()
14340 .cr(2)
14341 .kr(25)
14342 .channels(2)
14343 .width(5)
14344 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080014345 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014346 }
14347 }
14348
14349 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
14350 for (size_t channels = 1; channels <= 10; channels += 1) {
14351 DWConvMicrokernelTester()
14352 .cr(2)
14353 .kr(25)
14354 .channels(channels)
14355 .width(3)
14356 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014357 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014358 }
14359 }
14360
14361 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
14362 for (size_t channels = 1; channels <= 10; channels += 1) {
14363 DWConvMicrokernelTester()
14364 .cr(2)
14365 .kr(25)
14366 .channels(channels)
14367 .width(3)
14368 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014369 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014370 }
14371 }
14372
14373 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_zero_point_only) {
14374 for (size_t channels = 1; channels <= 10; channels += 1) {
14375 DWConvMicrokernelTester()
14376 .cr(2)
14377 .kr(25)
14378 .channels(channels)
14379 .width(3)
14380 .input_zero_point(255)
14381 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014382 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014383 }
14384 }
14385
14386 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, kernel_zero_point_only) {
14387 for (size_t channels = 1; channels <= 10; channels += 1) {
14388 DWConvMicrokernelTester()
14389 .cr(2)
14390 .kr(25)
14391 .channels(channels)
14392 .width(3)
14393 .input_zero_point(0)
14394 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014395 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014396 }
14397 }
14398
14399 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
14400 for (uint32_t channels = 4; channels < 32; channels += 6) {
14401 DWConvMicrokernelTester()
14402 .cr(2)
14403 .kr(25)
14404 .channels(channels)
14405 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080014406 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014407 }
14408 }
14409
14410 TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
14411 for (uint32_t mz = 0; mz < 25; mz++) {
14412 for (uint32_t channels = 4; channels < 32; channels += 6) {
14413 DWConvMicrokernelTester()
14414 .cr(2)
14415 .kr(25)
14416 .channels(channels)
14417 .input_offset(80)
14418 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014419 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014420 }
14421 }
14422 }
14423#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14424
14425
14426#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14427 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
14428 DWConvMicrokernelTester()
14429 .cr(4)
14430 .kr(25)
14431 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080014432 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014433 }
14434
14435 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
14436 for (uint32_t channels = 8; channels < 64; channels += 12) {
14437 DWConvMicrokernelTester()
14438 .cr(4)
14439 .kr(25)
14440 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014441 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014442 }
14443 }
14444
14445 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
14446 for (uint32_t channels = 8; channels < 64; channels += 12) {
14447 DWConvMicrokernelTester()
14448 .cr(4)
14449 .kr(25)
14450 .channels(channels)
14451 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014452 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014453 }
14454 }
14455
14456 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
14457 for (uint32_t channels = 8; channels < 64; channels += 12) {
14458 DWConvMicrokernelTester()
14459 .cr(4)
14460 .kr(25)
14461 .channels(channels)
14462 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014463 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014464 }
14465 }
14466
14467 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
14468 for (uint32_t channels = 1; channels < 4; channels++) {
14469 DWConvMicrokernelTester()
14470 .cr(4)
14471 .kr(25)
14472 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014473 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014474 }
14475 }
14476
14477 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
14478 for (uint32_t channels = 5; channels < 8; channels++) {
14479 DWConvMicrokernelTester()
14480 .cr(4)
14481 .kr(25)
14482 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014483 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014484 }
14485 }
14486
14487 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
14488 for (uint32_t channels = 5; channels < 8; channels++) {
14489 DWConvMicrokernelTester()
14490 .cr(4)
14491 .kr(25)
14492 .channels(channels)
14493 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014494 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014495 }
14496 }
14497
14498 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
14499 for (uint32_t channels = 5; channels < 8; channels++) {
14500 DWConvMicrokernelTester()
14501 .cr(4)
14502 .kr(25)
14503 .channels(channels)
14504 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014505 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014506 }
14507 }
14508
14509 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
14510 for (size_t channels = 1; channels <= 20; channels += 3) {
14511 DWConvMicrokernelTester()
14512 .cr(4)
14513 .kr(25)
14514 .channels(channels)
14515 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014516 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014517 }
14518 }
14519
14520 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
14521 for (size_t channels = 1; channels <= 20; channels += 3) {
14522 for (size_t step = 2; step <= 25; step++) {
14523 DWConvMicrokernelTester()
14524 .cr(4)
14525 .kr(25)
14526 .channels(channels)
14527 .width(3)
14528 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014529 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014530 }
14531 }
14532 }
14533
14534 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
14535 for (size_t channels = 1; channels <= 20; channels += 3) {
14536 DWConvMicrokernelTester()
14537 .cr(4)
14538 .kr(25)
14539 .channels(4)
14540 .width(5)
14541 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080014542 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014543 }
14544 }
14545
14546 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
14547 for (size_t channels = 1; channels <= 20; channels += 3) {
14548 DWConvMicrokernelTester()
14549 .cr(4)
14550 .kr(25)
14551 .channels(channels)
14552 .width(3)
14553 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014554 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014555 }
14556 }
14557
14558 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
14559 for (size_t channels = 1; channels <= 20; channels += 3) {
14560 DWConvMicrokernelTester()
14561 .cr(4)
14562 .kr(25)
14563 .channels(channels)
14564 .width(3)
14565 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014566 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014567 }
14568 }
14569
14570 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_zero_point_only) {
14571 for (size_t channels = 1; channels <= 20; channels += 3) {
14572 DWConvMicrokernelTester()
14573 .cr(4)
14574 .kr(25)
14575 .channels(channels)
14576 .width(3)
14577 .input_zero_point(255)
14578 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014579 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014580 }
14581 }
14582
14583 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, kernel_zero_point_only) {
14584 for (size_t channels = 1; channels <= 20; channels += 3) {
14585 DWConvMicrokernelTester()
14586 .cr(4)
14587 .kr(25)
14588 .channels(channels)
14589 .width(3)
14590 .input_zero_point(0)
14591 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014592 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014593 }
14594 }
14595
14596 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
14597 for (uint32_t channels = 8; channels < 64; channels += 12) {
14598 DWConvMicrokernelTester()
14599 .cr(4)
14600 .kr(25)
14601 .channels(channels)
14602 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080014603 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014604 }
14605 }
14606
14607 TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
14608 for (uint32_t mz = 0; mz < 25; mz++) {
14609 for (uint32_t channels = 8; channels < 64; channels += 12) {
14610 DWConvMicrokernelTester()
14611 .cr(4)
14612 .kr(25)
14613 .channels(channels)
14614 .input_offset(112)
14615 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014616 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080014617 }
14618 }
14619 }
14620#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
14621
14622
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014623TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014624 DWConvMicrokernelTester()
14625 .cr(1)
14626 .kr(25)
14627 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014628 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014629}
14630
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014631TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014632 for (uint32_t channels = 2; channels < 10; channels++) {
14633 DWConvMicrokernelTester()
14634 .cr(1)
14635 .kr(25)
14636 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014637 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014638 }
14639}
14640
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014641TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014642 for (uint32_t channels = 2; channels < 10; channels++) {
14643 DWConvMicrokernelTester()
14644 .cr(1)
14645 .kr(25)
14646 .channels(channels)
14647 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014648 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014649 }
14650}
14651
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014652TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014653 for (uint32_t channels = 2; channels < 10; channels++) {
14654 DWConvMicrokernelTester()
14655 .cr(1)
14656 .kr(25)
14657 .channels(channels)
14658 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014659 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014660 }
14661}
14662
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014663TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014664 for (size_t channels = 1; channels <= 5; channels += 1) {
14665 DWConvMicrokernelTester()
14666 .cr(1)
14667 .kr(25)
14668 .channels(channels)
14669 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014670 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014671 }
14672}
14673
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014674TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014675 for (size_t channels = 1; channels <= 5; channels += 1) {
14676 for (size_t step = 2; step <= 25; step++) {
14677 DWConvMicrokernelTester()
14678 .cr(1)
14679 .kr(25)
14680 .channels(channels)
14681 .width(3)
14682 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014683 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014684 }
14685 }
14686}
14687
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014688TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014689 for (size_t channels = 1; channels <= 5; channels += 1) {
14690 DWConvMicrokernelTester()
14691 .cr(1)
14692 .kr(25)
14693 .channels(1)
14694 .width(5)
14695 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014696 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014697 }
14698}
14699
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014700TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014701 for (size_t channels = 1; channels <= 5; channels += 1) {
14702 DWConvMicrokernelTester()
14703 .cr(1)
14704 .kr(25)
14705 .channels(channels)
14706 .width(3)
14707 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014708 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014709 }
14710}
14711
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014712TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014713 for (size_t channels = 1; channels <= 5; channels += 1) {
14714 DWConvMicrokernelTester()
14715 .cr(1)
14716 .kr(25)
14717 .channels(channels)
14718 .width(3)
14719 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014720 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014721 }
14722}
14723
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014724TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014725 for (size_t channels = 1; channels <= 5; channels += 1) {
14726 DWConvMicrokernelTester()
14727 .cr(1)
14728 .kr(25)
14729 .channels(channels)
14730 .width(3)
14731 .input_zero_point(255)
14732 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014733 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014734 }
14735}
14736
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014737TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014738 for (size_t channels = 1; channels <= 5; channels += 1) {
14739 DWConvMicrokernelTester()
14740 .cr(1)
14741 .kr(25)
14742 .channels(channels)
14743 .width(3)
14744 .input_zero_point(0)
14745 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014746 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014747 }
14748}
14749
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014750TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014751 for (uint32_t channels = 2; channels < 16; channels += 3) {
14752 DWConvMicrokernelTester()
14753 .cr(1)
14754 .kr(25)
14755 .channels(channels)
14756 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080014757 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014758 }
14759}
14760
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014761TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014762 for (uint32_t mz = 0; mz < 25; mz++) {
14763 for (uint32_t channels = 2; channels < 16; channels += 3) {
14764 DWConvMicrokernelTester()
14765 .cr(1)
14766 .kr(25)
14767 .channels(channels)
14768 .input_offset(48)
14769 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014770 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014771 }
14772 }
14773}
14774
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014775TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014776 DWConvMicrokernelTester()
14777 .cr(2)
14778 .kr(25)
14779 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080014780 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014781}
14782
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014783TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014784 for (uint32_t channels = 4; channels < 32; channels += 6) {
14785 DWConvMicrokernelTester()
14786 .cr(2)
14787 .kr(25)
14788 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014789 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014790 }
14791}
14792
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014793TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014794 for (uint32_t channels = 4; channels < 32; channels += 6) {
14795 DWConvMicrokernelTester()
14796 .cr(2)
14797 .kr(25)
14798 .channels(channels)
14799 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014800 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014801 }
14802}
14803
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014804TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014805 for (uint32_t channels = 4; channels < 32; channels += 6) {
14806 DWConvMicrokernelTester()
14807 .cr(2)
14808 .kr(25)
14809 .channels(channels)
14810 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014811 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014812 }
14813}
14814
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014815TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014816 for (uint32_t channels = 1; channels < 2; channels++) {
14817 DWConvMicrokernelTester()
14818 .cr(2)
14819 .kr(25)
14820 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014821 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014822 }
14823}
14824
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014825TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014826 for (uint32_t channels = 3; channels < 4; channels++) {
14827 DWConvMicrokernelTester()
14828 .cr(2)
14829 .kr(25)
14830 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014831 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014832 }
14833}
14834
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014835TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014836 for (uint32_t channels = 3; channels < 4; channels++) {
14837 DWConvMicrokernelTester()
14838 .cr(2)
14839 .kr(25)
14840 .channels(channels)
14841 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014842 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014843 }
14844}
14845
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014846TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014847 for (uint32_t channels = 3; channels < 4; channels++) {
14848 DWConvMicrokernelTester()
14849 .cr(2)
14850 .kr(25)
14851 .channels(channels)
14852 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014853 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014854 }
14855}
14856
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014857TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014858 for (size_t channels = 1; channels <= 10; channels += 1) {
14859 DWConvMicrokernelTester()
14860 .cr(2)
14861 .kr(25)
14862 .channels(channels)
14863 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014864 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014865 }
14866}
14867
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014868TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014869 for (size_t channels = 1; channels <= 10; channels += 1) {
14870 for (size_t step = 2; step <= 25; step++) {
14871 DWConvMicrokernelTester()
14872 .cr(2)
14873 .kr(25)
14874 .channels(channels)
14875 .width(3)
14876 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014877 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014878 }
14879 }
14880}
14881
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014882TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014883 for (size_t channels = 1; channels <= 10; channels += 1) {
14884 DWConvMicrokernelTester()
14885 .cr(2)
14886 .kr(25)
14887 .channels(2)
14888 .width(5)
14889 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080014890 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014891 }
14892}
14893
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014894TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014895 for (size_t channels = 1; channels <= 10; channels += 1) {
14896 DWConvMicrokernelTester()
14897 .cr(2)
14898 .kr(25)
14899 .channels(channels)
14900 .width(3)
14901 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014902 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014903 }
14904}
14905
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014906TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014907 for (size_t channels = 1; channels <= 10; channels += 1) {
14908 DWConvMicrokernelTester()
14909 .cr(2)
14910 .kr(25)
14911 .channels(channels)
14912 .width(3)
14913 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014914 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014915 }
14916}
14917
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014918TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014919 for (size_t channels = 1; channels <= 10; channels += 1) {
14920 DWConvMicrokernelTester()
14921 .cr(2)
14922 .kr(25)
14923 .channels(channels)
14924 .width(3)
14925 .input_zero_point(255)
14926 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014927 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014928 }
14929}
14930
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014931TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014932 for (size_t channels = 1; channels <= 10; channels += 1) {
14933 DWConvMicrokernelTester()
14934 .cr(2)
14935 .kr(25)
14936 .channels(channels)
14937 .width(3)
14938 .input_zero_point(0)
14939 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080014940 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014941 }
14942}
14943
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014944TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014945 for (uint32_t channels = 4; channels < 32; channels += 6) {
14946 DWConvMicrokernelTester()
14947 .cr(2)
14948 .kr(25)
14949 .channels(channels)
14950 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080014951 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014952 }
14953}
14954
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014955TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014956 for (uint32_t mz = 0; mz < 25; mz++) {
14957 for (uint32_t channels = 4; channels < 32; channels += 6) {
14958 DWConvMicrokernelTester()
14959 .cr(2)
14960 .kr(25)
14961 .channels(channels)
14962 .input_offset(80)
14963 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014964 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014965 }
14966 }
14967}
14968
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014969TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014970 DWConvMicrokernelTester()
14971 .cr(4)
14972 .kr(25)
14973 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080014974 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014975}
14976
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014977TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014978 for (uint32_t channels = 8; channels < 64; channels += 12) {
14979 DWConvMicrokernelTester()
14980 .cr(4)
14981 .kr(25)
14982 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014983 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014984 }
14985}
14986
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014987TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014988 for (uint32_t channels = 8; channels < 64; channels += 12) {
14989 DWConvMicrokernelTester()
14990 .cr(4)
14991 .kr(25)
14992 .channels(channels)
14993 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014994 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070014995 }
14996}
14997
Marat Dukhan2ac722e2022-01-04 01:54:20 -080014998TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070014999 for (uint32_t channels = 8; channels < 64; channels += 12) {
15000 DWConvMicrokernelTester()
15001 .cr(4)
15002 .kr(25)
15003 .channels(channels)
15004 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015005 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015006 }
15007}
15008
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015009TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015010 for (uint32_t channels = 1; channels < 4; channels++) {
15011 DWConvMicrokernelTester()
15012 .cr(4)
15013 .kr(25)
15014 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015015 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015016 }
15017}
15018
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015019TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015020 for (uint32_t channels = 5; channels < 8; channels++) {
15021 DWConvMicrokernelTester()
15022 .cr(4)
15023 .kr(25)
15024 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015025 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015026 }
15027}
15028
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015029TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015030 for (uint32_t channels = 5; channels < 8; channels++) {
15031 DWConvMicrokernelTester()
15032 .cr(4)
15033 .kr(25)
15034 .channels(channels)
15035 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015036 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015037 }
15038}
15039
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015040TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015041 for (uint32_t channels = 5; channels < 8; channels++) {
15042 DWConvMicrokernelTester()
15043 .cr(4)
15044 .kr(25)
15045 .channels(channels)
15046 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015047 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015048 }
15049}
15050
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015051TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015052 for (size_t channels = 1; channels <= 20; channels += 3) {
15053 DWConvMicrokernelTester()
15054 .cr(4)
15055 .kr(25)
15056 .channels(channels)
15057 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015058 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015059 }
15060}
15061
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015062TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015063 for (size_t channels = 1; channels <= 20; channels += 3) {
15064 for (size_t step = 2; step <= 25; step++) {
15065 DWConvMicrokernelTester()
15066 .cr(4)
15067 .kr(25)
15068 .channels(channels)
15069 .width(3)
15070 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015071 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015072 }
15073 }
15074}
15075
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015076TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015077 for (size_t channels = 1; channels <= 20; channels += 3) {
15078 DWConvMicrokernelTester()
15079 .cr(4)
15080 .kr(25)
15081 .channels(4)
15082 .width(5)
15083 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080015084 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015085 }
15086}
15087
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015088TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015089 for (size_t channels = 1; channels <= 20; channels += 3) {
15090 DWConvMicrokernelTester()
15091 .cr(4)
15092 .kr(25)
15093 .channels(channels)
15094 .width(3)
15095 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015096 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015097 }
15098}
15099
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015100TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015101 for (size_t channels = 1; channels <= 20; channels += 3) {
15102 DWConvMicrokernelTester()
15103 .cr(4)
15104 .kr(25)
15105 .channels(channels)
15106 .width(3)
15107 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015108 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015109 }
15110}
15111
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015112TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015113 for (size_t channels = 1; channels <= 20; channels += 3) {
15114 DWConvMicrokernelTester()
15115 .cr(4)
15116 .kr(25)
15117 .channels(channels)
15118 .width(3)
15119 .input_zero_point(255)
15120 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015121 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015122 }
15123}
15124
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015125TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, kernel_zero_point_only) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015126 for (size_t channels = 1; channels <= 20; channels += 3) {
15127 DWConvMicrokernelTester()
15128 .cr(4)
15129 .kr(25)
15130 .channels(channels)
15131 .width(3)
15132 .input_zero_point(0)
15133 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080015134 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015135 }
15136}
15137
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015138TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015139 for (uint32_t channels = 8; channels < 64; channels += 12) {
15140 DWConvMicrokernelTester()
15141 .cr(4)
15142 .kr(25)
15143 .channels(channels)
15144 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080015145 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015146 }
15147}
15148
Marat Dukhan2ac722e2022-01-04 01:54:20 -080015149TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
Marat Dukhan1f714282021-07-15 15:41:32 -070015150 for (uint32_t mz = 0; mz < 25; mz++) {
15151 for (uint32_t channels = 8; channels < 64; channels += 12) {
15152 DWConvMicrokernelTester()
15153 .cr(4)
15154 .kr(25)
15155 .channels(channels)
15156 .input_offset(112)
15157 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015158 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan1f714282021-07-15 15:41:32 -070015159 }
15160 }
Marat Dukhan272d4d92022-01-04 15:07:14 -080015161}
15162
15163TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
15164 DWConvMicrokernelTester()
15165 .cr(1)
15166 .kr(25)
15167 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015168 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015169}
15170
15171TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
15172 for (uint32_t channels = 2; channels < 10; channels++) {
15173 DWConvMicrokernelTester()
15174 .cr(1)
15175 .kr(25)
15176 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015177 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015178 }
15179}
15180
15181TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
15182 for (uint32_t channels = 2; channels < 10; channels++) {
15183 DWConvMicrokernelTester()
15184 .cr(1)
15185 .kr(25)
15186 .channels(channels)
15187 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015188 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015189 }
15190}
15191
15192TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
15193 for (uint32_t channels = 2; channels < 10; channels++) {
15194 DWConvMicrokernelTester()
15195 .cr(1)
15196 .kr(25)
15197 .channels(channels)
15198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015199 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015200 }
15201}
15202
15203TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
15204 for (size_t channels = 1; channels <= 5; channels += 1) {
15205 DWConvMicrokernelTester()
15206 .cr(1)
15207 .kr(25)
15208 .channels(channels)
15209 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015210 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015211 }
15212}
15213
15214TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
15215 for (size_t channels = 1; channels <= 5; channels += 1) {
15216 for (size_t step = 2; step <= 25; step++) {
15217 DWConvMicrokernelTester()
15218 .cr(1)
15219 .kr(25)
15220 .channels(channels)
15221 .width(3)
15222 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015223 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015224 }
15225 }
15226}
15227
15228TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
15229 for (size_t channels = 1; channels <= 5; channels += 1) {
15230 DWConvMicrokernelTester()
15231 .cr(1)
15232 .kr(25)
15233 .channels(1)
15234 .width(5)
15235 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015236 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015237 }
15238}
15239
15240TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
15241 for (size_t channels = 1; channels <= 5; channels += 1) {
15242 DWConvMicrokernelTester()
15243 .cr(1)
15244 .kr(25)
15245 .channels(channels)
15246 .width(3)
15247 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015248 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015249 }
15250}
15251
15252TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
15253 for (size_t channels = 1; channels <= 5; channels += 1) {
15254 DWConvMicrokernelTester()
15255 .cr(1)
15256 .kr(25)
15257 .channels(channels)
15258 .width(3)
15259 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015260 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015261 }
15262}
15263
15264TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_zero_point_only) {
15265 for (size_t channels = 1; channels <= 5; channels += 1) {
15266 DWConvMicrokernelTester()
15267 .cr(1)
15268 .kr(25)
15269 .channels(channels)
15270 .width(3)
15271 .input_zero_point(255)
15272 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015273 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015274 }
15275}
15276
15277TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, kernel_zero_point_only) {
15278 for (size_t channels = 1; channels <= 5; channels += 1) {
15279 DWConvMicrokernelTester()
15280 .cr(1)
15281 .kr(25)
15282 .channels(channels)
15283 .width(3)
15284 .input_zero_point(0)
15285 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080015286 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015287 }
15288}
15289
15290TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
15291 for (uint32_t channels = 2; channels < 16; channels += 3) {
15292 DWConvMicrokernelTester()
15293 .cr(1)
15294 .kr(25)
15295 .channels(channels)
15296 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080015297 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015298 }
15299}
15300
15301TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
15302 for (uint32_t mz = 0; mz < 25; mz++) {
15303 for (uint32_t channels = 2; channels < 16; channels += 3) {
15304 DWConvMicrokernelTester()
15305 .cr(1)
15306 .kr(25)
15307 .channels(channels)
15308 .input_offset(48)
15309 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015310 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015311 }
15312 }
15313}
15314
15315TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
15316 DWConvMicrokernelTester()
15317 .cr(2)
15318 .kr(25)
15319 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080015320 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015321}
15322
15323TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
15324 for (uint32_t channels = 4; channels < 32; channels += 6) {
15325 DWConvMicrokernelTester()
15326 .cr(2)
15327 .kr(25)
15328 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015329 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015330 }
15331}
15332
15333TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
15334 for (uint32_t channels = 4; channels < 32; channels += 6) {
15335 DWConvMicrokernelTester()
15336 .cr(2)
15337 .kr(25)
15338 .channels(channels)
15339 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015340 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015341 }
15342}
15343
15344TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
15345 for (uint32_t channels = 4; channels < 32; channels += 6) {
15346 DWConvMicrokernelTester()
15347 .cr(2)
15348 .kr(25)
15349 .channels(channels)
15350 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015351 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015352 }
15353}
15354
15355TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
15356 for (uint32_t channels = 1; channels < 2; channels++) {
15357 DWConvMicrokernelTester()
15358 .cr(2)
15359 .kr(25)
15360 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015361 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015362 }
15363}
15364
15365TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
15366 for (uint32_t channels = 3; channels < 4; channels++) {
15367 DWConvMicrokernelTester()
15368 .cr(2)
15369 .kr(25)
15370 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015371 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015372 }
15373}
15374
15375TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
15376 for (uint32_t channels = 3; channels < 4; channels++) {
15377 DWConvMicrokernelTester()
15378 .cr(2)
15379 .kr(25)
15380 .channels(channels)
15381 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015382 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015383 }
15384}
15385
15386TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
15387 for (uint32_t channels = 3; channels < 4; channels++) {
15388 DWConvMicrokernelTester()
15389 .cr(2)
15390 .kr(25)
15391 .channels(channels)
15392 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015393 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015394 }
15395}
15396
15397TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
15398 for (size_t channels = 1; channels <= 10; channels += 1) {
15399 DWConvMicrokernelTester()
15400 .cr(2)
15401 .kr(25)
15402 .channels(channels)
15403 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015404 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015405 }
15406}
15407
15408TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
15409 for (size_t channels = 1; channels <= 10; channels += 1) {
15410 for (size_t step = 2; step <= 25; step++) {
15411 DWConvMicrokernelTester()
15412 .cr(2)
15413 .kr(25)
15414 .channels(channels)
15415 .width(3)
15416 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015417 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015418 }
15419 }
15420}
15421
15422TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
15423 for (size_t channels = 1; channels <= 10; channels += 1) {
15424 DWConvMicrokernelTester()
15425 .cr(2)
15426 .kr(25)
15427 .channels(2)
15428 .width(5)
15429 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080015430 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015431 }
15432}
15433
15434TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
15435 for (size_t channels = 1; channels <= 10; channels += 1) {
15436 DWConvMicrokernelTester()
15437 .cr(2)
15438 .kr(25)
15439 .channels(channels)
15440 .width(3)
15441 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015442 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015443 }
15444}
15445
15446TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
15447 for (size_t channels = 1; channels <= 10; channels += 1) {
15448 DWConvMicrokernelTester()
15449 .cr(2)
15450 .kr(25)
15451 .channels(channels)
15452 .width(3)
15453 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015454 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015455 }
15456}
15457
15458TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_zero_point_only) {
15459 for (size_t channels = 1; channels <= 10; channels += 1) {
15460 DWConvMicrokernelTester()
15461 .cr(2)
15462 .kr(25)
15463 .channels(channels)
15464 .width(3)
15465 .input_zero_point(255)
15466 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015467 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015468 }
15469}
15470
15471TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, kernel_zero_point_only) {
15472 for (size_t channels = 1; channels <= 10; channels += 1) {
15473 DWConvMicrokernelTester()
15474 .cr(2)
15475 .kr(25)
15476 .channels(channels)
15477 .width(3)
15478 .input_zero_point(0)
15479 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080015480 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015481 }
15482}
15483
15484TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
15485 for (uint32_t channels = 4; channels < 32; channels += 6) {
15486 DWConvMicrokernelTester()
15487 .cr(2)
15488 .kr(25)
15489 .channels(channels)
15490 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080015491 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015492 }
15493}
15494
15495TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
15496 for (uint32_t mz = 0; mz < 25; mz++) {
15497 for (uint32_t channels = 4; channels < 32; channels += 6) {
15498 DWConvMicrokernelTester()
15499 .cr(2)
15500 .kr(25)
15501 .channels(channels)
15502 .input_offset(80)
15503 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015504 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015505 }
15506 }
15507}
15508
15509TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
15510 DWConvMicrokernelTester()
15511 .cr(4)
15512 .kr(25)
15513 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080015514 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015515}
15516
15517TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
15518 for (uint32_t channels = 8; channels < 64; channels += 12) {
15519 DWConvMicrokernelTester()
15520 .cr(4)
15521 .kr(25)
15522 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015523 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015524 }
15525}
15526
15527TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
15528 for (uint32_t channels = 8; channels < 64; channels += 12) {
15529 DWConvMicrokernelTester()
15530 .cr(4)
15531 .kr(25)
15532 .channels(channels)
15533 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015534 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015535 }
15536}
15537
15538TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
15539 for (uint32_t channels = 8; channels < 64; channels += 12) {
15540 DWConvMicrokernelTester()
15541 .cr(4)
15542 .kr(25)
15543 .channels(channels)
15544 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015545 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015546 }
15547}
15548
15549TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
15550 for (uint32_t channels = 1; channels < 4; channels++) {
15551 DWConvMicrokernelTester()
15552 .cr(4)
15553 .kr(25)
15554 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015555 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015556 }
15557}
15558
15559TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
15560 for (uint32_t channels = 5; channels < 8; channels++) {
15561 DWConvMicrokernelTester()
15562 .cr(4)
15563 .kr(25)
15564 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015565 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015566 }
15567}
15568
15569TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
15570 for (uint32_t channels = 5; channels < 8; channels++) {
15571 DWConvMicrokernelTester()
15572 .cr(4)
15573 .kr(25)
15574 .channels(channels)
15575 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015576 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015577 }
15578}
15579
15580TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
15581 for (uint32_t channels = 5; channels < 8; channels++) {
15582 DWConvMicrokernelTester()
15583 .cr(4)
15584 .kr(25)
15585 .channels(channels)
15586 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015587 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015588 }
15589}
15590
15591TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
15592 for (size_t channels = 1; channels <= 20; channels += 3) {
15593 DWConvMicrokernelTester()
15594 .cr(4)
15595 .kr(25)
15596 .channels(channels)
15597 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015598 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015599 }
15600}
15601
15602TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
15603 for (size_t channels = 1; channels <= 20; channels += 3) {
15604 for (size_t step = 2; step <= 25; step++) {
15605 DWConvMicrokernelTester()
15606 .cr(4)
15607 .kr(25)
15608 .channels(channels)
15609 .width(3)
15610 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015611 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015612 }
15613 }
15614}
15615
15616TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
15617 for (size_t channels = 1; channels <= 20; channels += 3) {
15618 DWConvMicrokernelTester()
15619 .cr(4)
15620 .kr(25)
15621 .channels(4)
15622 .width(5)
15623 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080015624 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015625 }
15626}
15627
15628TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
15629 for (size_t channels = 1; channels <= 20; channels += 3) {
15630 DWConvMicrokernelTester()
15631 .cr(4)
15632 .kr(25)
15633 .channels(channels)
15634 .width(3)
15635 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015636 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015637 }
15638}
15639
15640TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
15641 for (size_t channels = 1; channels <= 20; channels += 3) {
15642 DWConvMicrokernelTester()
15643 .cr(4)
15644 .kr(25)
15645 .channels(channels)
15646 .width(3)
15647 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015648 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015649 }
15650}
15651
15652TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_zero_point_only) {
15653 for (size_t channels = 1; channels <= 20; channels += 3) {
15654 DWConvMicrokernelTester()
15655 .cr(4)
15656 .kr(25)
15657 .channels(channels)
15658 .width(3)
15659 .input_zero_point(255)
15660 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015661 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015662 }
15663}
15664
15665TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, kernel_zero_point_only) {
15666 for (size_t channels = 1; channels <= 20; channels += 3) {
15667 DWConvMicrokernelTester()
15668 .cr(4)
15669 .kr(25)
15670 .channels(channels)
15671 .width(3)
15672 .input_zero_point(0)
15673 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080015674 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015675 }
15676}
15677
15678TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
15679 for (uint32_t channels = 8; channels < 64; channels += 12) {
15680 DWConvMicrokernelTester()
15681 .cr(4)
15682 .kr(25)
15683 .channels(channels)
15684 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080015685 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015686 }
15687}
15688
15689TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
15690 for (uint32_t mz = 0; mz < 25; mz++) {
15691 for (uint32_t channels = 8; channels < 64; channels += 12) {
15692 DWConvMicrokernelTester()
15693 .cr(4)
15694 .kr(25)
15695 .channels(channels)
15696 .input_offset(112)
15697 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015698 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015699 }
15700 }
15701}
15702
15703TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
15704 DWConvMicrokernelTester()
15705 .cr(1)
15706 .kr(25)
15707 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015708 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015709}
15710
15711TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
15712 for (uint32_t channels = 2; channels < 10; channels++) {
15713 DWConvMicrokernelTester()
15714 .cr(1)
15715 .kr(25)
15716 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015717 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015718 }
15719}
15720
15721TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
15722 for (uint32_t channels = 2; channels < 10; channels++) {
15723 DWConvMicrokernelTester()
15724 .cr(1)
15725 .kr(25)
15726 .channels(channels)
15727 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015728 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015729 }
15730}
15731
15732TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
15733 for (uint32_t channels = 2; channels < 10; channels++) {
15734 DWConvMicrokernelTester()
15735 .cr(1)
15736 .kr(25)
15737 .channels(channels)
15738 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015739 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015740 }
15741}
15742
15743TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
15744 for (size_t channels = 1; channels <= 5; channels += 1) {
15745 DWConvMicrokernelTester()
15746 .cr(1)
15747 .kr(25)
15748 .channels(channels)
15749 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015750 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015751 }
15752}
15753
15754TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
15755 for (size_t channels = 1; channels <= 5; channels += 1) {
15756 for (size_t step = 2; step <= 25; step++) {
15757 DWConvMicrokernelTester()
15758 .cr(1)
15759 .kr(25)
15760 .channels(channels)
15761 .width(3)
15762 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015763 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015764 }
15765 }
15766}
15767
15768TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
15769 for (size_t channels = 1; channels <= 5; channels += 1) {
15770 DWConvMicrokernelTester()
15771 .cr(1)
15772 .kr(25)
15773 .channels(1)
15774 .width(5)
15775 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015776 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015777 }
15778}
15779
15780TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
15781 for (size_t channels = 1; channels <= 5; channels += 1) {
15782 DWConvMicrokernelTester()
15783 .cr(1)
15784 .kr(25)
15785 .channels(channels)
15786 .width(3)
15787 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015788 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015789 }
15790}
15791
15792TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
15793 for (size_t channels = 1; channels <= 5; channels += 1) {
15794 DWConvMicrokernelTester()
15795 .cr(1)
15796 .kr(25)
15797 .channels(channels)
15798 .width(3)
15799 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015800 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015801 }
15802}
15803
15804TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_zero_point_only) {
15805 for (size_t channels = 1; channels <= 5; channels += 1) {
15806 DWConvMicrokernelTester()
15807 .cr(1)
15808 .kr(25)
15809 .channels(channels)
15810 .width(3)
15811 .input_zero_point(255)
15812 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015813 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015814 }
15815}
15816
15817TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, kernel_zero_point_only) {
15818 for (size_t channels = 1; channels <= 5; channels += 1) {
15819 DWConvMicrokernelTester()
15820 .cr(1)
15821 .kr(25)
15822 .channels(channels)
15823 .width(3)
15824 .input_zero_point(0)
15825 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080015826 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015827 }
15828}
15829
15830TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
15831 for (uint32_t channels = 2; channels < 16; channels += 3) {
15832 DWConvMicrokernelTester()
15833 .cr(1)
15834 .kr(25)
15835 .channels(channels)
15836 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080015837 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015838 }
15839}
15840
15841TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
15842 for (uint32_t mz = 0; mz < 25; mz++) {
15843 for (uint32_t channels = 2; channels < 16; channels += 3) {
15844 DWConvMicrokernelTester()
15845 .cr(1)
15846 .kr(25)
15847 .channels(channels)
15848 .input_offset(48)
15849 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015850 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015851 }
15852 }
15853}
15854
15855TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
15856 DWConvMicrokernelTester()
15857 .cr(2)
15858 .kr(25)
15859 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080015860 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015861}
15862
15863TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
15864 for (uint32_t channels = 4; channels < 32; channels += 6) {
15865 DWConvMicrokernelTester()
15866 .cr(2)
15867 .kr(25)
15868 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015869 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015870 }
15871}
15872
15873TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
15874 for (uint32_t channels = 4; channels < 32; channels += 6) {
15875 DWConvMicrokernelTester()
15876 .cr(2)
15877 .kr(25)
15878 .channels(channels)
15879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015880 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015881 }
15882}
15883
15884TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
15885 for (uint32_t channels = 4; channels < 32; channels += 6) {
15886 DWConvMicrokernelTester()
15887 .cr(2)
15888 .kr(25)
15889 .channels(channels)
15890 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015891 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015892 }
15893}
15894
15895TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
15896 for (uint32_t channels = 1; channels < 2; channels++) {
15897 DWConvMicrokernelTester()
15898 .cr(2)
15899 .kr(25)
15900 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015901 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015902 }
15903}
15904
15905TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
15906 for (uint32_t channels = 3; channels < 4; channels++) {
15907 DWConvMicrokernelTester()
15908 .cr(2)
15909 .kr(25)
15910 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015911 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015912 }
15913}
15914
15915TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
15916 for (uint32_t channels = 3; channels < 4; channels++) {
15917 DWConvMicrokernelTester()
15918 .cr(2)
15919 .kr(25)
15920 .channels(channels)
15921 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015922 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015923 }
15924}
15925
15926TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
15927 for (uint32_t channels = 3; channels < 4; channels++) {
15928 DWConvMicrokernelTester()
15929 .cr(2)
15930 .kr(25)
15931 .channels(channels)
15932 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015933 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015934 }
15935}
15936
15937TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
15938 for (size_t channels = 1; channels <= 10; channels += 1) {
15939 DWConvMicrokernelTester()
15940 .cr(2)
15941 .kr(25)
15942 .channels(channels)
15943 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015944 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015945 }
15946}
15947
15948TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
15949 for (size_t channels = 1; channels <= 10; channels += 1) {
15950 for (size_t step = 2; step <= 25; step++) {
15951 DWConvMicrokernelTester()
15952 .cr(2)
15953 .kr(25)
15954 .channels(channels)
15955 .width(3)
15956 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015957 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015958 }
15959 }
15960}
15961
15962TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
15963 for (size_t channels = 1; channels <= 10; channels += 1) {
15964 DWConvMicrokernelTester()
15965 .cr(2)
15966 .kr(25)
15967 .channels(2)
15968 .width(5)
15969 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080015970 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015971 }
15972}
15973
15974TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
15975 for (size_t channels = 1; channels <= 10; channels += 1) {
15976 DWConvMicrokernelTester()
15977 .cr(2)
15978 .kr(25)
15979 .channels(channels)
15980 .width(3)
15981 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015982 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015983 }
15984}
15985
15986TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
15987 for (size_t channels = 1; channels <= 10; channels += 1) {
15988 DWConvMicrokernelTester()
15989 .cr(2)
15990 .kr(25)
15991 .channels(channels)
15992 .width(3)
15993 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015994 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080015995 }
15996}
15997
15998TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_zero_point_only) {
15999 for (size_t channels = 1; channels <= 10; channels += 1) {
16000 DWConvMicrokernelTester()
16001 .cr(2)
16002 .kr(25)
16003 .channels(channels)
16004 .width(3)
16005 .input_zero_point(255)
16006 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016007 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016008 }
16009}
16010
16011TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, kernel_zero_point_only) {
16012 for (size_t channels = 1; channels <= 10; channels += 1) {
16013 DWConvMicrokernelTester()
16014 .cr(2)
16015 .kr(25)
16016 .channels(channels)
16017 .width(3)
16018 .input_zero_point(0)
16019 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080016020 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016021 }
16022}
16023
16024TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
16025 for (uint32_t channels = 4; channels < 32; channels += 6) {
16026 DWConvMicrokernelTester()
16027 .cr(2)
16028 .kr(25)
16029 .channels(channels)
16030 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080016031 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016032 }
16033}
16034
16035TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
16036 for (uint32_t mz = 0; mz < 25; mz++) {
16037 for (uint32_t channels = 4; channels < 32; channels += 6) {
16038 DWConvMicrokernelTester()
16039 .cr(2)
16040 .kr(25)
16041 .channels(channels)
16042 .input_offset(80)
16043 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016044 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016045 }
16046 }
16047}
16048
16049TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
16050 DWConvMicrokernelTester()
16051 .cr(4)
16052 .kr(25)
16053 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080016054 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016055}
16056
16057TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
16058 for (uint32_t channels = 8; channels < 64; channels += 12) {
16059 DWConvMicrokernelTester()
16060 .cr(4)
16061 .kr(25)
16062 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016063 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016064 }
16065}
16066
16067TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
16068 for (uint32_t channels = 8; channels < 64; channels += 12) {
16069 DWConvMicrokernelTester()
16070 .cr(4)
16071 .kr(25)
16072 .channels(channels)
16073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016074 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016075 }
16076}
16077
16078TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
16079 for (uint32_t channels = 8; channels < 64; channels += 12) {
16080 DWConvMicrokernelTester()
16081 .cr(4)
16082 .kr(25)
16083 .channels(channels)
16084 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016085 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016086 }
16087}
16088
16089TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
16090 for (uint32_t channels = 1; channels < 4; channels++) {
16091 DWConvMicrokernelTester()
16092 .cr(4)
16093 .kr(25)
16094 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016095 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016096 }
16097}
16098
16099TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
16100 for (uint32_t channels = 5; channels < 8; channels++) {
16101 DWConvMicrokernelTester()
16102 .cr(4)
16103 .kr(25)
16104 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016106 }
16107}
16108
16109TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
16110 for (uint32_t channels = 5; channels < 8; channels++) {
16111 DWConvMicrokernelTester()
16112 .cr(4)
16113 .kr(25)
16114 .channels(channels)
16115 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016116 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016117 }
16118}
16119
16120TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
16121 for (uint32_t channels = 5; channels < 8; channels++) {
16122 DWConvMicrokernelTester()
16123 .cr(4)
16124 .kr(25)
16125 .channels(channels)
16126 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016127 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016128 }
16129}
16130
16131TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
16132 for (size_t channels = 1; channels <= 20; channels += 3) {
16133 DWConvMicrokernelTester()
16134 .cr(4)
16135 .kr(25)
16136 .channels(channels)
16137 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016138 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016139 }
16140}
16141
16142TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
16143 for (size_t channels = 1; channels <= 20; channels += 3) {
16144 for (size_t step = 2; step <= 25; step++) {
16145 DWConvMicrokernelTester()
16146 .cr(4)
16147 .kr(25)
16148 .channels(channels)
16149 .width(3)
16150 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016151 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016152 }
16153 }
16154}
16155
16156TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
16157 for (size_t channels = 1; channels <= 20; channels += 3) {
16158 DWConvMicrokernelTester()
16159 .cr(4)
16160 .kr(25)
16161 .channels(4)
16162 .width(5)
16163 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080016164 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016165 }
16166}
16167
16168TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
16169 for (size_t channels = 1; channels <= 20; channels += 3) {
16170 DWConvMicrokernelTester()
16171 .cr(4)
16172 .kr(25)
16173 .channels(channels)
16174 .width(3)
16175 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016176 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016177 }
16178}
16179
16180TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
16181 for (size_t channels = 1; channels <= 20; channels += 3) {
16182 DWConvMicrokernelTester()
16183 .cr(4)
16184 .kr(25)
16185 .channels(channels)
16186 .width(3)
16187 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016188 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016189 }
16190}
16191
16192TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_zero_point_only) {
16193 for (size_t channels = 1; channels <= 20; channels += 3) {
16194 DWConvMicrokernelTester()
16195 .cr(4)
16196 .kr(25)
16197 .channels(channels)
16198 .width(3)
16199 .input_zero_point(255)
16200 .kernel_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016201 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016202 }
16203}
16204
16205TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, kernel_zero_point_only) {
16206 for (size_t channels = 1; channels <= 20; channels += 3) {
16207 DWConvMicrokernelTester()
16208 .cr(4)
16209 .kr(25)
16210 .channels(channels)
16211 .width(3)
16212 .input_zero_point(0)
16213 .kernel_zero_point(255)
Marat Dukhan50323b82022-01-11 00:12:01 -080016214 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016215 }
16216}
16217
16218TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
16219 for (uint32_t channels = 8; channels < 64; channels += 12) {
16220 DWConvMicrokernelTester()
16221 .cr(4)
16222 .kr(25)
16223 .channels(channels)
16224 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016226 }
16227}
16228
16229TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
16230 for (uint32_t mz = 0; mz < 25; mz++) {
16231 for (uint32_t channels = 8; channels < 64; channels += 12) {
16232 DWConvMicrokernelTester()
16233 .cr(4)
16234 .kr(25)
16235 .channels(channels)
16236 .input_offset(112)
16237 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016238 .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080016239 }
16240 }
Marat Dukhan1f714282021-07-15 15:41:32 -070016241}