blob: be75088ea148be3400876e19aee4c23158b4ebe5 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qc8-dwconv-minmax-fp32.yaml
//   Generator: tools/generate-dwconv-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/dwconv.h>
20#include "dwconv-microkernel-tester.h"
21
22
Marat Dukhan59af5812021-06-29 18:09:57 -070023#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070024 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_eq_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070025 TEST_REQUIRES_ARM_NEON;
26 DWConvMicrokernelTester()
27 .cr(8)
28 .kr(9)
29 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070031 }
32
Marat Dukhan5f2939f2021-07-23 13:38:32 -070033 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070034 TEST_REQUIRES_ARM_NEON;
35 for (uint32_t channels = 16; channels < 128; channels += 24) {
36 DWConvMicrokernelTester()
37 .cr(8)
38 .kr(9)
39 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080040 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070041 }
42 }
43
Marat Dukhan5f2939f2021-07-23 13:38:32 -070044 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070045 TEST_REQUIRES_ARM_NEON;
46 for (uint32_t channels = 16; channels < 128; channels += 24) {
47 DWConvMicrokernelTester()
48 .cr(8)
49 .kr(9)
50 .channels(channels)
51 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080052 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070053 }
54 }
55
Marat Dukhan5f2939f2021-07-23 13:38:32 -070056 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070057 TEST_REQUIRES_ARM_NEON;
58 for (uint32_t channels = 16; channels < 128; channels += 24) {
59 DWConvMicrokernelTester()
60 .cr(8)
61 .kr(9)
62 .channels(channels)
63 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080064 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070065 }
66 }
67
Marat Dukhan5f2939f2021-07-23 13:38:32 -070068 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_lt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070069 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t channels = 1; channels < 8; channels++) {
71 DWConvMicrokernelTester()
72 .cr(8)
73 .kr(9)
74 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080075 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070076 }
77 }
78
Marat Dukhan5f2939f2021-07-23 13:38:32 -070079 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070080 TEST_REQUIRES_ARM_NEON;
81 for (uint32_t channels = 9; channels < 16; channels++) {
82 DWConvMicrokernelTester()
83 .cr(8)
84 .kr(9)
85 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080086 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070087 }
88 }
89
Marat Dukhan5f2939f2021-07-23 13:38:32 -070090 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070091 TEST_REQUIRES_ARM_NEON;
92 for (uint32_t channels = 9; channels < 16; channels++) {
93 DWConvMicrokernelTester()
94 .cr(8)
95 .kr(9)
96 .channels(channels)
97 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070099 }
100 }
101
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700102 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t channels = 9; channels < 16; channels++) {
105 DWConvMicrokernelTester()
106 .cr(8)
107 .kr(9)
108 .channels(channels)
109 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800110 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700111 }
112 }
113
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700114 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700115 TEST_REQUIRES_ARM_NEON;
116 for (size_t channels = 1; channels <= 40; channels += 7) {
117 DWConvMicrokernelTester()
118 .cr(8)
119 .kr(9)
120 .channels(channels)
121 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800122 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700123 }
124 }
125
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700126 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700127 TEST_REQUIRES_ARM_NEON;
128 for (size_t channels = 1; channels <= 40; channels += 7) {
129 for (size_t step = 2; step <= 9; step++) {
130 DWConvMicrokernelTester()
131 .cr(8)
132 .kr(9)
133 .channels(channels)
134 .width(3)
135 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800136 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700137 }
138 }
139 }
140
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700141 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700142 TEST_REQUIRES_ARM_NEON;
143 for (size_t channels = 1; channels <= 40; channels += 7) {
144 DWConvMicrokernelTester()
145 .cr(8)
146 .kr(9)
147 .channels(8)
148 .width(5)
149 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800150 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700151 }
152 }
153
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700154 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700155 TEST_REQUIRES_ARM_NEON;
156 for (size_t channels = 1; channels <= 40; channels += 7) {
157 DWConvMicrokernelTester()
158 .cr(8)
159 .kr(9)
160 .channels(channels)
161 .width(3)
162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700164 }
165 }
166
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700167 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700168 TEST_REQUIRES_ARM_NEON;
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 DWConvMicrokernelTester()
171 .cr(8)
172 .kr(9)
173 .channels(channels)
174 .width(3)
175 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800176 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700177 }
178 }
179
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700180 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700181 TEST_REQUIRES_ARM_NEON;
182 for (uint32_t channels = 16; channels < 128; channels += 24) {
183 DWConvMicrokernelTester()
184 .cr(8)
185 .kr(9)
186 .channels(channels)
187 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -0800188 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700189 }
190 }
191
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700192 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700193 TEST_REQUIRES_ARM_NEON;
194 for (uint32_t mz = 0; mz < 9; mz++) {
195 for (uint32_t channels = 16; channels < 128; channels += 24) {
196 DWConvMicrokernelTester()
197 .cr(8)
198 .kr(9)
199 .channels(channels)
200 .input_offset(176)
201 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700203 }
204 }
205 }
206#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
207
208
209#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700210 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_eq_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700211 TEST_REQUIRES_ARM_NEON;
212 DWConvMicrokernelTester()
213 .cr(16)
214 .kr(9)
215 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800216 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700217 }
218
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700219 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700220 TEST_REQUIRES_ARM_NEON;
221 for (uint32_t channels = 32; channels < 256; channels += 48) {
222 DWConvMicrokernelTester()
223 .cr(16)
224 .kr(9)
225 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800226 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700227 }
228 }
229
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700230 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700231 TEST_REQUIRES_ARM_NEON;
232 for (uint32_t channels = 32; channels < 256; channels += 48) {
233 DWConvMicrokernelTester()
234 .cr(16)
235 .kr(9)
236 .channels(channels)
237 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800238 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700239 }
240 }
241
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700242 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t channels = 32; channels < 256; channels += 48) {
245 DWConvMicrokernelTester()
246 .cr(16)
247 .kr(9)
248 .channels(channels)
249 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800250 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700251 }
252 }
253
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700254 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_lt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700255 TEST_REQUIRES_ARM_NEON;
256 for (uint32_t channels = 1; channels < 16; channels++) {
257 DWConvMicrokernelTester()
258 .cr(16)
259 .kr(9)
260 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800261 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700262 }
263 }
264
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700265 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700266 TEST_REQUIRES_ARM_NEON;
267 for (uint32_t channels = 17; channels < 32; channels++) {
268 DWConvMicrokernelTester()
269 .cr(16)
270 .kr(9)
271 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800272 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700273 }
274 }
275
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700276 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700277 TEST_REQUIRES_ARM_NEON;
278 for (uint32_t channels = 17; channels < 32; channels++) {
279 DWConvMicrokernelTester()
280 .cr(16)
281 .kr(9)
282 .channels(channels)
283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800284 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700285 }
286 }
287
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700288 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t channels = 17; channels < 32; channels++) {
291 DWConvMicrokernelTester()
292 .cr(16)
293 .kr(9)
294 .channels(channels)
295 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800296 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700297 }
298 }
299
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700300 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700301 TEST_REQUIRES_ARM_NEON;
302 for (size_t channels = 1; channels <= 80; channels += 15) {
303 DWConvMicrokernelTester()
304 .cr(16)
305 .kr(9)
306 .channels(channels)
307 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800308 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700309 }
310 }
311
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700312 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700313 TEST_REQUIRES_ARM_NEON;
314 for (size_t channels = 1; channels <= 80; channels += 15) {
315 for (size_t step = 2; step <= 9; step++) {
316 DWConvMicrokernelTester()
317 .cr(16)
318 .kr(9)
319 .channels(channels)
320 .width(3)
321 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800322 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700323 }
324 }
325 }
326
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700327 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700328 TEST_REQUIRES_ARM_NEON;
329 for (size_t channels = 1; channels <= 80; channels += 15) {
330 DWConvMicrokernelTester()
331 .cr(16)
332 .kr(9)
333 .channels(16)
334 .width(5)
335 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800336 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700337 }
338 }
339
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700340 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700341 TEST_REQUIRES_ARM_NEON;
342 for (size_t channels = 1; channels <= 80; channels += 15) {
343 DWConvMicrokernelTester()
344 .cr(16)
345 .kr(9)
346 .channels(channels)
347 .width(3)
348 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800349 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700350 }
351 }
352
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700353 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700354 TEST_REQUIRES_ARM_NEON;
355 for (size_t channels = 1; channels <= 80; channels += 15) {
356 DWConvMicrokernelTester()
357 .cr(16)
358 .kr(9)
359 .channels(channels)
360 .width(3)
361 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800362 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700363 }
364 }
365
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700366 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700367 TEST_REQUIRES_ARM_NEON;
368 for (uint32_t channels = 32; channels < 256; channels += 48) {
369 DWConvMicrokernelTester()
370 .cr(16)
371 .kr(9)
372 .channels(channels)
373 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -0800374 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700375 }
376 }
377
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700378 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700379 TEST_REQUIRES_ARM_NEON;
380 for (uint32_t mz = 0; mz < 9; mz++) {
381 for (uint32_t channels = 32; channels < 256; channels += 48) {
382 DWConvMicrokernelTester()
383 .cr(16)
384 .kr(9)
385 .channels(channels)
386 .input_offset(304)
387 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700389 }
390 }
391 }
392#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
393
394
395#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700396 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_eq_16) {
397 TEST_REQUIRES_ARM_NEON;
398 DWConvMicrokernelTester()
399 .cr(16)
400 .kr(9)
401 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800402 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700403 }
404
405 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16) {
406 TEST_REQUIRES_ARM_NEON;
407 for (uint32_t channels = 32; channels < 256; channels += 48) {
408 DWConvMicrokernelTester()
409 .cr(16)
410 .kr(9)
411 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800412 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700413 }
414 }
415
416 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmin) {
417 TEST_REQUIRES_ARM_NEON;
418 for (uint32_t channels = 32; channels < 256; channels += 48) {
419 DWConvMicrokernelTester()
420 .cr(16)
421 .kr(9)
422 .channels(channels)
423 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800424 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700425 }
426 }
427
428 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmax) {
429 TEST_REQUIRES_ARM_NEON;
430 for (uint32_t channels = 32; channels < 256; channels += 48) {
431 DWConvMicrokernelTester()
432 .cr(16)
433 .kr(9)
434 .channels(channels)
435 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800436 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700437 }
438 }
439
440 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_lt_16) {
441 TEST_REQUIRES_ARM_NEON;
442 for (uint32_t channels = 1; channels < 16; channels++) {
443 DWConvMicrokernelTester()
444 .cr(16)
445 .kr(9)
446 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800447 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700448 }
449 }
450
451 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16) {
452 TEST_REQUIRES_ARM_NEON;
453 for (uint32_t channels = 17; channels < 32; channels++) {
454 DWConvMicrokernelTester()
455 .cr(16)
456 .kr(9)
457 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800458 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700459 }
460 }
461
462 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmin) {
463 TEST_REQUIRES_ARM_NEON;
464 for (uint32_t channels = 17; channels < 32; channels++) {
465 DWConvMicrokernelTester()
466 .cr(16)
467 .kr(9)
468 .channels(channels)
469 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800470 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700471 }
472 }
473
474 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmax) {
475 TEST_REQUIRES_ARM_NEON;
476 for (uint32_t channels = 17; channels < 32; channels++) {
477 DWConvMicrokernelTester()
478 .cr(16)
479 .kr(9)
480 .channels(channels)
481 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800482 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700483 }
484 }
485
486 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel) {
487 TEST_REQUIRES_ARM_NEON;
488 for (size_t channels = 1; channels <= 80; channels += 15) {
489 DWConvMicrokernelTester()
490 .cr(16)
491 .kr(9)
492 .channels(channels)
493 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800494 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700495 }
496 }
497
498 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_step) {
499 TEST_REQUIRES_ARM_NEON;
500 for (size_t channels = 1; channels <= 80; channels += 15) {
501 for (size_t step = 2; step <= 9; step++) {
502 DWConvMicrokernelTester()
503 .cr(16)
504 .kr(9)
505 .channels(channels)
506 .width(3)
507 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800508 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700509 }
510 }
511 }
512
513 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_output_stride) {
514 TEST_REQUIRES_ARM_NEON;
515 for (size_t channels = 1; channels <= 80; channels += 15) {
516 DWConvMicrokernelTester()
517 .cr(16)
518 .kr(9)
519 .channels(16)
520 .width(5)
521 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800522 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700523 }
524 }
525
526 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmin) {
527 TEST_REQUIRES_ARM_NEON;
528 for (size_t channels = 1; channels <= 80; channels += 15) {
529 DWConvMicrokernelTester()
530 .cr(16)
531 .kr(9)
532 .channels(channels)
533 .width(3)
534 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800535 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700536 }
537 }
538
539 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmax) {
540 TEST_REQUIRES_ARM_NEON;
541 for (size_t channels = 1; channels <= 80; channels += 15) {
542 DWConvMicrokernelTester()
543 .cr(16)
544 .kr(9)
545 .channels(channels)
546 .width(3)
547 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800548 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700549 }
550 }
551
552 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, input_offset) {
553 TEST_REQUIRES_ARM_NEON;
554 for (uint32_t channels = 32; channels < 256; channels += 48) {
555 DWConvMicrokernelTester()
556 .cr(16)
557 .kr(9)
558 .channels(channels)
559 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -0800560 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700561 }
562 }
563
564 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, zero) {
565 TEST_REQUIRES_ARM_NEON;
566 for (uint32_t mz = 0; mz < 9; mz++) {
567 for (uint32_t channels = 32; channels < 256; channels += 48) {
568 DWConvMicrokernelTester()
569 .cr(16)
570 .kr(9)
571 .channels(channels)
572 .input_offset(304)
573 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700575 }
576 }
577 }
578#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
579
580
581#if XNN_ARCH_ARM || XNN_ARCH_ARM64
582 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_eq_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700583 TEST_REQUIRES_ARM_NEON_V8;
584 DWConvMicrokernelTester()
585 .cr(8)
586 .kr(9)
587 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800588 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700589 }
590
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700591 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700592 TEST_REQUIRES_ARM_NEON_V8;
593 for (uint32_t channels = 16; channels < 128; channels += 24) {
594 DWConvMicrokernelTester()
595 .cr(8)
596 .kr(9)
597 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800598 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700599 }
600 }
601
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700602 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700603 TEST_REQUIRES_ARM_NEON_V8;
604 for (uint32_t channels = 16; channels < 128; channels += 24) {
605 DWConvMicrokernelTester()
606 .cr(8)
607 .kr(9)
608 .channels(channels)
609 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800610 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700611 }
612 }
613
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700614 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700615 TEST_REQUIRES_ARM_NEON_V8;
616 for (uint32_t channels = 16; channels < 128; channels += 24) {
617 DWConvMicrokernelTester()
618 .cr(8)
619 .kr(9)
620 .channels(channels)
621 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800622 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700623 }
624 }
625
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700626 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_lt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700627 TEST_REQUIRES_ARM_NEON_V8;
628 for (uint32_t channels = 1; channels < 8; channels++) {
629 DWConvMicrokernelTester()
630 .cr(8)
631 .kr(9)
632 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800633 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700634 }
635 }
636
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700637 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700638 TEST_REQUIRES_ARM_NEON_V8;
639 for (uint32_t channels = 9; channels < 16; channels++) {
640 DWConvMicrokernelTester()
641 .cr(8)
642 .kr(9)
643 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800644 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700645 }
646 }
647
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700648 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700649 TEST_REQUIRES_ARM_NEON_V8;
650 for (uint32_t channels = 9; channels < 16; channels++) {
651 DWConvMicrokernelTester()
652 .cr(8)
653 .kr(9)
654 .channels(channels)
655 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800656 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700657 }
658 }
659
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700660 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700661 TEST_REQUIRES_ARM_NEON_V8;
662 for (uint32_t channels = 9; channels < 16; channels++) {
663 DWConvMicrokernelTester()
664 .cr(8)
665 .kr(9)
666 .channels(channels)
667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800668 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700669 }
670 }
671
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700672 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700673 TEST_REQUIRES_ARM_NEON_V8;
674 for (size_t channels = 1; channels <= 40; channels += 7) {
675 DWConvMicrokernelTester()
676 .cr(8)
677 .kr(9)
678 .channels(channels)
679 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800680 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700681 }
682 }
683
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700684 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700685 TEST_REQUIRES_ARM_NEON_V8;
686 for (size_t channels = 1; channels <= 40; channels += 7) {
687 for (size_t step = 2; step <= 9; step++) {
688 DWConvMicrokernelTester()
689 .cr(8)
690 .kr(9)
691 .channels(channels)
692 .width(3)
693 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800694 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700695 }
696 }
697 }
698
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700699 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700700 TEST_REQUIRES_ARM_NEON_V8;
701 for (size_t channels = 1; channels <= 40; channels += 7) {
702 DWConvMicrokernelTester()
703 .cr(8)
704 .kr(9)
705 .channels(8)
706 .width(5)
707 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800708 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700709 }
710 }
711
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700712 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700713 TEST_REQUIRES_ARM_NEON_V8;
714 for (size_t channels = 1; channels <= 40; channels += 7) {
715 DWConvMicrokernelTester()
716 .cr(8)
717 .kr(9)
718 .channels(channels)
719 .width(3)
720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800721 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700722 }
723 }
724
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700725 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700726 TEST_REQUIRES_ARM_NEON_V8;
727 for (size_t channels = 1; channels <= 40; channels += 7) {
728 DWConvMicrokernelTester()
729 .cr(8)
730 .kr(9)
731 .channels(channels)
732 .width(3)
733 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800734 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700735 }
736 }
737
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700738 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700739 TEST_REQUIRES_ARM_NEON_V8;
740 for (uint32_t channels = 16; channels < 128; channels += 24) {
741 DWConvMicrokernelTester()
742 .cr(8)
743 .kr(9)
744 .channels(channels)
745 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -0800746 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700747 }
748 }
749
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700750 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700751 TEST_REQUIRES_ARM_NEON_V8;
752 for (uint32_t mz = 0; mz < 9; mz++) {
753 for (uint32_t channels = 16; channels < 128; channels += 24) {
754 DWConvMicrokernelTester()
755 .cr(8)
756 .kr(9)
757 .channels(channels)
758 .input_offset(176)
759 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700761 }
762 }
763 }
764#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
767#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700768 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_eq_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700769 TEST_REQUIRES_ARM_NEON_V8;
770 DWConvMicrokernelTester()
771 .cr(16)
772 .kr(9)
773 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800774 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700775 }
776
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700777 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700778 TEST_REQUIRES_ARM_NEON_V8;
779 for (uint32_t channels = 32; channels < 256; channels += 48) {
780 DWConvMicrokernelTester()
781 .cr(16)
782 .kr(9)
783 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800784 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700785 }
786 }
787
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700788 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700789 TEST_REQUIRES_ARM_NEON_V8;
790 for (uint32_t channels = 32; channels < 256; channels += 48) {
791 DWConvMicrokernelTester()
792 .cr(16)
793 .kr(9)
794 .channels(channels)
795 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800796 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700797 }
798 }
799
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700800 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700801 TEST_REQUIRES_ARM_NEON_V8;
802 for (uint32_t channels = 32; channels < 256; channels += 48) {
803 DWConvMicrokernelTester()
804 .cr(16)
805 .kr(9)
806 .channels(channels)
807 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800808 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700809 }
810 }
811
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700812 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_lt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700813 TEST_REQUIRES_ARM_NEON_V8;
814 for (uint32_t channels = 1; channels < 16; channels++) {
815 DWConvMicrokernelTester()
816 .cr(16)
817 .kr(9)
818 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800819 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700820 }
821 }
822
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700823 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700824 TEST_REQUIRES_ARM_NEON_V8;
825 for (uint32_t channels = 17; channels < 32; channels++) {
826 DWConvMicrokernelTester()
827 .cr(16)
828 .kr(9)
829 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700831 }
832 }
833
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700834 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700835 TEST_REQUIRES_ARM_NEON_V8;
836 for (uint32_t channels = 17; channels < 32; channels++) {
837 DWConvMicrokernelTester()
838 .cr(16)
839 .kr(9)
840 .channels(channels)
841 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800842 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700843 }
844 }
845
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700846 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700847 TEST_REQUIRES_ARM_NEON_V8;
848 for (uint32_t channels = 17; channels < 32; channels++) {
849 DWConvMicrokernelTester()
850 .cr(16)
851 .kr(9)
852 .channels(channels)
853 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800854 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700855 }
856 }
857
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700858 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700859 TEST_REQUIRES_ARM_NEON_V8;
860 for (size_t channels = 1; channels <= 80; channels += 15) {
861 DWConvMicrokernelTester()
862 .cr(16)
863 .kr(9)
864 .channels(channels)
865 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800866 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700867 }
868 }
869
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700870 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700871 TEST_REQUIRES_ARM_NEON_V8;
872 for (size_t channels = 1; channels <= 80; channels += 15) {
873 for (size_t step = 2; step <= 9; step++) {
874 DWConvMicrokernelTester()
875 .cr(16)
876 .kr(9)
877 .channels(channels)
878 .width(3)
879 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -0800880 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700881 }
882 }
883 }
884
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700885 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700886 TEST_REQUIRES_ARM_NEON_V8;
887 for (size_t channels = 1; channels <= 80; channels += 15) {
888 DWConvMicrokernelTester()
889 .cr(16)
890 .kr(9)
891 .channels(16)
892 .width(5)
893 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800894 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700895 }
896 }
897
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700898 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700899 TEST_REQUIRES_ARM_NEON_V8;
900 for (size_t channels = 1; channels <= 80; channels += 15) {
901 DWConvMicrokernelTester()
902 .cr(16)
903 .kr(9)
904 .channels(channels)
905 .width(3)
906 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800907 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700908 }
909 }
910
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700911 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700912 TEST_REQUIRES_ARM_NEON_V8;
913 for (size_t channels = 1; channels <= 80; channels += 15) {
914 DWConvMicrokernelTester()
915 .cr(16)
916 .kr(9)
917 .channels(channels)
918 .width(3)
919 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800920 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700921 }
922 }
923
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700924 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700925 TEST_REQUIRES_ARM_NEON_V8;
926 for (uint32_t channels = 32; channels < 256; channels += 48) {
927 DWConvMicrokernelTester()
928 .cr(16)
929 .kr(9)
930 .channels(channels)
931 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -0800932 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700933 }
934 }
935
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700936 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700937 TEST_REQUIRES_ARM_NEON_V8;
938 for (uint32_t mz = 0; mz < 9; mz++) {
939 for (uint32_t channels = 32; channels < 256; channels += 48) {
940 DWConvMicrokernelTester()
941 .cr(16)
942 .kr(9)
943 .channels(channels)
944 .input_offset(304)
945 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -0700947 }
948 }
949 }
950#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
951
952
953#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700954 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_eq_16) {
955 TEST_REQUIRES_ARM_NEON_V8;
956 DWConvMicrokernelTester()
957 .cr(16)
958 .kr(9)
959 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800960 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700961 }
962
963 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16) {
964 TEST_REQUIRES_ARM_NEON_V8;
965 for (uint32_t channels = 32; channels < 256; channels += 48) {
966 DWConvMicrokernelTester()
967 .cr(16)
968 .kr(9)
969 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -0800970 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700971 }
972 }
973
974 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
975 TEST_REQUIRES_ARM_NEON_V8;
976 for (uint32_t channels = 32; channels < 256; channels += 48) {
977 DWConvMicrokernelTester()
978 .cr(16)
979 .kr(9)
980 .channels(channels)
981 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800982 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700983 }
984 }
985
986 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
987 TEST_REQUIRES_ARM_NEON_V8;
988 for (uint32_t channels = 32; channels < 256; channels += 48) {
989 DWConvMicrokernelTester()
990 .cr(16)
991 .kr(9)
992 .channels(channels)
993 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800994 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -0700995 }
996 }
997
998 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_lt_16) {
999 TEST_REQUIRES_ARM_NEON_V8;
1000 for (uint32_t channels = 1; channels < 16; channels++) {
1001 DWConvMicrokernelTester()
1002 .cr(16)
1003 .kr(9)
1004 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001005 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001006 }
1007 }
1008
1009 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16) {
1010 TEST_REQUIRES_ARM_NEON_V8;
1011 for (uint32_t channels = 17; channels < 32; channels++) {
1012 DWConvMicrokernelTester()
1013 .cr(16)
1014 .kr(9)
1015 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001016 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001017 }
1018 }
1019
1020 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
1021 TEST_REQUIRES_ARM_NEON_V8;
1022 for (uint32_t channels = 17; channels < 32; channels++) {
1023 DWConvMicrokernelTester()
1024 .cr(16)
1025 .kr(9)
1026 .channels(channels)
1027 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001028 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001029 }
1030 }
1031
1032 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
1033 TEST_REQUIRES_ARM_NEON_V8;
1034 for (uint32_t channels = 17; channels < 32; channels++) {
1035 DWConvMicrokernelTester()
1036 .cr(16)
1037 .kr(9)
1038 .channels(channels)
1039 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001040 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001041 }
1042 }
1043
1044 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel) {
1045 TEST_REQUIRES_ARM_NEON_V8;
1046 for (size_t channels = 1; channels <= 80; channels += 15) {
1047 DWConvMicrokernelTester()
1048 .cr(16)
1049 .kr(9)
1050 .channels(channels)
1051 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001052 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001053 }
1054 }
1055
1056 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_step) {
1057 TEST_REQUIRES_ARM_NEON_V8;
1058 for (size_t channels = 1; channels <= 80; channels += 15) {
1059 for (size_t step = 2; step <= 9; step++) {
1060 DWConvMicrokernelTester()
1061 .cr(16)
1062 .kr(9)
1063 .channels(channels)
1064 .width(3)
1065 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001066 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001067 }
1068 }
1069 }
1070
1071 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
1072 TEST_REQUIRES_ARM_NEON_V8;
1073 for (size_t channels = 1; channels <= 80; channels += 15) {
1074 DWConvMicrokernelTester()
1075 .cr(16)
1076 .kr(9)
1077 .channels(16)
1078 .width(5)
1079 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001080 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001081 }
1082 }
1083
1084 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmin) {
1085 TEST_REQUIRES_ARM_NEON_V8;
1086 for (size_t channels = 1; channels <= 80; channels += 15) {
1087 DWConvMicrokernelTester()
1088 .cr(16)
1089 .kr(9)
1090 .channels(channels)
1091 .width(3)
1092 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001093 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001094 }
1095 }
1096
1097 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmax) {
1098 TEST_REQUIRES_ARM_NEON_V8;
1099 for (size_t channels = 1; channels <= 80; channels += 15) {
1100 DWConvMicrokernelTester()
1101 .cr(16)
1102 .kr(9)
1103 .channels(channels)
1104 .width(3)
1105 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001106 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001107 }
1108 }
1109
1110 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, input_offset) {
1111 TEST_REQUIRES_ARM_NEON_V8;
1112 for (uint32_t channels = 32; channels < 256; channels += 48) {
1113 DWConvMicrokernelTester()
1114 .cr(16)
1115 .kr(9)
1116 .channels(channels)
1117 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001118 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001119 }
1120 }
1121
1122 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, zero) {
1123 TEST_REQUIRES_ARM_NEON_V8;
1124 for (uint32_t mz = 0; mz < 9; mz++) {
1125 for (uint32_t channels = 32; channels < 256; channels += 48) {
1126 DWConvMicrokernelTester()
1127 .cr(16)
1128 .kr(9)
1129 .channels(channels)
1130 .input_offset(304)
1131 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001132 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001133 }
1134 }
1135 }
1136#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137
1138
1139#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1140 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_eq_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001141 TEST_REQUIRES_ARM_NEON;
1142 DWConvMicrokernelTester()
1143 .cr(8)
1144 .kr(9)
1145 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001146 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001147 }
1148
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001149 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001150 TEST_REQUIRES_ARM_NEON;
1151 for (uint32_t channels = 16; channels < 128; channels += 24) {
1152 DWConvMicrokernelTester()
1153 .cr(8)
1154 .kr(9)
1155 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001156 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001157 }
1158 }
1159
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001160 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001161 TEST_REQUIRES_ARM_NEON;
1162 for (uint32_t channels = 16; channels < 128; channels += 24) {
1163 DWConvMicrokernelTester()
1164 .cr(8)
1165 .kr(9)
1166 .channels(channels)
1167 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001168 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001169 }
1170 }
1171
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001172 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001173 TEST_REQUIRES_ARM_NEON;
1174 for (uint32_t channels = 16; channels < 128; channels += 24) {
1175 DWConvMicrokernelTester()
1176 .cr(8)
1177 .kr(9)
1178 .channels(channels)
1179 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001180 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001181 }
1182 }
1183
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001184 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_lt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001185 TEST_REQUIRES_ARM_NEON;
1186 for (uint32_t channels = 1; channels < 8; channels++) {
1187 DWConvMicrokernelTester()
1188 .cr(8)
1189 .kr(9)
1190 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001191 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001192 }
1193 }
1194
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001195 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001196 TEST_REQUIRES_ARM_NEON;
1197 for (uint32_t channels = 9; channels < 16; channels++) {
1198 DWConvMicrokernelTester()
1199 .cr(8)
1200 .kr(9)
1201 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001203 }
1204 }
1205
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001206 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001207 TEST_REQUIRES_ARM_NEON;
1208 for (uint32_t channels = 9; channels < 16; channels++) {
1209 DWConvMicrokernelTester()
1210 .cr(8)
1211 .kr(9)
1212 .channels(channels)
1213 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001214 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001215 }
1216 }
1217
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001218 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001219 TEST_REQUIRES_ARM_NEON;
1220 for (uint32_t channels = 9; channels < 16; channels++) {
1221 DWConvMicrokernelTester()
1222 .cr(8)
1223 .kr(9)
1224 .channels(channels)
1225 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001226 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001227 }
1228 }
1229
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001230 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001231 TEST_REQUIRES_ARM_NEON;
1232 for (size_t channels = 1; channels <= 40; channels += 7) {
1233 DWConvMicrokernelTester()
1234 .cr(8)
1235 .kr(9)
1236 .channels(channels)
1237 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001238 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001239 }
1240 }
1241
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001242 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001243 TEST_REQUIRES_ARM_NEON;
1244 for (size_t channels = 1; channels <= 40; channels += 7) {
1245 for (size_t step = 2; step <= 9; step++) {
1246 DWConvMicrokernelTester()
1247 .cr(8)
1248 .kr(9)
1249 .channels(channels)
1250 .width(3)
1251 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001252 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001253 }
1254 }
1255 }
1256
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001257 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001258 TEST_REQUIRES_ARM_NEON;
1259 for (size_t channels = 1; channels <= 40; channels += 7) {
1260 DWConvMicrokernelTester()
1261 .cr(8)
1262 .kr(9)
1263 .channels(8)
1264 .width(5)
1265 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001266 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001267 }
1268 }
1269
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001270 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001271 TEST_REQUIRES_ARM_NEON;
1272 for (size_t channels = 1; channels <= 40; channels += 7) {
1273 DWConvMicrokernelTester()
1274 .cr(8)
1275 .kr(9)
1276 .channels(channels)
1277 .width(3)
1278 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001279 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001280 }
1281 }
1282
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001283 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001284 TEST_REQUIRES_ARM_NEON;
1285 for (size_t channels = 1; channels <= 40; channels += 7) {
1286 DWConvMicrokernelTester()
1287 .cr(8)
1288 .kr(9)
1289 .channels(channels)
1290 .width(3)
1291 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001292 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001293 }
1294 }
1295
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001296 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001297 TEST_REQUIRES_ARM_NEON;
1298 for (uint32_t channels = 16; channels < 128; channels += 24) {
1299 DWConvMicrokernelTester()
1300 .cr(8)
1301 .kr(9)
1302 .channels(channels)
1303 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08001304 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001305 }
1306 }
1307
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001308 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001309 TEST_REQUIRES_ARM_NEON;
1310 for (uint32_t mz = 0; mz < 9; mz++) {
1311 for (uint32_t channels = 16; channels < 128; channels += 24) {
1312 DWConvMicrokernelTester()
1313 .cr(8)
1314 .kr(9)
1315 .channels(channels)
1316 .input_offset(176)
1317 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001318 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001319 }
1320 }
1321 }
1322#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323
1324
1325#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001326 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_eq_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001327 TEST_REQUIRES_ARM_NEON;
1328 DWConvMicrokernelTester()
1329 .cr(16)
1330 .kr(9)
1331 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001332 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001333 }
1334
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001335 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001336 TEST_REQUIRES_ARM_NEON;
1337 for (uint32_t channels = 32; channels < 256; channels += 48) {
1338 DWConvMicrokernelTester()
1339 .cr(16)
1340 .kr(9)
1341 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001342 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001343 }
1344 }
1345
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001346 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001347 TEST_REQUIRES_ARM_NEON;
1348 for (uint32_t channels = 32; channels < 256; channels += 48) {
1349 DWConvMicrokernelTester()
1350 .cr(16)
1351 .kr(9)
1352 .channels(channels)
1353 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001354 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001355 }
1356 }
1357
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001358 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001359 TEST_REQUIRES_ARM_NEON;
1360 for (uint32_t channels = 32; channels < 256; channels += 48) {
1361 DWConvMicrokernelTester()
1362 .cr(16)
1363 .kr(9)
1364 .channels(channels)
1365 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001366 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001367 }
1368 }
1369
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001370 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_lt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001371 TEST_REQUIRES_ARM_NEON;
1372 for (uint32_t channels = 1; channels < 16; channels++) {
1373 DWConvMicrokernelTester()
1374 .cr(16)
1375 .kr(9)
1376 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001377 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001378 }
1379 }
1380
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001381 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001382 TEST_REQUIRES_ARM_NEON;
1383 for (uint32_t channels = 17; channels < 32; channels++) {
1384 DWConvMicrokernelTester()
1385 .cr(16)
1386 .kr(9)
1387 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001389 }
1390 }
1391
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001392 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001393 TEST_REQUIRES_ARM_NEON;
1394 for (uint32_t channels = 17; channels < 32; channels++) {
1395 DWConvMicrokernelTester()
1396 .cr(16)
1397 .kr(9)
1398 .channels(channels)
1399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001400 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001401 }
1402 }
1403
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001404 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001405 TEST_REQUIRES_ARM_NEON;
1406 for (uint32_t channels = 17; channels < 32; channels++) {
1407 DWConvMicrokernelTester()
1408 .cr(16)
1409 .kr(9)
1410 .channels(channels)
1411 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001412 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001413 }
1414 }
1415
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001416 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001417 TEST_REQUIRES_ARM_NEON;
1418 for (size_t channels = 1; channels <= 80; channels += 15) {
1419 DWConvMicrokernelTester()
1420 .cr(16)
1421 .kr(9)
1422 .channels(channels)
1423 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001424 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001425 }
1426 }
1427
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001428 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001429 TEST_REQUIRES_ARM_NEON;
1430 for (size_t channels = 1; channels <= 80; channels += 15) {
1431 for (size_t step = 2; step <= 9; step++) {
1432 DWConvMicrokernelTester()
1433 .cr(16)
1434 .kr(9)
1435 .channels(channels)
1436 .width(3)
1437 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001438 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001439 }
1440 }
1441 }
1442
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001443 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001444 TEST_REQUIRES_ARM_NEON;
1445 for (size_t channels = 1; channels <= 80; channels += 15) {
1446 DWConvMicrokernelTester()
1447 .cr(16)
1448 .kr(9)
1449 .channels(16)
1450 .width(5)
1451 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001452 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001453 }
1454 }
1455
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001456 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001457 TEST_REQUIRES_ARM_NEON;
1458 for (size_t channels = 1; channels <= 80; channels += 15) {
1459 DWConvMicrokernelTester()
1460 .cr(16)
1461 .kr(9)
1462 .channels(channels)
1463 .width(3)
1464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001465 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001466 }
1467 }
1468
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001469 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001470 TEST_REQUIRES_ARM_NEON;
1471 for (size_t channels = 1; channels <= 80; channels += 15) {
1472 DWConvMicrokernelTester()
1473 .cr(16)
1474 .kr(9)
1475 .channels(channels)
1476 .width(3)
1477 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001478 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001479 }
1480 }
1481
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001482 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001483 TEST_REQUIRES_ARM_NEON;
1484 for (uint32_t channels = 32; channels < 256; channels += 48) {
1485 DWConvMicrokernelTester()
1486 .cr(16)
1487 .kr(9)
1488 .channels(channels)
1489 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001490 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001491 }
1492 }
1493
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001494 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001495 TEST_REQUIRES_ARM_NEON;
1496 for (uint32_t mz = 0; mz < 9; mz++) {
1497 for (uint32_t channels = 32; channels < 256; channels += 48) {
1498 DWConvMicrokernelTester()
1499 .cr(16)
1500 .kr(9)
1501 .channels(channels)
1502 .input_offset(304)
1503 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001504 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001505 }
1506 }
1507 }
1508#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509
1510
1511#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001512 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_eq_16) {
1513 TEST_REQUIRES_ARM_NEON;
1514 DWConvMicrokernelTester()
1515 .cr(16)
1516 .kr(9)
1517 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001518 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001519 }
1520
1521 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16) {
1522 TEST_REQUIRES_ARM_NEON;
1523 for (uint32_t channels = 32; channels < 256; channels += 48) {
1524 DWConvMicrokernelTester()
1525 .cr(16)
1526 .kr(9)
1527 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001528 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001529 }
1530 }
1531
1532 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmin) {
1533 TEST_REQUIRES_ARM_NEON;
1534 for (uint32_t channels = 32; channels < 256; channels += 48) {
1535 DWConvMicrokernelTester()
1536 .cr(16)
1537 .kr(9)
1538 .channels(channels)
1539 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001540 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001541 }
1542 }
1543
1544 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmax) {
1545 TEST_REQUIRES_ARM_NEON;
1546 for (uint32_t channels = 32; channels < 256; channels += 48) {
1547 DWConvMicrokernelTester()
1548 .cr(16)
1549 .kr(9)
1550 .channels(channels)
1551 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001552 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001553 }
1554 }
1555
1556 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_lt_16) {
1557 TEST_REQUIRES_ARM_NEON;
1558 for (uint32_t channels = 1; channels < 16; channels++) {
1559 DWConvMicrokernelTester()
1560 .cr(16)
1561 .kr(9)
1562 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001563 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001564 }
1565 }
1566
1567 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16) {
1568 TEST_REQUIRES_ARM_NEON;
1569 for (uint32_t channels = 17; channels < 32; channels++) {
1570 DWConvMicrokernelTester()
1571 .cr(16)
1572 .kr(9)
1573 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001575 }
1576 }
1577
1578 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmin) {
1579 TEST_REQUIRES_ARM_NEON;
1580 for (uint32_t channels = 17; channels < 32; channels++) {
1581 DWConvMicrokernelTester()
1582 .cr(16)
1583 .kr(9)
1584 .channels(channels)
1585 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001586 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001587 }
1588 }
1589
1590 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmax) {
1591 TEST_REQUIRES_ARM_NEON;
1592 for (uint32_t channels = 17; channels < 32; channels++) {
1593 DWConvMicrokernelTester()
1594 .cr(16)
1595 .kr(9)
1596 .channels(channels)
1597 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001598 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001599 }
1600 }
1601
1602 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel) {
1603 TEST_REQUIRES_ARM_NEON;
1604 for (size_t channels = 1; channels <= 80; channels += 15) {
1605 DWConvMicrokernelTester()
1606 .cr(16)
1607 .kr(9)
1608 .channels(channels)
1609 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001610 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001611 }
1612 }
1613
1614 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_step) {
1615 TEST_REQUIRES_ARM_NEON;
1616 for (size_t channels = 1; channels <= 80; channels += 15) {
1617 for (size_t step = 2; step <= 9; step++) {
1618 DWConvMicrokernelTester()
1619 .cr(16)
1620 .kr(9)
1621 .channels(channels)
1622 .width(3)
1623 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001624 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001625 }
1626 }
1627 }
1628
1629 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_output_stride) {
1630 TEST_REQUIRES_ARM_NEON;
1631 for (size_t channels = 1; channels <= 80; channels += 15) {
1632 DWConvMicrokernelTester()
1633 .cr(16)
1634 .kr(9)
1635 .channels(16)
1636 .width(5)
1637 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001638 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001639 }
1640 }
1641
1642 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmin) {
1643 TEST_REQUIRES_ARM_NEON;
1644 for (size_t channels = 1; channels <= 80; channels += 15) {
1645 DWConvMicrokernelTester()
1646 .cr(16)
1647 .kr(9)
1648 .channels(channels)
1649 .width(3)
1650 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001652 }
1653 }
1654
1655 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmax) {
1656 TEST_REQUIRES_ARM_NEON;
1657 for (size_t channels = 1; channels <= 80; channels += 15) {
1658 DWConvMicrokernelTester()
1659 .cr(16)
1660 .kr(9)
1661 .channels(channels)
1662 .width(3)
1663 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001664 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001665 }
1666 }
1667
1668 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, input_offset) {
1669 TEST_REQUIRES_ARM_NEON;
1670 for (uint32_t channels = 32; channels < 256; channels += 48) {
1671 DWConvMicrokernelTester()
1672 .cr(16)
1673 .kr(9)
1674 .channels(channels)
1675 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08001676 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001677 }
1678 }
1679
1680 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, zero) {
1681 TEST_REQUIRES_ARM_NEON;
1682 for (uint32_t mz = 0; mz < 9; mz++) {
1683 for (uint32_t channels = 32; channels < 256; channels += 48) {
1684 DWConvMicrokernelTester()
1685 .cr(16)
1686 .kr(9)
1687 .channels(channels)
1688 .input_offset(304)
1689 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001691 }
1692 }
1693 }
1694#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1695
1696
1697#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1698 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_eq_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001699 TEST_REQUIRES_ARM_NEON_V8;
1700 DWConvMicrokernelTester()
1701 .cr(8)
1702 .kr(9)
1703 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001704 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001705 }
1706
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001707 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001708 TEST_REQUIRES_ARM_NEON_V8;
1709 for (uint32_t channels = 16; channels < 128; channels += 24) {
1710 DWConvMicrokernelTester()
1711 .cr(8)
1712 .kr(9)
1713 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001714 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001715 }
1716 }
1717
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001718 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001719 TEST_REQUIRES_ARM_NEON_V8;
1720 for (uint32_t channels = 16; channels < 128; channels += 24) {
1721 DWConvMicrokernelTester()
1722 .cr(8)
1723 .kr(9)
1724 .channels(channels)
1725 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001726 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001727 }
1728 }
1729
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001730 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001731 TEST_REQUIRES_ARM_NEON_V8;
1732 for (uint32_t channels = 16; channels < 128; channels += 24) {
1733 DWConvMicrokernelTester()
1734 .cr(8)
1735 .kr(9)
1736 .channels(channels)
1737 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001738 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001739 }
1740 }
1741
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001742 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_lt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001743 TEST_REQUIRES_ARM_NEON_V8;
1744 for (uint32_t channels = 1; channels < 8; channels++) {
1745 DWConvMicrokernelTester()
1746 .cr(8)
1747 .kr(9)
1748 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001749 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001750 }
1751 }
1752
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001753 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001754 TEST_REQUIRES_ARM_NEON_V8;
1755 for (uint32_t channels = 9; channels < 16; channels++) {
1756 DWConvMicrokernelTester()
1757 .cr(8)
1758 .kr(9)
1759 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001761 }
1762 }
1763
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001764 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001765 TEST_REQUIRES_ARM_NEON_V8;
1766 for (uint32_t channels = 9; channels < 16; channels++) {
1767 DWConvMicrokernelTester()
1768 .cr(8)
1769 .kr(9)
1770 .channels(channels)
1771 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001772 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001773 }
1774 }
1775
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001776 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001777 TEST_REQUIRES_ARM_NEON_V8;
1778 for (uint32_t channels = 9; channels < 16; channels++) {
1779 DWConvMicrokernelTester()
1780 .cr(8)
1781 .kr(9)
1782 .channels(channels)
1783 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001784 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001785 }
1786 }
1787
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001788 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001789 TEST_REQUIRES_ARM_NEON_V8;
1790 for (size_t channels = 1; channels <= 40; channels += 7) {
1791 DWConvMicrokernelTester()
1792 .cr(8)
1793 .kr(9)
1794 .channels(channels)
1795 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001796 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001797 }
1798 }
1799
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001800 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001801 TEST_REQUIRES_ARM_NEON_V8;
1802 for (size_t channels = 1; channels <= 40; channels += 7) {
1803 for (size_t step = 2; step <= 9; step++) {
1804 DWConvMicrokernelTester()
1805 .cr(8)
1806 .kr(9)
1807 .channels(channels)
1808 .width(3)
1809 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001810 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001811 }
1812 }
1813 }
1814
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001815 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001816 TEST_REQUIRES_ARM_NEON_V8;
1817 for (size_t channels = 1; channels <= 40; channels += 7) {
1818 DWConvMicrokernelTester()
1819 .cr(8)
1820 .kr(9)
1821 .channels(8)
1822 .width(5)
1823 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001824 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001825 }
1826 }
1827
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001828 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001829 TEST_REQUIRES_ARM_NEON_V8;
1830 for (size_t channels = 1; channels <= 40; channels += 7) {
1831 DWConvMicrokernelTester()
1832 .cr(8)
1833 .kr(9)
1834 .channels(channels)
1835 .width(3)
1836 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001838 }
1839 }
1840
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001841 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001842 TEST_REQUIRES_ARM_NEON_V8;
1843 for (size_t channels = 1; channels <= 40; channels += 7) {
1844 DWConvMicrokernelTester()
1845 .cr(8)
1846 .kr(9)
1847 .channels(channels)
1848 .width(3)
1849 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001850 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001851 }
1852 }
1853
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001854 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001855 TEST_REQUIRES_ARM_NEON_V8;
1856 for (uint32_t channels = 16; channels < 128; channels += 24) {
1857 DWConvMicrokernelTester()
1858 .cr(8)
1859 .kr(9)
1860 .channels(channels)
1861 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08001862 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001863 }
1864 }
1865
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001866 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001867 TEST_REQUIRES_ARM_NEON_V8;
1868 for (uint32_t mz = 0; mz < 9; mz++) {
1869 for (uint32_t channels = 16; channels < 128; channels += 24) {
1870 DWConvMicrokernelTester()
1871 .cr(8)
1872 .kr(9)
1873 .channels(channels)
1874 .input_offset(176)
1875 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001877 }
1878 }
1879 }
1880#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1881
1882
1883#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001884 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_eq_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001885 TEST_REQUIRES_ARM_NEON_V8;
1886 DWConvMicrokernelTester()
1887 .cr(16)
1888 .kr(9)
1889 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001890 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001891 }
1892
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001893 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001894 TEST_REQUIRES_ARM_NEON_V8;
1895 for (uint32_t channels = 32; channels < 256; channels += 48) {
1896 DWConvMicrokernelTester()
1897 .cr(16)
1898 .kr(9)
1899 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001900 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001901 }
1902 }
1903
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001904 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001905 TEST_REQUIRES_ARM_NEON_V8;
1906 for (uint32_t channels = 32; channels < 256; channels += 48) {
1907 DWConvMicrokernelTester()
1908 .cr(16)
1909 .kr(9)
1910 .channels(channels)
1911 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001912 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001913 }
1914 }
1915
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001916 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001917 TEST_REQUIRES_ARM_NEON_V8;
1918 for (uint32_t channels = 32; channels < 256; channels += 48) {
1919 DWConvMicrokernelTester()
1920 .cr(16)
1921 .kr(9)
1922 .channels(channels)
1923 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001924 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001925 }
1926 }
1927
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001928 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_lt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001929 TEST_REQUIRES_ARM_NEON_V8;
1930 for (uint32_t channels = 1; channels < 16; channels++) {
1931 DWConvMicrokernelTester()
1932 .cr(16)
1933 .kr(9)
1934 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001935 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001936 }
1937 }
1938
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001939 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001940 TEST_REQUIRES_ARM_NEON_V8;
1941 for (uint32_t channels = 17; channels < 32; channels++) {
1942 DWConvMicrokernelTester()
1943 .cr(16)
1944 .kr(9)
1945 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08001946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001947 }
1948 }
1949
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001950 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001951 TEST_REQUIRES_ARM_NEON_V8;
1952 for (uint32_t channels = 17; channels < 32; channels++) {
1953 DWConvMicrokernelTester()
1954 .cr(16)
1955 .kr(9)
1956 .channels(channels)
1957 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001958 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001959 }
1960 }
1961
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001962 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001963 TEST_REQUIRES_ARM_NEON_V8;
1964 for (uint32_t channels = 17; channels < 32; channels++) {
1965 DWConvMicrokernelTester()
1966 .cr(16)
1967 .kr(9)
1968 .channels(channels)
1969 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001970 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001971 }
1972 }
1973
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001974 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001975 TEST_REQUIRES_ARM_NEON_V8;
1976 for (size_t channels = 1; channels <= 80; channels += 15) {
1977 DWConvMicrokernelTester()
1978 .cr(16)
1979 .kr(9)
1980 .channels(channels)
1981 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001982 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001983 }
1984 }
1985
Marat Dukhan5f2939f2021-07-23 13:38:32 -07001986 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001987 TEST_REQUIRES_ARM_NEON_V8;
1988 for (size_t channels = 1; channels <= 80; channels += 15) {
1989 for (size_t step = 2; step <= 9; step++) {
1990 DWConvMicrokernelTester()
1991 .cr(16)
1992 .kr(9)
1993 .channels(channels)
1994 .width(3)
1995 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08001996 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07001997 }
1998 }
1999 }
2000
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002001 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002002 TEST_REQUIRES_ARM_NEON_V8;
2003 for (size_t channels = 1; channels <= 80; channels += 15) {
2004 DWConvMicrokernelTester()
2005 .cr(16)
2006 .kr(9)
2007 .channels(16)
2008 .width(5)
2009 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002010 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002011 }
2012 }
2013
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002014 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002015 TEST_REQUIRES_ARM_NEON_V8;
2016 for (size_t channels = 1; channels <= 80; channels += 15) {
2017 DWConvMicrokernelTester()
2018 .cr(16)
2019 .kr(9)
2020 .channels(channels)
2021 .width(3)
2022 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002023 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002024 }
2025 }
2026
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002027 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002028 TEST_REQUIRES_ARM_NEON_V8;
2029 for (size_t channels = 1; channels <= 80; channels += 15) {
2030 DWConvMicrokernelTester()
2031 .cr(16)
2032 .kr(9)
2033 .channels(channels)
2034 .width(3)
2035 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002036 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002037 }
2038 }
2039
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002040 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002041 TEST_REQUIRES_ARM_NEON_V8;
2042 for (uint32_t channels = 32; channels < 256; channels += 48) {
2043 DWConvMicrokernelTester()
2044 .cr(16)
2045 .kr(9)
2046 .channels(channels)
2047 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002048 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002049 }
2050 }
2051
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002052 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002053 TEST_REQUIRES_ARM_NEON_V8;
2054 for (uint32_t mz = 0; mz < 9; mz++) {
2055 for (uint32_t channels = 32; channels < 256; channels += 48) {
2056 DWConvMicrokernelTester()
2057 .cr(16)
2058 .kr(9)
2059 .channels(channels)
2060 .input_offset(304)
2061 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002062 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002063 }
2064 }
2065 }
2066#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2067
2068
2069#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2070 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_eq_16) {
2071 TEST_REQUIRES_ARM_NEON_V8;
2072 DWConvMicrokernelTester()
2073 .cr(16)
2074 .kr(9)
2075 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002076 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002077 }
2078
2079 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16) {
2080 TEST_REQUIRES_ARM_NEON_V8;
2081 for (uint32_t channels = 32; channels < 256; channels += 48) {
2082 DWConvMicrokernelTester()
2083 .cr(16)
2084 .kr(9)
2085 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002086 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002087 }
2088 }
2089
2090 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
2091 TEST_REQUIRES_ARM_NEON_V8;
2092 for (uint32_t channels = 32; channels < 256; channels += 48) {
2093 DWConvMicrokernelTester()
2094 .cr(16)
2095 .kr(9)
2096 .channels(channels)
2097 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002099 }
2100 }
2101
2102 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
2103 TEST_REQUIRES_ARM_NEON_V8;
2104 for (uint32_t channels = 32; channels < 256; channels += 48) {
2105 DWConvMicrokernelTester()
2106 .cr(16)
2107 .kr(9)
2108 .channels(channels)
2109 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002110 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002111 }
2112 }
2113
2114 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_lt_16) {
2115 TEST_REQUIRES_ARM_NEON_V8;
2116 for (uint32_t channels = 1; channels < 16; channels++) {
2117 DWConvMicrokernelTester()
2118 .cr(16)
2119 .kr(9)
2120 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002121 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002122 }
2123 }
2124
2125 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16) {
2126 TEST_REQUIRES_ARM_NEON_V8;
2127 for (uint32_t channels = 17; channels < 32; channels++) {
2128 DWConvMicrokernelTester()
2129 .cr(16)
2130 .kr(9)
2131 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002132 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002133 }
2134 }
2135
2136 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
2137 TEST_REQUIRES_ARM_NEON_V8;
2138 for (uint32_t channels = 17; channels < 32; channels++) {
2139 DWConvMicrokernelTester()
2140 .cr(16)
2141 .kr(9)
2142 .channels(channels)
2143 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002144 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002145 }
2146 }
2147
2148 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
2149 TEST_REQUIRES_ARM_NEON_V8;
2150 for (uint32_t channels = 17; channels < 32; channels++) {
2151 DWConvMicrokernelTester()
2152 .cr(16)
2153 .kr(9)
2154 .channels(channels)
2155 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002156 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002157 }
2158 }
2159
2160 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel) {
2161 TEST_REQUIRES_ARM_NEON_V8;
2162 for (size_t channels = 1; channels <= 80; channels += 15) {
2163 DWConvMicrokernelTester()
2164 .cr(16)
2165 .kr(9)
2166 .channels(channels)
2167 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002168 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002169 }
2170 }
2171
2172 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_step) {
2173 TEST_REQUIRES_ARM_NEON_V8;
2174 for (size_t channels = 1; channels <= 80; channels += 15) {
2175 for (size_t step = 2; step <= 9; step++) {
2176 DWConvMicrokernelTester()
2177 .cr(16)
2178 .kr(9)
2179 .channels(channels)
2180 .width(3)
2181 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002182 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002183 }
2184 }
2185 }
2186
2187 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
2188 TEST_REQUIRES_ARM_NEON_V8;
2189 for (size_t channels = 1; channels <= 80; channels += 15) {
2190 DWConvMicrokernelTester()
2191 .cr(16)
2192 .kr(9)
2193 .channels(16)
2194 .width(5)
2195 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002196 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002197 }
2198 }
2199
2200 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmin) {
2201 TEST_REQUIRES_ARM_NEON_V8;
2202 for (size_t channels = 1; channels <= 80; channels += 15) {
2203 DWConvMicrokernelTester()
2204 .cr(16)
2205 .kr(9)
2206 .channels(channels)
2207 .width(3)
2208 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002209 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002210 }
2211 }
2212
2213 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmax) {
2214 TEST_REQUIRES_ARM_NEON_V8;
2215 for (size_t channels = 1; channels <= 80; channels += 15) {
2216 DWConvMicrokernelTester()
2217 .cr(16)
2218 .kr(9)
2219 .channels(channels)
2220 .width(3)
2221 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002222 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002223 }
2224 }
2225
2226 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, input_offset) {
2227 TEST_REQUIRES_ARM_NEON_V8;
2228 for (uint32_t channels = 32; channels < 256; channels += 48) {
2229 DWConvMicrokernelTester()
2230 .cr(16)
2231 .kr(9)
2232 .channels(channels)
2233 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002234 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -07002235 }
2236 }
2237
2238 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, zero) {
2239 TEST_REQUIRES_ARM_NEON_V8;
2240 for (uint32_t mz = 0; mz < 9; mz++) {
2241 for (uint32_t channels = 32; channels < 256; channels += 48) {
2242 DWConvMicrokernelTester()
2243 .cr(16)
2244 .kr(9)
2245 .channels(channels)
2246 .input_offset(304)
2247 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002248 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -07002249 }
2250 }
2251 }
2252#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2253
2254
2255#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan59af5812021-06-29 18:09:57 -07002256 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
2257 TEST_REQUIRES_ARM_NEON;
2258 DWConvMicrokernelTester()
2259 .cr(8)
2260 .kr(9)
2261 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002262 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002263 }
2264
2265 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
2266 TEST_REQUIRES_ARM_NEON;
2267 for (uint32_t channels = 16; channels < 128; channels += 24) {
2268 DWConvMicrokernelTester()
2269 .cr(8)
2270 .kr(9)
2271 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002272 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002273 }
2274 }
2275
2276 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
2277 TEST_REQUIRES_ARM_NEON;
2278 for (uint32_t channels = 16; channels < 128; channels += 24) {
2279 DWConvMicrokernelTester()
2280 .cr(8)
2281 .kr(9)
2282 .channels(channels)
2283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002284 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002285 }
2286 }
2287
2288 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
2289 TEST_REQUIRES_ARM_NEON;
2290 for (uint32_t channels = 16; channels < 128; channels += 24) {
2291 DWConvMicrokernelTester()
2292 .cr(8)
2293 .kr(9)
2294 .channels(channels)
2295 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002296 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002297 }
2298 }
2299
2300 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
2301 TEST_REQUIRES_ARM_NEON;
2302 for (uint32_t channels = 1; channels < 8; channels++) {
2303 DWConvMicrokernelTester()
2304 .cr(8)
2305 .kr(9)
2306 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002307 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002308 }
2309 }
2310
2311 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
2312 TEST_REQUIRES_ARM_NEON;
2313 for (uint32_t channels = 9; channels < 16; channels++) {
2314 DWConvMicrokernelTester()
2315 .cr(8)
2316 .kr(9)
2317 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002318 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002319 }
2320 }
2321
2322 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
2323 TEST_REQUIRES_ARM_NEON;
2324 for (uint32_t channels = 9; channels < 16; channels++) {
2325 DWConvMicrokernelTester()
2326 .cr(8)
2327 .kr(9)
2328 .channels(channels)
2329 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002331 }
2332 }
2333
2334 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
2335 TEST_REQUIRES_ARM_NEON;
2336 for (uint32_t channels = 9; channels < 16; channels++) {
2337 DWConvMicrokernelTester()
2338 .cr(8)
2339 .kr(9)
2340 .channels(channels)
2341 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002342 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002343 }
2344 }
2345
2346 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
2347 TEST_REQUIRES_ARM_NEON;
2348 for (size_t channels = 1; channels <= 40; channels += 7) {
2349 DWConvMicrokernelTester()
2350 .cr(8)
2351 .kr(9)
2352 .channels(channels)
2353 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002354 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002355 }
2356 }
2357
2358 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
2359 TEST_REQUIRES_ARM_NEON;
2360 for (size_t channels = 1; channels <= 40; channels += 7) {
2361 for (size_t step = 2; step <= 9; step++) {
2362 DWConvMicrokernelTester()
2363 .cr(8)
2364 .kr(9)
2365 .channels(channels)
2366 .width(3)
2367 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002368 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002369 }
2370 }
2371 }
2372
2373 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
2374 TEST_REQUIRES_ARM_NEON;
2375 for (size_t channels = 1; channels <= 40; channels += 7) {
2376 DWConvMicrokernelTester()
2377 .cr(8)
2378 .kr(9)
2379 .channels(8)
2380 .width(5)
2381 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002382 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002383 }
2384 }
2385
2386 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
2387 TEST_REQUIRES_ARM_NEON;
2388 for (size_t channels = 1; channels <= 40; channels += 7) {
2389 DWConvMicrokernelTester()
2390 .cr(8)
2391 .kr(9)
2392 .channels(channels)
2393 .width(3)
2394 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002395 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002396 }
2397 }
2398
2399 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
2400 TEST_REQUIRES_ARM_NEON;
2401 for (size_t channels = 1; channels <= 40; channels += 7) {
2402 DWConvMicrokernelTester()
2403 .cr(8)
2404 .kr(9)
2405 .channels(channels)
2406 .width(3)
2407 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002408 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002409 }
2410 }
2411
2412 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
2413 TEST_REQUIRES_ARM_NEON;
2414 for (uint32_t channels = 16; channels < 128; channels += 24) {
2415 DWConvMicrokernelTester()
2416 .cr(8)
2417 .kr(9)
2418 .channels(channels)
2419 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08002420 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002421 }
2422 }
2423
2424 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
2425 TEST_REQUIRES_ARM_NEON;
2426 for (uint32_t mz = 0; mz < 9; mz++) {
2427 for (uint32_t channels = 16; channels < 128; channels += 24) {
2428 DWConvMicrokernelTester()
2429 .cr(8)
2430 .kr(9)
2431 .channels(channels)
2432 .input_offset(176)
2433 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002434 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002435 }
2436 }
2437 }
2438#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2439
2440
2441#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2442 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
2443 TEST_REQUIRES_ARM_NEON;
2444 DWConvMicrokernelTester()
2445 .cr(16)
2446 .kr(9)
2447 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002448 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002449 }
2450
2451 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
2452 TEST_REQUIRES_ARM_NEON;
2453 for (uint32_t channels = 32; channels < 256; channels += 48) {
2454 DWConvMicrokernelTester()
2455 .cr(16)
2456 .kr(9)
2457 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002458 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002459 }
2460 }
2461
2462 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
2463 TEST_REQUIRES_ARM_NEON;
2464 for (uint32_t channels = 32; channels < 256; channels += 48) {
2465 DWConvMicrokernelTester()
2466 .cr(16)
2467 .kr(9)
2468 .channels(channels)
2469 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002470 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002471 }
2472 }
2473
2474 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
2475 TEST_REQUIRES_ARM_NEON;
2476 for (uint32_t channels = 32; channels < 256; channels += 48) {
2477 DWConvMicrokernelTester()
2478 .cr(16)
2479 .kr(9)
2480 .channels(channels)
2481 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002482 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002483 }
2484 }
2485
2486 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
2487 TEST_REQUIRES_ARM_NEON;
2488 for (uint32_t channels = 1; channels < 16; channels++) {
2489 DWConvMicrokernelTester()
2490 .cr(16)
2491 .kr(9)
2492 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002493 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002494 }
2495 }
2496
2497 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
2498 TEST_REQUIRES_ARM_NEON;
2499 for (uint32_t channels = 17; channels < 32; channels++) {
2500 DWConvMicrokernelTester()
2501 .cr(16)
2502 .kr(9)
2503 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002504 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002505 }
2506 }
2507
2508 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
2509 TEST_REQUIRES_ARM_NEON;
2510 for (uint32_t channels = 17; channels < 32; channels++) {
2511 DWConvMicrokernelTester()
2512 .cr(16)
2513 .kr(9)
2514 .channels(channels)
2515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002516 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002517 }
2518 }
2519
2520 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
2521 TEST_REQUIRES_ARM_NEON;
2522 for (uint32_t channels = 17; channels < 32; channels++) {
2523 DWConvMicrokernelTester()
2524 .cr(16)
2525 .kr(9)
2526 .channels(channels)
2527 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002528 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002529 }
2530 }
2531
2532 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
2533 TEST_REQUIRES_ARM_NEON;
2534 for (size_t channels = 1; channels <= 80; channels += 15) {
2535 DWConvMicrokernelTester()
2536 .cr(16)
2537 .kr(9)
2538 .channels(channels)
2539 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002540 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002541 }
2542 }
2543
2544 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
2545 TEST_REQUIRES_ARM_NEON;
2546 for (size_t channels = 1; channels <= 80; channels += 15) {
2547 for (size_t step = 2; step <= 9; step++) {
2548 DWConvMicrokernelTester()
2549 .cr(16)
2550 .kr(9)
2551 .channels(channels)
2552 .width(3)
2553 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002554 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002555 }
2556 }
2557 }
2558
2559 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
2560 TEST_REQUIRES_ARM_NEON;
2561 for (size_t channels = 1; channels <= 80; channels += 15) {
2562 DWConvMicrokernelTester()
2563 .cr(16)
2564 .kr(9)
2565 .channels(16)
2566 .width(5)
2567 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002568 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002569 }
2570 }
2571
2572 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
2573 TEST_REQUIRES_ARM_NEON;
2574 for (size_t channels = 1; channels <= 80; channels += 15) {
2575 DWConvMicrokernelTester()
2576 .cr(16)
2577 .kr(9)
2578 .channels(channels)
2579 .width(3)
2580 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002581 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002582 }
2583 }
2584
2585 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
2586 TEST_REQUIRES_ARM_NEON;
2587 for (size_t channels = 1; channels <= 80; channels += 15) {
2588 DWConvMicrokernelTester()
2589 .cr(16)
2590 .kr(9)
2591 .channels(channels)
2592 .width(3)
2593 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002594 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002595 }
2596 }
2597
2598 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
2599 TEST_REQUIRES_ARM_NEON;
2600 for (uint32_t channels = 32; channels < 256; channels += 48) {
2601 DWConvMicrokernelTester()
2602 .cr(16)
2603 .kr(9)
2604 .channels(channels)
2605 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08002606 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002607 }
2608 }
2609
2610 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
2611 TEST_REQUIRES_ARM_NEON;
2612 for (uint32_t mz = 0; mz < 9; mz++) {
2613 for (uint32_t channels = 32; channels < 256; channels += 48) {
2614 DWConvMicrokernelTester()
2615 .cr(16)
2616 .kr(9)
2617 .channels(channels)
2618 .input_offset(304)
2619 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002620 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002621 }
2622 }
2623 }
2624#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2625
2626
2627#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2628 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
2629 TEST_REQUIRES_ARM_NEON;
2630 DWConvMicrokernelTester()
2631 .cr(24)
2632 .kr(9)
2633 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08002634 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002635 }
2636
2637 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
2638 TEST_REQUIRES_ARM_NEON;
2639 for (uint32_t channels = 48; channels < 384; channels += 72) {
2640 DWConvMicrokernelTester()
2641 .cr(24)
2642 .kr(9)
2643 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002644 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002645 }
2646 }
2647
2648 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
2649 TEST_REQUIRES_ARM_NEON;
2650 for (uint32_t channels = 48; channels < 384; channels += 72) {
2651 DWConvMicrokernelTester()
2652 .cr(24)
2653 .kr(9)
2654 .channels(channels)
2655 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002656 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002657 }
2658 }
2659
2660 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
2661 TEST_REQUIRES_ARM_NEON;
2662 for (uint32_t channels = 48; channels < 384; channels += 72) {
2663 DWConvMicrokernelTester()
2664 .cr(24)
2665 .kr(9)
2666 .channels(channels)
2667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002668 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002669 }
2670 }
2671
2672 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
2673 TEST_REQUIRES_ARM_NEON;
2674 for (uint32_t channels = 1; channels < 24; channels++) {
2675 DWConvMicrokernelTester()
2676 .cr(24)
2677 .kr(9)
2678 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002679 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002680 }
2681 }
2682
2683 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
2684 TEST_REQUIRES_ARM_NEON;
2685 for (uint32_t channels = 25; channels < 48; channels++) {
2686 DWConvMicrokernelTester()
2687 .cr(24)
2688 .kr(9)
2689 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002691 }
2692 }
2693
2694 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
2695 TEST_REQUIRES_ARM_NEON;
2696 for (uint32_t channels = 25; channels < 48; channels++) {
2697 DWConvMicrokernelTester()
2698 .cr(24)
2699 .kr(9)
2700 .channels(channels)
2701 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002702 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002703 }
2704 }
2705
2706 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
2707 TEST_REQUIRES_ARM_NEON;
2708 for (uint32_t channels = 25; channels < 48; channels++) {
2709 DWConvMicrokernelTester()
2710 .cr(24)
2711 .kr(9)
2712 .channels(channels)
2713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002714 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002715 }
2716 }
2717
2718 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
2719 TEST_REQUIRES_ARM_NEON;
2720 for (size_t channels = 1; channels <= 120; channels += 23) {
2721 DWConvMicrokernelTester()
2722 .cr(24)
2723 .kr(9)
2724 .channels(channels)
2725 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002726 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002727 }
2728 }
2729
2730 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
2731 TEST_REQUIRES_ARM_NEON;
2732 for (size_t channels = 1; channels <= 120; channels += 23) {
2733 for (size_t step = 2; step <= 9; step++) {
2734 DWConvMicrokernelTester()
2735 .cr(24)
2736 .kr(9)
2737 .channels(channels)
2738 .width(3)
2739 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002740 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002741 }
2742 }
2743 }
2744
2745 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
2746 TEST_REQUIRES_ARM_NEON;
2747 for (size_t channels = 1; channels <= 120; channels += 23) {
2748 DWConvMicrokernelTester()
2749 .cr(24)
2750 .kr(9)
2751 .channels(24)
2752 .width(5)
2753 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08002754 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002755 }
2756 }
2757
2758 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
2759 TEST_REQUIRES_ARM_NEON;
2760 for (size_t channels = 1; channels <= 120; channels += 23) {
2761 DWConvMicrokernelTester()
2762 .cr(24)
2763 .kr(9)
2764 .channels(channels)
2765 .width(3)
2766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002767 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002768 }
2769 }
2770
2771 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
2772 TEST_REQUIRES_ARM_NEON;
2773 for (size_t channels = 1; channels <= 120; channels += 23) {
2774 DWConvMicrokernelTester()
2775 .cr(24)
2776 .kr(9)
2777 .channels(channels)
2778 .width(3)
2779 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002780 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002781 }
2782 }
2783
2784 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
2785 TEST_REQUIRES_ARM_NEON;
2786 for (uint32_t channels = 48; channels < 384; channels += 72) {
2787 DWConvMicrokernelTester()
2788 .cr(24)
2789 .kr(9)
2790 .channels(channels)
2791 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08002792 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002793 }
2794 }
2795
2796 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
2797 TEST_REQUIRES_ARM_NEON;
2798 for (uint32_t mz = 0; mz < 9; mz++) {
2799 for (uint32_t channels = 48; channels < 384; channels += 72) {
2800 DWConvMicrokernelTester()
2801 .cr(24)
2802 .kr(9)
2803 .channels(channels)
2804 .input_offset(464)
2805 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002807 }
2808 }
2809 }
2810#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2811
2812
2813#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2814 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
2815 TEST_REQUIRES_ARM_NEON;
2816 DWConvMicrokernelTester()
2817 .cr(32)
2818 .kr(9)
2819 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08002820 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002821 }
2822
2823 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
2824 TEST_REQUIRES_ARM_NEON;
2825 for (uint32_t channels = 64; channels < 512; channels += 96) {
2826 DWConvMicrokernelTester()
2827 .cr(32)
2828 .kr(9)
2829 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002831 }
2832 }
2833
2834 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
2835 TEST_REQUIRES_ARM_NEON;
2836 for (uint32_t channels = 64; channels < 512; channels += 96) {
2837 DWConvMicrokernelTester()
2838 .cr(32)
2839 .kr(9)
2840 .channels(channels)
2841 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002842 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002843 }
2844 }
2845
2846 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
2847 TEST_REQUIRES_ARM_NEON;
2848 for (uint32_t channels = 64; channels < 512; channels += 96) {
2849 DWConvMicrokernelTester()
2850 .cr(32)
2851 .kr(9)
2852 .channels(channels)
2853 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002854 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002855 }
2856 }
2857
2858 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
2859 TEST_REQUIRES_ARM_NEON;
2860 for (uint32_t channels = 1; channels < 32; channels++) {
2861 DWConvMicrokernelTester()
2862 .cr(32)
2863 .kr(9)
2864 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002865 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002866 }
2867 }
2868
2869 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
2870 TEST_REQUIRES_ARM_NEON;
2871 for (uint32_t channels = 33; channels < 64; channels++) {
2872 DWConvMicrokernelTester()
2873 .cr(32)
2874 .kr(9)
2875 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08002876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002877 }
2878 }
2879
2880 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
2881 TEST_REQUIRES_ARM_NEON;
2882 for (uint32_t channels = 33; channels < 64; channels++) {
2883 DWConvMicrokernelTester()
2884 .cr(32)
2885 .kr(9)
2886 .channels(channels)
2887 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002888 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002889 }
2890 }
2891
2892 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
2893 TEST_REQUIRES_ARM_NEON;
2894 for (uint32_t channels = 33; channels < 64; channels++) {
2895 DWConvMicrokernelTester()
2896 .cr(32)
2897 .kr(9)
2898 .channels(channels)
2899 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002900 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002901 }
2902 }
2903
2904 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
2905 TEST_REQUIRES_ARM_NEON;
2906 for (size_t channels = 1; channels <= 160; channels += 31) {
2907 DWConvMicrokernelTester()
2908 .cr(32)
2909 .kr(9)
2910 .channels(channels)
2911 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002912 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002913 }
2914 }
2915
2916 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
2917 TEST_REQUIRES_ARM_NEON;
2918 for (size_t channels = 1; channels <= 160; channels += 31) {
2919 for (size_t step = 2; step <= 9; step++) {
2920 DWConvMicrokernelTester()
2921 .cr(32)
2922 .kr(9)
2923 .channels(channels)
2924 .width(3)
2925 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08002926 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002927 }
2928 }
2929 }
2930
2931 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
2932 TEST_REQUIRES_ARM_NEON;
2933 for (size_t channels = 1; channels <= 160; channels += 31) {
2934 DWConvMicrokernelTester()
2935 .cr(32)
2936 .kr(9)
2937 .channels(32)
2938 .width(5)
2939 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002940 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002941 }
2942 }
2943
2944 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
2945 TEST_REQUIRES_ARM_NEON;
2946 for (size_t channels = 1; channels <= 160; channels += 31) {
2947 DWConvMicrokernelTester()
2948 .cr(32)
2949 .kr(9)
2950 .channels(channels)
2951 .width(3)
2952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002953 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002954 }
2955 }
2956
2957 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
2958 TEST_REQUIRES_ARM_NEON;
2959 for (size_t channels = 1; channels <= 160; channels += 31) {
2960 DWConvMicrokernelTester()
2961 .cr(32)
2962 .kr(9)
2963 .channels(channels)
2964 .width(3)
2965 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002966 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002967 }
2968 }
2969
2970 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
2971 TEST_REQUIRES_ARM_NEON;
2972 for (uint32_t channels = 64; channels < 512; channels += 96) {
2973 DWConvMicrokernelTester()
2974 .cr(32)
2975 .kr(9)
2976 .channels(channels)
2977 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08002978 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002979 }
2980 }
2981
2982 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
2983 TEST_REQUIRES_ARM_NEON;
2984 for (uint32_t mz = 0; mz < 9; mz++) {
2985 for (uint32_t channels = 64; channels < 512; channels += 96) {
2986 DWConvMicrokernelTester()
2987 .cr(32)
2988 .kr(9)
2989 .channels(channels)
2990 .input_offset(592)
2991 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07002993 }
2994 }
2995 }
2996#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2997
2998
2999#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3000 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
3001 TEST_REQUIRES_ARM_NEON_V8;
3002 DWConvMicrokernelTester()
3003 .cr(8)
3004 .kr(9)
3005 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003006 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003007 }
3008
3009 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
3010 TEST_REQUIRES_ARM_NEON_V8;
3011 for (uint32_t channels = 16; channels < 128; channels += 24) {
3012 DWConvMicrokernelTester()
3013 .cr(8)
3014 .kr(9)
3015 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003016 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003017 }
3018 }
3019
3020 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
3021 TEST_REQUIRES_ARM_NEON_V8;
3022 for (uint32_t channels = 16; channels < 128; channels += 24) {
3023 DWConvMicrokernelTester()
3024 .cr(8)
3025 .kr(9)
3026 .channels(channels)
3027 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003028 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003029 }
3030 }
3031
3032 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
3033 TEST_REQUIRES_ARM_NEON_V8;
3034 for (uint32_t channels = 16; channels < 128; channels += 24) {
3035 DWConvMicrokernelTester()
3036 .cr(8)
3037 .kr(9)
3038 .channels(channels)
3039 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003040 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003041 }
3042 }
3043
3044 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
3045 TEST_REQUIRES_ARM_NEON_V8;
3046 for (uint32_t channels = 1; channels < 8; channels++) {
3047 DWConvMicrokernelTester()
3048 .cr(8)
3049 .kr(9)
3050 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003051 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003052 }
3053 }
3054
3055 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
3056 TEST_REQUIRES_ARM_NEON_V8;
3057 for (uint32_t channels = 9; channels < 16; channels++) {
3058 DWConvMicrokernelTester()
3059 .cr(8)
3060 .kr(9)
3061 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003062 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003063 }
3064 }
3065
3066 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
3067 TEST_REQUIRES_ARM_NEON_V8;
3068 for (uint32_t channels = 9; channels < 16; channels++) {
3069 DWConvMicrokernelTester()
3070 .cr(8)
3071 .kr(9)
3072 .channels(channels)
3073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003075 }
3076 }
3077
3078 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
3079 TEST_REQUIRES_ARM_NEON_V8;
3080 for (uint32_t channels = 9; channels < 16; channels++) {
3081 DWConvMicrokernelTester()
3082 .cr(8)
3083 .kr(9)
3084 .channels(channels)
3085 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003086 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003087 }
3088 }
3089
3090 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
3091 TEST_REQUIRES_ARM_NEON_V8;
3092 for (size_t channels = 1; channels <= 40; channels += 7) {
3093 DWConvMicrokernelTester()
3094 .cr(8)
3095 .kr(9)
3096 .channels(channels)
3097 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003099 }
3100 }
3101
3102 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
3103 TEST_REQUIRES_ARM_NEON_V8;
3104 for (size_t channels = 1; channels <= 40; channels += 7) {
3105 for (size_t step = 2; step <= 9; step++) {
3106 DWConvMicrokernelTester()
3107 .cr(8)
3108 .kr(9)
3109 .channels(channels)
3110 .width(3)
3111 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003112 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003113 }
3114 }
3115 }
3116
3117 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
3118 TEST_REQUIRES_ARM_NEON_V8;
3119 for (size_t channels = 1; channels <= 40; channels += 7) {
3120 DWConvMicrokernelTester()
3121 .cr(8)
3122 .kr(9)
3123 .channels(8)
3124 .width(5)
3125 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003126 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003127 }
3128 }
3129
3130 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
3131 TEST_REQUIRES_ARM_NEON_V8;
3132 for (size_t channels = 1; channels <= 40; channels += 7) {
3133 DWConvMicrokernelTester()
3134 .cr(8)
3135 .kr(9)
3136 .channels(channels)
3137 .width(3)
3138 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003140 }
3141 }
3142
3143 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
3144 TEST_REQUIRES_ARM_NEON_V8;
3145 for (size_t channels = 1; channels <= 40; channels += 7) {
3146 DWConvMicrokernelTester()
3147 .cr(8)
3148 .kr(9)
3149 .channels(channels)
3150 .width(3)
3151 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003152 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003153 }
3154 }
3155
3156 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
3157 TEST_REQUIRES_ARM_NEON_V8;
3158 for (uint32_t channels = 16; channels < 128; channels += 24) {
3159 DWConvMicrokernelTester()
3160 .cr(8)
3161 .kr(9)
3162 .channels(channels)
3163 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003164 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003165 }
3166 }
3167
3168 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
3169 TEST_REQUIRES_ARM_NEON_V8;
3170 for (uint32_t mz = 0; mz < 9; mz++) {
3171 for (uint32_t channels = 16; channels < 128; channels += 24) {
3172 DWConvMicrokernelTester()
3173 .cr(8)
3174 .kr(9)
3175 .channels(channels)
3176 .input_offset(176)
3177 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003178 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003179 }
3180 }
3181 }
3182#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3183
3184
3185#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3186 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
3187 TEST_REQUIRES_ARM_NEON_V8;
3188 DWConvMicrokernelTester()
3189 .cr(16)
3190 .kr(9)
3191 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003192 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003193 }
3194
3195 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
3196 TEST_REQUIRES_ARM_NEON_V8;
3197 for (uint32_t channels = 32; channels < 256; channels += 48) {
3198 DWConvMicrokernelTester()
3199 .cr(16)
3200 .kr(9)
3201 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003203 }
3204 }
3205
3206 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
3207 TEST_REQUIRES_ARM_NEON_V8;
3208 for (uint32_t channels = 32; channels < 256; channels += 48) {
3209 DWConvMicrokernelTester()
3210 .cr(16)
3211 .kr(9)
3212 .channels(channels)
3213 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003214 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003215 }
3216 }
3217
3218 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
3219 TEST_REQUIRES_ARM_NEON_V8;
3220 for (uint32_t channels = 32; channels < 256; channels += 48) {
3221 DWConvMicrokernelTester()
3222 .cr(16)
3223 .kr(9)
3224 .channels(channels)
3225 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003226 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003227 }
3228 }
3229
3230 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
3231 TEST_REQUIRES_ARM_NEON_V8;
3232 for (uint32_t channels = 1; channels < 16; channels++) {
3233 DWConvMicrokernelTester()
3234 .cr(16)
3235 .kr(9)
3236 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003237 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003238 }
3239 }
3240
3241 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
3242 TEST_REQUIRES_ARM_NEON_V8;
3243 for (uint32_t channels = 17; channels < 32; channels++) {
3244 DWConvMicrokernelTester()
3245 .cr(16)
3246 .kr(9)
3247 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003248 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003249 }
3250 }
3251
3252 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
3253 TEST_REQUIRES_ARM_NEON_V8;
3254 for (uint32_t channels = 17; channels < 32; channels++) {
3255 DWConvMicrokernelTester()
3256 .cr(16)
3257 .kr(9)
3258 .channels(channels)
3259 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003260 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003261 }
3262 }
3263
3264 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
3265 TEST_REQUIRES_ARM_NEON_V8;
3266 for (uint32_t channels = 17; channels < 32; channels++) {
3267 DWConvMicrokernelTester()
3268 .cr(16)
3269 .kr(9)
3270 .channels(channels)
3271 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003272 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003273 }
3274 }
3275
3276 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
3277 TEST_REQUIRES_ARM_NEON_V8;
3278 for (size_t channels = 1; channels <= 80; channels += 15) {
3279 DWConvMicrokernelTester()
3280 .cr(16)
3281 .kr(9)
3282 .channels(channels)
3283 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003284 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003285 }
3286 }
3287
3288 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
3289 TEST_REQUIRES_ARM_NEON_V8;
3290 for (size_t channels = 1; channels <= 80; channels += 15) {
3291 for (size_t step = 2; step <= 9; step++) {
3292 DWConvMicrokernelTester()
3293 .cr(16)
3294 .kr(9)
3295 .channels(channels)
3296 .width(3)
3297 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003298 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003299 }
3300 }
3301 }
3302
3303 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
3304 TEST_REQUIRES_ARM_NEON_V8;
3305 for (size_t channels = 1; channels <= 80; channels += 15) {
3306 DWConvMicrokernelTester()
3307 .cr(16)
3308 .kr(9)
3309 .channels(16)
3310 .width(5)
3311 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003312 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003313 }
3314 }
3315
3316 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
3317 TEST_REQUIRES_ARM_NEON_V8;
3318 for (size_t channels = 1; channels <= 80; channels += 15) {
3319 DWConvMicrokernelTester()
3320 .cr(16)
3321 .kr(9)
3322 .channels(channels)
3323 .width(3)
3324 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003325 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003326 }
3327 }
3328
3329 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
3330 TEST_REQUIRES_ARM_NEON_V8;
3331 for (size_t channels = 1; channels <= 80; channels += 15) {
3332 DWConvMicrokernelTester()
3333 .cr(16)
3334 .kr(9)
3335 .channels(channels)
3336 .width(3)
3337 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003338 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003339 }
3340 }
3341
3342 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
3343 TEST_REQUIRES_ARM_NEON_V8;
3344 for (uint32_t channels = 32; channels < 256; channels += 48) {
3345 DWConvMicrokernelTester()
3346 .cr(16)
3347 .kr(9)
3348 .channels(channels)
3349 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08003350 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003351 }
3352 }
3353
3354 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
3355 TEST_REQUIRES_ARM_NEON_V8;
3356 for (uint32_t mz = 0; mz < 9; mz++) {
3357 for (uint32_t channels = 32; channels < 256; channels += 48) {
3358 DWConvMicrokernelTester()
3359 .cr(16)
3360 .kr(9)
3361 .channels(channels)
3362 .input_offset(304)
3363 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003364 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003365 }
3366 }
3367 }
3368#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3369
3370
3371#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3372 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
3373 TEST_REQUIRES_ARM_NEON_V8;
3374 DWConvMicrokernelTester()
3375 .cr(24)
3376 .kr(9)
3377 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08003378 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003379 }
3380
3381 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
3382 TEST_REQUIRES_ARM_NEON_V8;
3383 for (uint32_t channels = 48; channels < 384; channels += 72) {
3384 DWConvMicrokernelTester()
3385 .cr(24)
3386 .kr(9)
3387 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003389 }
3390 }
3391
3392 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
3393 TEST_REQUIRES_ARM_NEON_V8;
3394 for (uint32_t channels = 48; channels < 384; channels += 72) {
3395 DWConvMicrokernelTester()
3396 .cr(24)
3397 .kr(9)
3398 .channels(channels)
3399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003400 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003401 }
3402 }
3403
3404 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
3405 TEST_REQUIRES_ARM_NEON_V8;
3406 for (uint32_t channels = 48; channels < 384; channels += 72) {
3407 DWConvMicrokernelTester()
3408 .cr(24)
3409 .kr(9)
3410 .channels(channels)
3411 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003412 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003413 }
3414 }
3415
3416 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
3417 TEST_REQUIRES_ARM_NEON_V8;
3418 for (uint32_t channels = 1; channels < 24; channels++) {
3419 DWConvMicrokernelTester()
3420 .cr(24)
3421 .kr(9)
3422 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003423 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003424 }
3425 }
3426
3427 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
3428 TEST_REQUIRES_ARM_NEON_V8;
3429 for (uint32_t channels = 25; channels < 48; channels++) {
3430 DWConvMicrokernelTester()
3431 .cr(24)
3432 .kr(9)
3433 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003434 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003435 }
3436 }
3437
3438 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
3439 TEST_REQUIRES_ARM_NEON_V8;
3440 for (uint32_t channels = 25; channels < 48; channels++) {
3441 DWConvMicrokernelTester()
3442 .cr(24)
3443 .kr(9)
3444 .channels(channels)
3445 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003446 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003447 }
3448 }
3449
3450 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
3451 TEST_REQUIRES_ARM_NEON_V8;
3452 for (uint32_t channels = 25; channels < 48; channels++) {
3453 DWConvMicrokernelTester()
3454 .cr(24)
3455 .kr(9)
3456 .channels(channels)
3457 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003458 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003459 }
3460 }
3461
3462 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
3463 TEST_REQUIRES_ARM_NEON_V8;
3464 for (size_t channels = 1; channels <= 120; channels += 23) {
3465 DWConvMicrokernelTester()
3466 .cr(24)
3467 .kr(9)
3468 .channels(channels)
3469 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003470 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003471 }
3472 }
3473
3474 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
3475 TEST_REQUIRES_ARM_NEON_V8;
3476 for (size_t channels = 1; channels <= 120; channels += 23) {
3477 for (size_t step = 2; step <= 9; step++) {
3478 DWConvMicrokernelTester()
3479 .cr(24)
3480 .kr(9)
3481 .channels(channels)
3482 .width(3)
3483 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003484 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003485 }
3486 }
3487 }
3488
3489 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
3490 TEST_REQUIRES_ARM_NEON_V8;
3491 for (size_t channels = 1; channels <= 120; channels += 23) {
3492 DWConvMicrokernelTester()
3493 .cr(24)
3494 .kr(9)
3495 .channels(24)
3496 .width(5)
3497 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08003498 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003499 }
3500 }
3501
3502 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
3503 TEST_REQUIRES_ARM_NEON_V8;
3504 for (size_t channels = 1; channels <= 120; channels += 23) {
3505 DWConvMicrokernelTester()
3506 .cr(24)
3507 .kr(9)
3508 .channels(channels)
3509 .width(3)
3510 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003511 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003512 }
3513 }
3514
3515 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
3516 TEST_REQUIRES_ARM_NEON_V8;
3517 for (size_t channels = 1; channels <= 120; channels += 23) {
3518 DWConvMicrokernelTester()
3519 .cr(24)
3520 .kr(9)
3521 .channels(channels)
3522 .width(3)
3523 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003524 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003525 }
3526 }
3527
3528 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
3529 TEST_REQUIRES_ARM_NEON_V8;
3530 for (uint32_t channels = 48; channels < 384; channels += 72) {
3531 DWConvMicrokernelTester()
3532 .cr(24)
3533 .kr(9)
3534 .channels(channels)
3535 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08003536 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003537 }
3538 }
3539
3540 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
3541 TEST_REQUIRES_ARM_NEON_V8;
3542 for (uint32_t mz = 0; mz < 9; mz++) {
3543 for (uint32_t channels = 48; channels < 384; channels += 72) {
3544 DWConvMicrokernelTester()
3545 .cr(24)
3546 .kr(9)
3547 .channels(channels)
3548 .input_offset(464)
3549 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003551 }
3552 }
3553 }
3554#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3555
3556
3557#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3558 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
3559 TEST_REQUIRES_ARM_NEON_V8;
3560 DWConvMicrokernelTester()
3561 .cr(32)
3562 .kr(9)
3563 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08003564 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003565 }
3566
3567 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
3568 TEST_REQUIRES_ARM_NEON_V8;
3569 for (uint32_t channels = 64; channels < 512; channels += 96) {
3570 DWConvMicrokernelTester()
3571 .cr(32)
3572 .kr(9)
3573 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003575 }
3576 }
3577
3578 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
3579 TEST_REQUIRES_ARM_NEON_V8;
3580 for (uint32_t channels = 64; channels < 512; channels += 96) {
3581 DWConvMicrokernelTester()
3582 .cr(32)
3583 .kr(9)
3584 .channels(channels)
3585 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003586 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003587 }
3588 }
3589
3590 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
3591 TEST_REQUIRES_ARM_NEON_V8;
3592 for (uint32_t channels = 64; channels < 512; channels += 96) {
3593 DWConvMicrokernelTester()
3594 .cr(32)
3595 .kr(9)
3596 .channels(channels)
3597 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003598 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003599 }
3600 }
3601
3602 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
3603 TEST_REQUIRES_ARM_NEON_V8;
3604 for (uint32_t channels = 1; channels < 32; channels++) {
3605 DWConvMicrokernelTester()
3606 .cr(32)
3607 .kr(9)
3608 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003609 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003610 }
3611 }
3612
3613 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
3614 TEST_REQUIRES_ARM_NEON_V8;
3615 for (uint32_t channels = 33; channels < 64; channels++) {
3616 DWConvMicrokernelTester()
3617 .cr(32)
3618 .kr(9)
3619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003620 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003621 }
3622 }
3623
3624 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
3625 TEST_REQUIRES_ARM_NEON_V8;
3626 for (uint32_t channels = 33; channels < 64; channels++) {
3627 DWConvMicrokernelTester()
3628 .cr(32)
3629 .kr(9)
3630 .channels(channels)
3631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003632 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003633 }
3634 }
3635
3636 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
3637 TEST_REQUIRES_ARM_NEON_V8;
3638 for (uint32_t channels = 33; channels < 64; channels++) {
3639 DWConvMicrokernelTester()
3640 .cr(32)
3641 .kr(9)
3642 .channels(channels)
3643 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003644 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003645 }
3646 }
3647
3648 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
3649 TEST_REQUIRES_ARM_NEON_V8;
3650 for (size_t channels = 1; channels <= 160; channels += 31) {
3651 DWConvMicrokernelTester()
3652 .cr(32)
3653 .kr(9)
3654 .channels(channels)
3655 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003656 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003657 }
3658 }
3659
3660 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
3661 TEST_REQUIRES_ARM_NEON_V8;
3662 for (size_t channels = 1; channels <= 160; channels += 31) {
3663 for (size_t step = 2; step <= 9; step++) {
3664 DWConvMicrokernelTester()
3665 .cr(32)
3666 .kr(9)
3667 .channels(channels)
3668 .width(3)
3669 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003670 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003671 }
3672 }
3673 }
3674
3675 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
3676 TEST_REQUIRES_ARM_NEON_V8;
3677 for (size_t channels = 1; channels <= 160; channels += 31) {
3678 DWConvMicrokernelTester()
3679 .cr(32)
3680 .kr(9)
3681 .channels(32)
3682 .width(5)
3683 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003684 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003685 }
3686 }
3687
3688 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
3689 TEST_REQUIRES_ARM_NEON_V8;
3690 for (size_t channels = 1; channels <= 160; channels += 31) {
3691 DWConvMicrokernelTester()
3692 .cr(32)
3693 .kr(9)
3694 .channels(channels)
3695 .width(3)
3696 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003697 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003698 }
3699 }
3700
3701 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
3702 TEST_REQUIRES_ARM_NEON_V8;
3703 for (size_t channels = 1; channels <= 160; channels += 31) {
3704 DWConvMicrokernelTester()
3705 .cr(32)
3706 .kr(9)
3707 .channels(channels)
3708 .width(3)
3709 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003710 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003711 }
3712 }
3713
3714 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
3715 TEST_REQUIRES_ARM_NEON_V8;
3716 for (uint32_t channels = 64; channels < 512; channels += 96) {
3717 DWConvMicrokernelTester()
3718 .cr(32)
3719 .kr(9)
3720 .channels(channels)
3721 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08003722 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003723 }
3724 }
3725
3726 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
3727 TEST_REQUIRES_ARM_NEON_V8;
3728 for (uint32_t mz = 0; mz < 9; mz++) {
3729 for (uint32_t channels = 64; channels < 512; channels += 96) {
3730 DWConvMicrokernelTester()
3731 .cr(32)
3732 .kr(9)
3733 .channels(channels)
3734 .input_offset(592)
3735 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003736 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -07003737 }
3738 }
3739 }
3740#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3741
3742
Marat Dukhan82286892021-06-04 17:27:27 -07003743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -07003744 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
3745 TEST_REQUIRES_X86_SSE2;
3746 DWConvMicrokernelTester()
3747 .cr(8)
3748 .kr(9)
3749 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003750 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003751 }
3752
3753 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
3754 TEST_REQUIRES_X86_SSE2;
3755 for (uint32_t channels = 16; channels < 128; channels += 24) {
3756 DWConvMicrokernelTester()
3757 .cr(8)
3758 .kr(9)
3759 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003761 }
3762 }
3763
3764 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
3765 TEST_REQUIRES_X86_SSE2;
3766 for (uint32_t channels = 16; channels < 128; channels += 24) {
3767 DWConvMicrokernelTester()
3768 .cr(8)
3769 .kr(9)
3770 .channels(channels)
3771 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003772 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003773 }
3774 }
3775
3776 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
3777 TEST_REQUIRES_X86_SSE2;
3778 for (uint32_t channels = 16; channels < 128; channels += 24) {
3779 DWConvMicrokernelTester()
3780 .cr(8)
3781 .kr(9)
3782 .channels(channels)
3783 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003784 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003785 }
3786 }
3787
3788 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
3789 TEST_REQUIRES_X86_SSE2;
3790 for (uint32_t channels = 1; channels < 8; channels++) {
3791 DWConvMicrokernelTester()
3792 .cr(8)
3793 .kr(9)
3794 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003795 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003796 }
3797 }
3798
3799 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
3800 TEST_REQUIRES_X86_SSE2;
3801 for (uint32_t channels = 9; channels < 16; channels++) {
3802 DWConvMicrokernelTester()
3803 .cr(8)
3804 .kr(9)
3805 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003807 }
3808 }
3809
3810 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
3811 TEST_REQUIRES_X86_SSE2;
3812 for (uint32_t channels = 9; channels < 16; channels++) {
3813 DWConvMicrokernelTester()
3814 .cr(8)
3815 .kr(9)
3816 .channels(channels)
3817 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003818 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003819 }
3820 }
3821
3822 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
3823 TEST_REQUIRES_X86_SSE2;
3824 for (uint32_t channels = 9; channels < 16; channels++) {
3825 DWConvMicrokernelTester()
3826 .cr(8)
3827 .kr(9)
3828 .channels(channels)
3829 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003831 }
3832 }
3833
3834 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
3835 TEST_REQUIRES_X86_SSE2;
3836 for (size_t channels = 1; channels <= 40; channels += 7) {
3837 DWConvMicrokernelTester()
3838 .cr(8)
3839 .kr(9)
3840 .channels(channels)
3841 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003842 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003843 }
3844 }
3845
3846 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
3847 TEST_REQUIRES_X86_SSE2;
3848 for (size_t channels = 1; channels <= 40; channels += 7) {
3849 for (size_t step = 2; step <= 9; step++) {
3850 DWConvMicrokernelTester()
3851 .cr(8)
3852 .kr(9)
3853 .channels(channels)
3854 .width(3)
3855 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08003856 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003857 }
3858 }
3859 }
3860
3861 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
3862 TEST_REQUIRES_X86_SSE2;
3863 for (size_t channels = 1; channels <= 40; channels += 7) {
3864 DWConvMicrokernelTester()
3865 .cr(8)
3866 .kr(9)
3867 .channels(8)
3868 .width(5)
3869 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003870 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003871 }
3872 }
3873
3874 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
3875 TEST_REQUIRES_X86_SSE2;
3876 for (size_t channels = 1; channels <= 40; channels += 7) {
3877 DWConvMicrokernelTester()
3878 .cr(8)
3879 .kr(9)
3880 .channels(channels)
3881 .width(3)
3882 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003883 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003884 }
3885 }
3886
3887 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
3888 TEST_REQUIRES_X86_SSE2;
3889 for (size_t channels = 1; channels <= 40; channels += 7) {
3890 DWConvMicrokernelTester()
3891 .cr(8)
3892 .kr(9)
3893 .channels(channels)
3894 .width(3)
3895 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003896 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003897 }
3898 }
3899
3900 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
3901 TEST_REQUIRES_X86_SSE2;
3902 for (uint32_t channels = 16; channels < 128; channels += 24) {
3903 DWConvMicrokernelTester()
3904 .cr(8)
3905 .kr(9)
3906 .channels(channels)
3907 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08003908 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003909 }
3910 }
3911
3912 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
3913 TEST_REQUIRES_X86_SSE2;
3914 for (uint32_t mz = 0; mz < 9; mz++) {
3915 for (uint32_t channels = 16; channels < 128; channels += 24) {
3916 DWConvMicrokernelTester()
3917 .cr(8)
3918 .kr(9)
3919 .channels(channels)
3920 .input_offset(176)
3921 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003922 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003923 }
3924 }
3925 }
3926#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927
3928
3929#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3930 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
3931 TEST_REQUIRES_X86_SSE2;
3932 DWConvMicrokernelTester()
3933 .cr(16)
3934 .kr(9)
3935 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003936 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003937 }
3938
3939 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
3940 TEST_REQUIRES_X86_SSE2;
3941 for (uint32_t channels = 32; channels < 256; channels += 48) {
3942 DWConvMicrokernelTester()
3943 .cr(16)
3944 .kr(9)
3945 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003947 }
3948 }
3949
3950 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
3951 TEST_REQUIRES_X86_SSE2;
3952 for (uint32_t channels = 32; channels < 256; channels += 48) {
3953 DWConvMicrokernelTester()
3954 .cr(16)
3955 .kr(9)
3956 .channels(channels)
3957 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003958 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003959 }
3960 }
3961
3962 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
3963 TEST_REQUIRES_X86_SSE2;
3964 for (uint32_t channels = 32; channels < 256; channels += 48) {
3965 DWConvMicrokernelTester()
3966 .cr(16)
3967 .kr(9)
3968 .channels(channels)
3969 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003970 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003971 }
3972 }
3973
3974 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
3975 TEST_REQUIRES_X86_SSE2;
3976 for (uint32_t channels = 1; channels < 16; channels++) {
3977 DWConvMicrokernelTester()
3978 .cr(16)
3979 .kr(9)
3980 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003981 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003982 }
3983 }
3984
3985 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
3986 TEST_REQUIRES_X86_SSE2;
3987 for (uint32_t channels = 17; channels < 32; channels++) {
3988 DWConvMicrokernelTester()
3989 .cr(16)
3990 .kr(9)
3991 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08003992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07003993 }
3994 }
3995
3996 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
3997 TEST_REQUIRES_X86_SSE2;
3998 for (uint32_t channels = 17; channels < 32; channels++) {
3999 DWConvMicrokernelTester()
4000 .cr(16)
4001 .kr(9)
4002 .channels(channels)
4003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004004 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004005 }
4006 }
4007
4008 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
4009 TEST_REQUIRES_X86_SSE2;
4010 for (uint32_t channels = 17; channels < 32; channels++) {
4011 DWConvMicrokernelTester()
4012 .cr(16)
4013 .kr(9)
4014 .channels(channels)
4015 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004016 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004017 }
4018 }
4019
4020 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
4021 TEST_REQUIRES_X86_SSE2;
4022 for (size_t channels = 1; channels <= 80; channels += 15) {
4023 DWConvMicrokernelTester()
4024 .cr(16)
4025 .kr(9)
4026 .channels(channels)
4027 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004028 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004029 }
4030 }
4031
4032 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
4033 TEST_REQUIRES_X86_SSE2;
4034 for (size_t channels = 1; channels <= 80; channels += 15) {
4035 for (size_t step = 2; step <= 9; step++) {
4036 DWConvMicrokernelTester()
4037 .cr(16)
4038 .kr(9)
4039 .channels(channels)
4040 .width(3)
4041 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004042 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004043 }
4044 }
4045 }
4046
4047 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
4048 TEST_REQUIRES_X86_SSE2;
4049 for (size_t channels = 1; channels <= 80; channels += 15) {
4050 DWConvMicrokernelTester()
4051 .cr(16)
4052 .kr(9)
4053 .channels(16)
4054 .width(5)
4055 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004056 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004057 }
4058 }
4059
4060 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
4061 TEST_REQUIRES_X86_SSE2;
4062 for (size_t channels = 1; channels <= 80; channels += 15) {
4063 DWConvMicrokernelTester()
4064 .cr(16)
4065 .kr(9)
4066 .channels(channels)
4067 .width(3)
4068 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004069 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004070 }
4071 }
4072
4073 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
4074 TEST_REQUIRES_X86_SSE2;
4075 for (size_t channels = 1; channels <= 80; channels += 15) {
4076 DWConvMicrokernelTester()
4077 .cr(16)
4078 .kr(9)
4079 .channels(channels)
4080 .width(3)
4081 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004082 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004083 }
4084 }
4085
4086 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
4087 TEST_REQUIRES_X86_SSE2;
4088 for (uint32_t channels = 32; channels < 256; channels += 48) {
4089 DWConvMicrokernelTester()
4090 .cr(16)
4091 .kr(9)
4092 .channels(channels)
4093 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004095 }
4096 }
4097
4098 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
4099 TEST_REQUIRES_X86_SSE2;
4100 for (uint32_t mz = 0; mz < 9; mz++) {
4101 for (uint32_t channels = 32; channels < 256; channels += 48) {
4102 DWConvMicrokernelTester()
4103 .cr(16)
4104 .kr(9)
4105 .channels(channels)
4106 .input_offset(304)
4107 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004108 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004109 }
4110 }
4111 }
4112#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113
4114
4115#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4116 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_eq_24) {
4117 TEST_REQUIRES_X86_SSE2;
4118 DWConvMicrokernelTester()
4119 .cr(24)
4120 .kr(9)
4121 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08004122 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004123 }
4124
4125 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24) {
4126 TEST_REQUIRES_X86_SSE2;
4127 for (uint32_t channels = 48; channels < 384; channels += 72) {
4128 DWConvMicrokernelTester()
4129 .cr(24)
4130 .kr(9)
4131 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004132 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004133 }
4134 }
4135
4136 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
4137 TEST_REQUIRES_X86_SSE2;
4138 for (uint32_t channels = 48; channels < 384; channels += 72) {
4139 DWConvMicrokernelTester()
4140 .cr(24)
4141 .kr(9)
4142 .channels(channels)
4143 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004144 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004145 }
4146 }
4147
4148 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
4149 TEST_REQUIRES_X86_SSE2;
4150 for (uint32_t channels = 48; channels < 384; channels += 72) {
4151 DWConvMicrokernelTester()
4152 .cr(24)
4153 .kr(9)
4154 .channels(channels)
4155 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004156 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004157 }
4158 }
4159
4160 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_lt_24) {
4161 TEST_REQUIRES_X86_SSE2;
4162 for (uint32_t channels = 1; channels < 24; channels++) {
4163 DWConvMicrokernelTester()
4164 .cr(24)
4165 .kr(9)
4166 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004167 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004168 }
4169 }
4170
4171 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24) {
4172 TEST_REQUIRES_X86_SSE2;
4173 for (uint32_t channels = 25; channels < 48; channels++) {
4174 DWConvMicrokernelTester()
4175 .cr(24)
4176 .kr(9)
4177 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004178 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004179 }
4180 }
4181
4182 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
4183 TEST_REQUIRES_X86_SSE2;
4184 for (uint32_t channels = 25; channels < 48; channels++) {
4185 DWConvMicrokernelTester()
4186 .cr(24)
4187 .kr(9)
4188 .channels(channels)
4189 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004190 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004191 }
4192 }
4193
4194 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
4195 TEST_REQUIRES_X86_SSE2;
4196 for (uint32_t channels = 25; channels < 48; channels++) {
4197 DWConvMicrokernelTester()
4198 .cr(24)
4199 .kr(9)
4200 .channels(channels)
4201 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004203 }
4204 }
4205
4206 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel) {
4207 TEST_REQUIRES_X86_SSE2;
4208 for (size_t channels = 1; channels <= 120; channels += 23) {
4209 DWConvMicrokernelTester()
4210 .cr(24)
4211 .kr(9)
4212 .channels(channels)
4213 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004214 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004215 }
4216 }
4217
4218 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_step) {
4219 TEST_REQUIRES_X86_SSE2;
4220 for (size_t channels = 1; channels <= 120; channels += 23) {
4221 for (size_t step = 2; step <= 9; step++) {
4222 DWConvMicrokernelTester()
4223 .cr(24)
4224 .kr(9)
4225 .channels(channels)
4226 .width(3)
4227 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004228 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004229 }
4230 }
4231 }
4232
4233 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
4234 TEST_REQUIRES_X86_SSE2;
4235 for (size_t channels = 1; channels <= 120; channels += 23) {
4236 DWConvMicrokernelTester()
4237 .cr(24)
4238 .kr(9)
4239 .channels(24)
4240 .width(5)
4241 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08004242 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004243 }
4244 }
4245
4246 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
4247 TEST_REQUIRES_X86_SSE2;
4248 for (size_t channels = 1; channels <= 120; channels += 23) {
4249 DWConvMicrokernelTester()
4250 .cr(24)
4251 .kr(9)
4252 .channels(channels)
4253 .width(3)
4254 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004255 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004256 }
4257 }
4258
4259 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
4260 TEST_REQUIRES_X86_SSE2;
4261 for (size_t channels = 1; channels <= 120; channels += 23) {
4262 DWConvMicrokernelTester()
4263 .cr(24)
4264 .kr(9)
4265 .channels(channels)
4266 .width(3)
4267 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004268 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004269 }
4270 }
4271
4272 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, input_offset) {
4273 TEST_REQUIRES_X86_SSE2;
4274 for (uint32_t channels = 48; channels < 384; channels += 72) {
4275 DWConvMicrokernelTester()
4276 .cr(24)
4277 .kr(9)
4278 .channels(channels)
4279 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08004280 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004281 }
4282 }
4283
4284 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, zero) {
4285 TEST_REQUIRES_X86_SSE2;
4286 for (uint32_t mz = 0; mz < 9; mz++) {
4287 for (uint32_t channels = 48; channels < 384; channels += 72) {
4288 DWConvMicrokernelTester()
4289 .cr(24)
4290 .kr(9)
4291 .channels(channels)
4292 .input_offset(464)
4293 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004295 }
4296 }
4297 }
4298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4299
4300
4301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07004302 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_eq_8) {
4303 TEST_REQUIRES_X86_SSE2;
4304 DWConvMicrokernelTester()
4305 .cr(8)
4306 .kr(9)
4307 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004308 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004309 }
4310
4311 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8) {
4312 TEST_REQUIRES_X86_SSE2;
4313 for (uint32_t channels = 16; channels < 128; channels += 24) {
4314 DWConvMicrokernelTester()
4315 .cr(8)
4316 .kr(9)
4317 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004318 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004319 }
4320 }
4321
4322 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
4323 TEST_REQUIRES_X86_SSE2;
4324 for (uint32_t channels = 16; channels < 128; channels += 24) {
4325 DWConvMicrokernelTester()
4326 .cr(8)
4327 .kr(9)
4328 .channels(channels)
4329 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004331 }
4332 }
4333
4334 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
4335 TEST_REQUIRES_X86_SSE2;
4336 for (uint32_t channels = 16; channels < 128; channels += 24) {
4337 DWConvMicrokernelTester()
4338 .cr(8)
4339 .kr(9)
4340 .channels(channels)
4341 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004342 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004343 }
4344 }
4345
4346 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_lt_8) {
4347 TEST_REQUIRES_X86_SSE2;
4348 for (uint32_t channels = 1; channels < 8; channels++) {
4349 DWConvMicrokernelTester()
4350 .cr(8)
4351 .kr(9)
4352 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004353 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004354 }
4355 }
4356
4357 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8) {
4358 TEST_REQUIRES_X86_SSE2;
4359 for (uint32_t channels = 9; channels < 16; channels++) {
4360 DWConvMicrokernelTester()
4361 .cr(8)
4362 .kr(9)
4363 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004364 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004365 }
4366 }
4367
4368 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
4369 TEST_REQUIRES_X86_SSE2;
4370 for (uint32_t channels = 9; channels < 16; channels++) {
4371 DWConvMicrokernelTester()
4372 .cr(8)
4373 .kr(9)
4374 .channels(channels)
4375 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004376 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004377 }
4378 }
4379
4380 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
4381 TEST_REQUIRES_X86_SSE2;
4382 for (uint32_t channels = 9; channels < 16; channels++) {
4383 DWConvMicrokernelTester()
4384 .cr(8)
4385 .kr(9)
4386 .channels(channels)
4387 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004389 }
4390 }
4391
4392 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel) {
4393 TEST_REQUIRES_X86_SSE2;
4394 for (size_t channels = 1; channels <= 40; channels += 7) {
4395 DWConvMicrokernelTester()
4396 .cr(8)
4397 .kr(9)
4398 .channels(channels)
4399 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004400 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004401 }
4402 }
4403
4404 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_step) {
4405 TEST_REQUIRES_X86_SSE2;
4406 for (size_t channels = 1; channels <= 40; channels += 7) {
4407 for (size_t step = 2; step <= 9; step++) {
4408 DWConvMicrokernelTester()
4409 .cr(8)
4410 .kr(9)
4411 .channels(channels)
4412 .width(3)
4413 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004414 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004415 }
4416 }
4417 }
4418
4419 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
4420 TEST_REQUIRES_X86_SSE2;
4421 for (size_t channels = 1; channels <= 40; channels += 7) {
4422 DWConvMicrokernelTester()
4423 .cr(8)
4424 .kr(9)
4425 .channels(8)
4426 .width(5)
4427 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004428 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004429 }
4430 }
4431
4432 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
4433 TEST_REQUIRES_X86_SSE2;
4434 for (size_t channels = 1; channels <= 40; channels += 7) {
4435 DWConvMicrokernelTester()
4436 .cr(8)
4437 .kr(9)
4438 .channels(channels)
4439 .width(3)
4440 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004441 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004442 }
4443 }
4444
4445 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
4446 TEST_REQUIRES_X86_SSE2;
4447 for (size_t channels = 1; channels <= 40; channels += 7) {
4448 DWConvMicrokernelTester()
4449 .cr(8)
4450 .kr(9)
4451 .channels(channels)
4452 .width(3)
4453 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004454 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004455 }
4456 }
4457
4458 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, input_offset) {
4459 TEST_REQUIRES_X86_SSE2;
4460 for (uint32_t channels = 16; channels < 128; channels += 24) {
4461 DWConvMicrokernelTester()
4462 .cr(8)
4463 .kr(9)
4464 .channels(channels)
4465 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004466 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004467 }
4468 }
4469
4470 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, zero) {
4471 TEST_REQUIRES_X86_SSE2;
4472 for (uint32_t mz = 0; mz < 9; mz++) {
4473 for (uint32_t channels = 16; channels < 128; channels += 24) {
4474 DWConvMicrokernelTester()
4475 .cr(8)
4476 .kr(9)
4477 .channels(channels)
4478 .input_offset(176)
4479 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004481 }
4482 }
4483 }
4484#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4485
4486
4487#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4488 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_eq_16) {
4489 TEST_REQUIRES_X86_SSE2;
4490 DWConvMicrokernelTester()
4491 .cr(16)
4492 .kr(9)
4493 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004494 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004495 }
4496
4497 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16) {
4498 TEST_REQUIRES_X86_SSE2;
4499 for (uint32_t channels = 32; channels < 256; channels += 48) {
4500 DWConvMicrokernelTester()
4501 .cr(16)
4502 .kr(9)
4503 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004504 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004505 }
4506 }
4507
4508 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
4509 TEST_REQUIRES_X86_SSE2;
4510 for (uint32_t channels = 32; channels < 256; channels += 48) {
4511 DWConvMicrokernelTester()
4512 .cr(16)
4513 .kr(9)
4514 .channels(channels)
4515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004516 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004517 }
4518 }
4519
4520 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
4521 TEST_REQUIRES_X86_SSE2;
4522 for (uint32_t channels = 32; channels < 256; channels += 48) {
4523 DWConvMicrokernelTester()
4524 .cr(16)
4525 .kr(9)
4526 .channels(channels)
4527 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004528 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004529 }
4530 }
4531
4532 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_lt_16) {
4533 TEST_REQUIRES_X86_SSE2;
4534 for (uint32_t channels = 1; channels < 16; channels++) {
4535 DWConvMicrokernelTester()
4536 .cr(16)
4537 .kr(9)
4538 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004539 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004540 }
4541 }
4542
4543 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16) {
4544 TEST_REQUIRES_X86_SSE2;
4545 for (uint32_t channels = 17; channels < 32; channels++) {
4546 DWConvMicrokernelTester()
4547 .cr(16)
4548 .kr(9)
4549 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004551 }
4552 }
4553
4554 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
4555 TEST_REQUIRES_X86_SSE2;
4556 for (uint32_t channels = 17; channels < 32; channels++) {
4557 DWConvMicrokernelTester()
4558 .cr(16)
4559 .kr(9)
4560 .channels(channels)
4561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004563 }
4564 }
4565
4566 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
4567 TEST_REQUIRES_X86_SSE2;
4568 for (uint32_t channels = 17; channels < 32; channels++) {
4569 DWConvMicrokernelTester()
4570 .cr(16)
4571 .kr(9)
4572 .channels(channels)
4573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004575 }
4576 }
4577
4578 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel) {
4579 TEST_REQUIRES_X86_SSE2;
4580 for (size_t channels = 1; channels <= 80; channels += 15) {
4581 DWConvMicrokernelTester()
4582 .cr(16)
4583 .kr(9)
4584 .channels(channels)
4585 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004586 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004587 }
4588 }
4589
4590 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_step) {
4591 TEST_REQUIRES_X86_SSE2;
4592 for (size_t channels = 1; channels <= 80; channels += 15) {
4593 for (size_t step = 2; step <= 9; step++) {
4594 DWConvMicrokernelTester()
4595 .cr(16)
4596 .kr(9)
4597 .channels(channels)
4598 .width(3)
4599 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004600 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004601 }
4602 }
4603 }
4604
4605 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
4606 TEST_REQUIRES_X86_SSE2;
4607 for (size_t channels = 1; channels <= 80; channels += 15) {
4608 DWConvMicrokernelTester()
4609 .cr(16)
4610 .kr(9)
4611 .channels(16)
4612 .width(5)
4613 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004614 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004615 }
4616 }
4617
4618 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
4619 TEST_REQUIRES_X86_SSE2;
4620 for (size_t channels = 1; channels <= 80; channels += 15) {
4621 DWConvMicrokernelTester()
4622 .cr(16)
4623 .kr(9)
4624 .channels(channels)
4625 .width(3)
4626 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004627 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004628 }
4629 }
4630
4631 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
4632 TEST_REQUIRES_X86_SSE2;
4633 for (size_t channels = 1; channels <= 80; channels += 15) {
4634 DWConvMicrokernelTester()
4635 .cr(16)
4636 .kr(9)
4637 .channels(channels)
4638 .width(3)
4639 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004640 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004641 }
4642 }
4643
4644 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, input_offset) {
4645 TEST_REQUIRES_X86_SSE2;
4646 for (uint32_t channels = 32; channels < 256; channels += 48) {
4647 DWConvMicrokernelTester()
4648 .cr(16)
4649 .kr(9)
4650 .channels(channels)
4651 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08004652 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004653 }
4654 }
4655
4656 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, zero) {
4657 TEST_REQUIRES_X86_SSE2;
4658 for (uint32_t mz = 0; mz < 9; mz++) {
4659 for (uint32_t channels = 32; channels < 256; channels += 48) {
4660 DWConvMicrokernelTester()
4661 .cr(16)
4662 .kr(9)
4663 .channels(channels)
4664 .input_offset(304)
4665 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004666 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07004667 }
4668 }
4669 }
4670#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671
4672
4673#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -07004674 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
4675 TEST_REQUIRES_X86_SSE41;
4676 DWConvMicrokernelTester()
4677 .cr(8)
4678 .kr(9)
4679 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004680 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004681 }
4682
4683 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
4684 TEST_REQUIRES_X86_SSE41;
4685 for (uint32_t channels = 16; channels < 128; channels += 24) {
4686 DWConvMicrokernelTester()
4687 .cr(8)
4688 .kr(9)
4689 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004691 }
4692 }
4693
4694 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
4695 TEST_REQUIRES_X86_SSE41;
4696 for (uint32_t channels = 16; channels < 128; channels += 24) {
4697 DWConvMicrokernelTester()
4698 .cr(8)
4699 .kr(9)
4700 .channels(channels)
4701 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004702 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004703 }
4704 }
4705
4706 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
4707 TEST_REQUIRES_X86_SSE41;
4708 for (uint32_t channels = 16; channels < 128; channels += 24) {
4709 DWConvMicrokernelTester()
4710 .cr(8)
4711 .kr(9)
4712 .channels(channels)
4713 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004714 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004715 }
4716 }
4717
4718 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
4719 TEST_REQUIRES_X86_SSE41;
4720 for (uint32_t channels = 1; channels < 8; channels++) {
4721 DWConvMicrokernelTester()
4722 .cr(8)
4723 .kr(9)
4724 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004725 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004726 }
4727 }
4728
4729 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
4730 TEST_REQUIRES_X86_SSE41;
4731 for (uint32_t channels = 9; channels < 16; channels++) {
4732 DWConvMicrokernelTester()
4733 .cr(8)
4734 .kr(9)
4735 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004736 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004737 }
4738 }
4739
4740 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
4741 TEST_REQUIRES_X86_SSE41;
4742 for (uint32_t channels = 9; channels < 16; channels++) {
4743 DWConvMicrokernelTester()
4744 .cr(8)
4745 .kr(9)
4746 .channels(channels)
4747 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004748 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004749 }
4750 }
4751
4752 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
4753 TEST_REQUIRES_X86_SSE41;
4754 for (uint32_t channels = 9; channels < 16; channels++) {
4755 DWConvMicrokernelTester()
4756 .cr(8)
4757 .kr(9)
4758 .channels(channels)
4759 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004761 }
4762 }
4763
4764 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
4765 TEST_REQUIRES_X86_SSE41;
4766 for (size_t channels = 1; channels <= 40; channels += 7) {
4767 DWConvMicrokernelTester()
4768 .cr(8)
4769 .kr(9)
4770 .channels(channels)
4771 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004772 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004773 }
4774 }
4775
4776 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
4777 TEST_REQUIRES_X86_SSE41;
4778 for (size_t channels = 1; channels <= 40; channels += 7) {
4779 for (size_t step = 2; step <= 9; step++) {
4780 DWConvMicrokernelTester()
4781 .cr(8)
4782 .kr(9)
4783 .channels(channels)
4784 .width(3)
4785 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004786 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004787 }
4788 }
4789 }
4790
4791 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
4792 TEST_REQUIRES_X86_SSE41;
4793 for (size_t channels = 1; channels <= 40; channels += 7) {
4794 DWConvMicrokernelTester()
4795 .cr(8)
4796 .kr(9)
4797 .channels(8)
4798 .width(5)
4799 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004800 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004801 }
4802 }
4803
4804 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
4805 TEST_REQUIRES_X86_SSE41;
4806 for (size_t channels = 1; channels <= 40; channels += 7) {
4807 DWConvMicrokernelTester()
4808 .cr(8)
4809 .kr(9)
4810 .channels(channels)
4811 .width(3)
4812 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004813 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004814 }
4815 }
4816
4817 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
4818 TEST_REQUIRES_X86_SSE41;
4819 for (size_t channels = 1; channels <= 40; channels += 7) {
4820 DWConvMicrokernelTester()
4821 .cr(8)
4822 .kr(9)
4823 .channels(channels)
4824 .width(3)
4825 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004826 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004827 }
4828 }
4829
4830 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
4831 TEST_REQUIRES_X86_SSE41;
4832 for (uint32_t channels = 16; channels < 128; channels += 24) {
4833 DWConvMicrokernelTester()
4834 .cr(8)
4835 .kr(9)
4836 .channels(channels)
4837 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08004838 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004839 }
4840 }
4841
4842 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
4843 TEST_REQUIRES_X86_SSE41;
4844 for (uint32_t mz = 0; mz < 9; mz++) {
4845 for (uint32_t channels = 16; channels < 128; channels += 24) {
4846 DWConvMicrokernelTester()
4847 .cr(8)
4848 .kr(9)
4849 .channels(channels)
4850 .input_offset(176)
4851 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004852 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004853 }
4854 }
4855 }
4856#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857
4858
4859#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4860 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
4861 TEST_REQUIRES_X86_SSE41;
4862 DWConvMicrokernelTester()
4863 .cr(16)
4864 .kr(9)
4865 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004866 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004867 }
4868
4869 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
4870 TEST_REQUIRES_X86_SSE41;
4871 for (uint32_t channels = 32; channels < 256; channels += 48) {
4872 DWConvMicrokernelTester()
4873 .cr(16)
4874 .kr(9)
4875 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004877 }
4878 }
4879
4880 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
4881 TEST_REQUIRES_X86_SSE41;
4882 for (uint32_t channels = 32; channels < 256; channels += 48) {
4883 DWConvMicrokernelTester()
4884 .cr(16)
4885 .kr(9)
4886 .channels(channels)
4887 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004888 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004889 }
4890 }
4891
4892 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
4893 TEST_REQUIRES_X86_SSE41;
4894 for (uint32_t channels = 32; channels < 256; channels += 48) {
4895 DWConvMicrokernelTester()
4896 .cr(16)
4897 .kr(9)
4898 .channels(channels)
4899 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004900 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004901 }
4902 }
4903
4904 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
4905 TEST_REQUIRES_X86_SSE41;
4906 for (uint32_t channels = 1; channels < 16; channels++) {
4907 DWConvMicrokernelTester()
4908 .cr(16)
4909 .kr(9)
4910 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004911 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004912 }
4913 }
4914
4915 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
4916 TEST_REQUIRES_X86_SSE41;
4917 for (uint32_t channels = 17; channels < 32; channels++) {
4918 DWConvMicrokernelTester()
4919 .cr(16)
4920 .kr(9)
4921 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08004922 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004923 }
4924 }
4925
4926 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
4927 TEST_REQUIRES_X86_SSE41;
4928 for (uint32_t channels = 17; channels < 32; channels++) {
4929 DWConvMicrokernelTester()
4930 .cr(16)
4931 .kr(9)
4932 .channels(channels)
4933 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004934 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004935 }
4936 }
4937
4938 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
4939 TEST_REQUIRES_X86_SSE41;
4940 for (uint32_t channels = 17; channels < 32; channels++) {
4941 DWConvMicrokernelTester()
4942 .cr(16)
4943 .kr(9)
4944 .channels(channels)
4945 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004947 }
4948 }
4949
4950 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
4951 TEST_REQUIRES_X86_SSE41;
4952 for (size_t channels = 1; channels <= 80; channels += 15) {
4953 DWConvMicrokernelTester()
4954 .cr(16)
4955 .kr(9)
4956 .channels(channels)
4957 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004958 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004959 }
4960 }
4961
4962 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
4963 TEST_REQUIRES_X86_SSE41;
4964 for (size_t channels = 1; channels <= 80; channels += 15) {
4965 for (size_t step = 2; step <= 9; step++) {
4966 DWConvMicrokernelTester()
4967 .cr(16)
4968 .kr(9)
4969 .channels(channels)
4970 .width(3)
4971 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08004972 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004973 }
4974 }
4975 }
4976
4977 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
4978 TEST_REQUIRES_X86_SSE41;
4979 for (size_t channels = 1; channels <= 80; channels += 15) {
4980 DWConvMicrokernelTester()
4981 .cr(16)
4982 .kr(9)
4983 .channels(16)
4984 .width(5)
4985 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004986 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07004987 }
4988 }
4989
4990 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
4991 TEST_REQUIRES_X86_SSE41;
4992 for (size_t channels = 1; channels <= 80; channels += 15) {
4993 DWConvMicrokernelTester()
4994 .cr(16)
4995 .kr(9)
4996 .channels(channels)
4997 .width(3)
4998 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004999 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005000 }
5001 }
5002
5003 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
5004 TEST_REQUIRES_X86_SSE41;
5005 for (size_t channels = 1; channels <= 80; channels += 15) {
5006 DWConvMicrokernelTester()
5007 .cr(16)
5008 .kr(9)
5009 .channels(channels)
5010 .width(3)
5011 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005012 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005013 }
5014 }
5015
5016 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
5017 TEST_REQUIRES_X86_SSE41;
5018 for (uint32_t channels = 32; channels < 256; channels += 48) {
5019 DWConvMicrokernelTester()
5020 .cr(16)
5021 .kr(9)
5022 .channels(channels)
5023 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005024 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005025 }
5026 }
5027
5028 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
5029 TEST_REQUIRES_X86_SSE41;
5030 for (uint32_t mz = 0; mz < 9; mz++) {
5031 for (uint32_t channels = 32; channels < 256; channels += 48) {
5032 DWConvMicrokernelTester()
5033 .cr(16)
5034 .kr(9)
5035 .channels(channels)
5036 .input_offset(304)
5037 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005038 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005039 }
5040 }
5041 }
5042#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043
5044
5045#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5046 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_eq_24) {
5047 TEST_REQUIRES_X86_SSE41;
5048 DWConvMicrokernelTester()
5049 .cr(24)
5050 .kr(9)
5051 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08005052 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005053 }
5054
5055 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24) {
5056 TEST_REQUIRES_X86_SSE41;
5057 for (uint32_t channels = 48; channels < 384; channels += 72) {
5058 DWConvMicrokernelTester()
5059 .cr(24)
5060 .kr(9)
5061 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005062 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005063 }
5064 }
5065
5066 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
5067 TEST_REQUIRES_X86_SSE41;
5068 for (uint32_t channels = 48; channels < 384; channels += 72) {
5069 DWConvMicrokernelTester()
5070 .cr(24)
5071 .kr(9)
5072 .channels(channels)
5073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005074 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005075 }
5076 }
5077
5078 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
5079 TEST_REQUIRES_X86_SSE41;
5080 for (uint32_t channels = 48; channels < 384; channels += 72) {
5081 DWConvMicrokernelTester()
5082 .cr(24)
5083 .kr(9)
5084 .channels(channels)
5085 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005086 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005087 }
5088 }
5089
5090 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_lt_24) {
5091 TEST_REQUIRES_X86_SSE41;
5092 for (uint32_t channels = 1; channels < 24; channels++) {
5093 DWConvMicrokernelTester()
5094 .cr(24)
5095 .kr(9)
5096 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005097 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005098 }
5099 }
5100
5101 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24) {
5102 TEST_REQUIRES_X86_SSE41;
5103 for (uint32_t channels = 25; channels < 48; channels++) {
5104 DWConvMicrokernelTester()
5105 .cr(24)
5106 .kr(9)
5107 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005108 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005109 }
5110 }
5111
5112 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
5113 TEST_REQUIRES_X86_SSE41;
5114 for (uint32_t channels = 25; channels < 48; channels++) {
5115 DWConvMicrokernelTester()
5116 .cr(24)
5117 .kr(9)
5118 .channels(channels)
5119 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005120 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005121 }
5122 }
5123
5124 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
5125 TEST_REQUIRES_X86_SSE41;
5126 for (uint32_t channels = 25; channels < 48; channels++) {
5127 DWConvMicrokernelTester()
5128 .cr(24)
5129 .kr(9)
5130 .channels(channels)
5131 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005132 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005133 }
5134 }
5135
5136 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel) {
5137 TEST_REQUIRES_X86_SSE41;
5138 for (size_t channels = 1; channels <= 120; channels += 23) {
5139 DWConvMicrokernelTester()
5140 .cr(24)
5141 .kr(9)
5142 .channels(channels)
5143 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005144 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005145 }
5146 }
5147
5148 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_step) {
5149 TEST_REQUIRES_X86_SSE41;
5150 for (size_t channels = 1; channels <= 120; channels += 23) {
5151 for (size_t step = 2; step <= 9; step++) {
5152 DWConvMicrokernelTester()
5153 .cr(24)
5154 .kr(9)
5155 .channels(channels)
5156 .width(3)
5157 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005158 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005159 }
5160 }
5161 }
5162
5163 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
5164 TEST_REQUIRES_X86_SSE41;
5165 for (size_t channels = 1; channels <= 120; channels += 23) {
5166 DWConvMicrokernelTester()
5167 .cr(24)
5168 .kr(9)
5169 .channels(24)
5170 .width(5)
5171 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08005172 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005173 }
5174 }
5175
5176 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
5177 TEST_REQUIRES_X86_SSE41;
5178 for (size_t channels = 1; channels <= 120; channels += 23) {
5179 DWConvMicrokernelTester()
5180 .cr(24)
5181 .kr(9)
5182 .channels(channels)
5183 .width(3)
5184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005185 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005186 }
5187 }
5188
5189 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
5190 TEST_REQUIRES_X86_SSE41;
5191 for (size_t channels = 1; channels <= 120; channels += 23) {
5192 DWConvMicrokernelTester()
5193 .cr(24)
5194 .kr(9)
5195 .channels(channels)
5196 .width(3)
5197 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005198 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005199 }
5200 }
5201
5202 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, input_offset) {
5203 TEST_REQUIRES_X86_SSE41;
5204 for (uint32_t channels = 48; channels < 384; channels += 72) {
5205 DWConvMicrokernelTester()
5206 .cr(24)
5207 .kr(9)
5208 .channels(channels)
5209 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08005210 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005211 }
5212 }
5213
5214 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, zero) {
5215 TEST_REQUIRES_X86_SSE41;
5216 for (uint32_t mz = 0; mz < 9; mz++) {
5217 for (uint32_t channels = 48; channels < 384; channels += 72) {
5218 DWConvMicrokernelTester()
5219 .cr(24)
5220 .kr(9)
5221 .channels(channels)
5222 .input_offset(464)
5223 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005224 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005225 }
5226 }
5227 }
5228#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229
5230
5231#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07005232 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_eq_8) {
5233 TEST_REQUIRES_X86_SSE41;
5234 DWConvMicrokernelTester()
5235 .cr(8)
5236 .kr(9)
5237 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005238 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005239 }
5240
5241 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8) {
5242 TEST_REQUIRES_X86_SSE41;
5243 for (uint32_t channels = 16; channels < 128; channels += 24) {
5244 DWConvMicrokernelTester()
5245 .cr(8)
5246 .kr(9)
5247 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005248 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005249 }
5250 }
5251
5252 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
5253 TEST_REQUIRES_X86_SSE41;
5254 for (uint32_t channels = 16; channels < 128; channels += 24) {
5255 DWConvMicrokernelTester()
5256 .cr(8)
5257 .kr(9)
5258 .channels(channels)
5259 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005260 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005261 }
5262 }
5263
5264 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
5265 TEST_REQUIRES_X86_SSE41;
5266 for (uint32_t channels = 16; channels < 128; channels += 24) {
5267 DWConvMicrokernelTester()
5268 .cr(8)
5269 .kr(9)
5270 .channels(channels)
5271 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005272 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005273 }
5274 }
5275
5276 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_lt_8) {
5277 TEST_REQUIRES_X86_SSE41;
5278 for (uint32_t channels = 1; channels < 8; channels++) {
5279 DWConvMicrokernelTester()
5280 .cr(8)
5281 .kr(9)
5282 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005283 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005284 }
5285 }
5286
5287 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8) {
5288 TEST_REQUIRES_X86_SSE41;
5289 for (uint32_t channels = 9; channels < 16; channels++) {
5290 DWConvMicrokernelTester()
5291 .cr(8)
5292 .kr(9)
5293 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005295 }
5296 }
5297
5298 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
5299 TEST_REQUIRES_X86_SSE41;
5300 for (uint32_t channels = 9; channels < 16; channels++) {
5301 DWConvMicrokernelTester()
5302 .cr(8)
5303 .kr(9)
5304 .channels(channels)
5305 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005306 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005307 }
5308 }
5309
5310 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
5311 TEST_REQUIRES_X86_SSE41;
5312 for (uint32_t channels = 9; channels < 16; channels++) {
5313 DWConvMicrokernelTester()
5314 .cr(8)
5315 .kr(9)
5316 .channels(channels)
5317 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005318 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005319 }
5320 }
5321
5322 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel) {
5323 TEST_REQUIRES_X86_SSE41;
5324 for (size_t channels = 1; channels <= 40; channels += 7) {
5325 DWConvMicrokernelTester()
5326 .cr(8)
5327 .kr(9)
5328 .channels(channels)
5329 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005331 }
5332 }
5333
5334 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_step) {
5335 TEST_REQUIRES_X86_SSE41;
5336 for (size_t channels = 1; channels <= 40; channels += 7) {
5337 for (size_t step = 2; step <= 9; step++) {
5338 DWConvMicrokernelTester()
5339 .cr(8)
5340 .kr(9)
5341 .channels(channels)
5342 .width(3)
5343 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005344 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005345 }
5346 }
5347 }
5348
5349 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
5350 TEST_REQUIRES_X86_SSE41;
5351 for (size_t channels = 1; channels <= 40; channels += 7) {
5352 DWConvMicrokernelTester()
5353 .cr(8)
5354 .kr(9)
5355 .channels(8)
5356 .width(5)
5357 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005358 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005359 }
5360 }
5361
5362 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
5363 TEST_REQUIRES_X86_SSE41;
5364 for (size_t channels = 1; channels <= 40; channels += 7) {
5365 DWConvMicrokernelTester()
5366 .cr(8)
5367 .kr(9)
5368 .channels(channels)
5369 .width(3)
5370 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005371 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005372 }
5373 }
5374
5375 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
5376 TEST_REQUIRES_X86_SSE41;
5377 for (size_t channels = 1; channels <= 40; channels += 7) {
5378 DWConvMicrokernelTester()
5379 .cr(8)
5380 .kr(9)
5381 .channels(channels)
5382 .width(3)
5383 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005384 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005385 }
5386 }
5387
5388 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, input_offset) {
5389 TEST_REQUIRES_X86_SSE41;
5390 for (uint32_t channels = 16; channels < 128; channels += 24) {
5391 DWConvMicrokernelTester()
5392 .cr(8)
5393 .kr(9)
5394 .channels(channels)
5395 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08005396 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005397 }
5398 }
5399
5400 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, zero) {
5401 TEST_REQUIRES_X86_SSE41;
5402 for (uint32_t mz = 0; mz < 9; mz++) {
5403 for (uint32_t channels = 16; channels < 128; channels += 24) {
5404 DWConvMicrokernelTester()
5405 .cr(8)
5406 .kr(9)
5407 .channels(channels)
5408 .input_offset(176)
5409 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005410 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005411 }
5412 }
5413 }
5414#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415
5416
5417#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5418 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_eq_16) {
5419 TEST_REQUIRES_X86_SSE41;
5420 DWConvMicrokernelTester()
5421 .cr(16)
5422 .kr(9)
5423 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005424 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005425 }
5426
5427 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16) {
5428 TEST_REQUIRES_X86_SSE41;
5429 for (uint32_t channels = 32; channels < 256; channels += 48) {
5430 DWConvMicrokernelTester()
5431 .cr(16)
5432 .kr(9)
5433 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005434 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005435 }
5436 }
5437
5438 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
5439 TEST_REQUIRES_X86_SSE41;
5440 for (uint32_t channels = 32; channels < 256; channels += 48) {
5441 DWConvMicrokernelTester()
5442 .cr(16)
5443 .kr(9)
5444 .channels(channels)
5445 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005446 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005447 }
5448 }
5449
5450 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
5451 TEST_REQUIRES_X86_SSE41;
5452 for (uint32_t channels = 32; channels < 256; channels += 48) {
5453 DWConvMicrokernelTester()
5454 .cr(16)
5455 .kr(9)
5456 .channels(channels)
5457 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005458 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005459 }
5460 }
5461
5462 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_lt_16) {
5463 TEST_REQUIRES_X86_SSE41;
5464 for (uint32_t channels = 1; channels < 16; channels++) {
5465 DWConvMicrokernelTester()
5466 .cr(16)
5467 .kr(9)
5468 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005469 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005470 }
5471 }
5472
5473 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16) {
5474 TEST_REQUIRES_X86_SSE41;
5475 for (uint32_t channels = 17; channels < 32; channels++) {
5476 DWConvMicrokernelTester()
5477 .cr(16)
5478 .kr(9)
5479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005481 }
5482 }
5483
5484 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
5485 TEST_REQUIRES_X86_SSE41;
5486 for (uint32_t channels = 17; channels < 32; channels++) {
5487 DWConvMicrokernelTester()
5488 .cr(16)
5489 .kr(9)
5490 .channels(channels)
5491 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005492 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005493 }
5494 }
5495
5496 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
5497 TEST_REQUIRES_X86_SSE41;
5498 for (uint32_t channels = 17; channels < 32; channels++) {
5499 DWConvMicrokernelTester()
5500 .cr(16)
5501 .kr(9)
5502 .channels(channels)
5503 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005504 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005505 }
5506 }
5507
5508 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel) {
5509 TEST_REQUIRES_X86_SSE41;
5510 for (size_t channels = 1; channels <= 80; channels += 15) {
5511 DWConvMicrokernelTester()
5512 .cr(16)
5513 .kr(9)
5514 .channels(channels)
5515 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005516 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005517 }
5518 }
5519
5520 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_step) {
5521 TEST_REQUIRES_X86_SSE41;
5522 for (size_t channels = 1; channels <= 80; channels += 15) {
5523 for (size_t step = 2; step <= 9; step++) {
5524 DWConvMicrokernelTester()
5525 .cr(16)
5526 .kr(9)
5527 .channels(channels)
5528 .width(3)
5529 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005530 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005531 }
5532 }
5533 }
5534
5535 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
5536 TEST_REQUIRES_X86_SSE41;
5537 for (size_t channels = 1; channels <= 80; channels += 15) {
5538 DWConvMicrokernelTester()
5539 .cr(16)
5540 .kr(9)
5541 .channels(16)
5542 .width(5)
5543 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005544 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005545 }
5546 }
5547
5548 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
5549 TEST_REQUIRES_X86_SSE41;
5550 for (size_t channels = 1; channels <= 80; channels += 15) {
5551 DWConvMicrokernelTester()
5552 .cr(16)
5553 .kr(9)
5554 .channels(channels)
5555 .width(3)
5556 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005557 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005558 }
5559 }
5560
5561 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
5562 TEST_REQUIRES_X86_SSE41;
5563 for (size_t channels = 1; channels <= 80; channels += 15) {
5564 DWConvMicrokernelTester()
5565 .cr(16)
5566 .kr(9)
5567 .channels(channels)
5568 .width(3)
5569 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005570 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005571 }
5572 }
5573
5574 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, input_offset) {
5575 TEST_REQUIRES_X86_SSE41;
5576 for (uint32_t channels = 32; channels < 256; channels += 48) {
5577 DWConvMicrokernelTester()
5578 .cr(16)
5579 .kr(9)
5580 .channels(channels)
5581 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005582 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005583 }
5584 }
5585
5586 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, zero) {
5587 TEST_REQUIRES_X86_SSE41;
5588 for (uint32_t mz = 0; mz < 9; mz++) {
5589 for (uint32_t channels = 32; channels < 256; channels += 48) {
5590 DWConvMicrokernelTester()
5591 .cr(16)
5592 .kr(9)
5593 .channels(channels)
5594 .input_offset(304)
5595 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005596 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07005597 }
5598 }
5599 }
5600#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601
5602
5603#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -07005604 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
5605 TEST_REQUIRES_X86_AVX;
5606 DWConvMicrokernelTester()
5607 .cr(8)
5608 .kr(9)
5609 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005610 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005611 }
5612
5613 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
5614 TEST_REQUIRES_X86_AVX;
5615 for (uint32_t channels = 16; channels < 128; channels += 24) {
5616 DWConvMicrokernelTester()
5617 .cr(8)
5618 .kr(9)
5619 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005620 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005621 }
5622 }
5623
5624 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
5625 TEST_REQUIRES_X86_AVX;
5626 for (uint32_t channels = 16; channels < 128; channels += 24) {
5627 DWConvMicrokernelTester()
5628 .cr(8)
5629 .kr(9)
5630 .channels(channels)
5631 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005632 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005633 }
5634 }
5635
5636 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
5637 TEST_REQUIRES_X86_AVX;
5638 for (uint32_t channels = 16; channels < 128; channels += 24) {
5639 DWConvMicrokernelTester()
5640 .cr(8)
5641 .kr(9)
5642 .channels(channels)
5643 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005644 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005645 }
5646 }
5647
5648 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
5649 TEST_REQUIRES_X86_AVX;
5650 for (uint32_t channels = 1; channels < 8; channels++) {
5651 DWConvMicrokernelTester()
5652 .cr(8)
5653 .kr(9)
5654 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005655 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005656 }
5657 }
5658
5659 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
5660 TEST_REQUIRES_X86_AVX;
5661 for (uint32_t channels = 9; channels < 16; channels++) {
5662 DWConvMicrokernelTester()
5663 .cr(8)
5664 .kr(9)
5665 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005666 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005667 }
5668 }
5669
5670 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
5671 TEST_REQUIRES_X86_AVX;
5672 for (uint32_t channels = 9; channels < 16; channels++) {
5673 DWConvMicrokernelTester()
5674 .cr(8)
5675 .kr(9)
5676 .channels(channels)
5677 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005678 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005679 }
5680 }
5681
5682 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
5683 TEST_REQUIRES_X86_AVX;
5684 for (uint32_t channels = 9; channels < 16; channels++) {
5685 DWConvMicrokernelTester()
5686 .cr(8)
5687 .kr(9)
5688 .channels(channels)
5689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005691 }
5692 }
5693
5694 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
5695 TEST_REQUIRES_X86_AVX;
5696 for (size_t channels = 1; channels <= 40; channels += 7) {
5697 DWConvMicrokernelTester()
5698 .cr(8)
5699 .kr(9)
5700 .channels(channels)
5701 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005702 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005703 }
5704 }
5705
5706 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
5707 TEST_REQUIRES_X86_AVX;
5708 for (size_t channels = 1; channels <= 40; channels += 7) {
5709 for (size_t step = 2; step <= 9; step++) {
5710 DWConvMicrokernelTester()
5711 .cr(8)
5712 .kr(9)
5713 .channels(channels)
5714 .width(3)
5715 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005716 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005717 }
5718 }
5719 }
5720
5721 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
5722 TEST_REQUIRES_X86_AVX;
5723 for (size_t channels = 1; channels <= 40; channels += 7) {
5724 DWConvMicrokernelTester()
5725 .cr(8)
5726 .kr(9)
5727 .channels(8)
5728 .width(5)
5729 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005730 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005731 }
5732 }
5733
5734 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
5735 TEST_REQUIRES_X86_AVX;
5736 for (size_t channels = 1; channels <= 40; channels += 7) {
5737 DWConvMicrokernelTester()
5738 .cr(8)
5739 .kr(9)
5740 .channels(channels)
5741 .width(3)
5742 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005743 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005744 }
5745 }
5746
5747 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
5748 TEST_REQUIRES_X86_AVX;
5749 for (size_t channels = 1; channels <= 40; channels += 7) {
5750 DWConvMicrokernelTester()
5751 .cr(8)
5752 .kr(9)
5753 .channels(channels)
5754 .width(3)
5755 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005756 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005757 }
5758 }
5759
5760 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
5761 TEST_REQUIRES_X86_AVX;
5762 for (uint32_t channels = 16; channels < 128; channels += 24) {
5763 DWConvMicrokernelTester()
5764 .cr(8)
5765 .kr(9)
5766 .channels(channels)
5767 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08005768 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005769 }
5770 }
5771
5772 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
5773 TEST_REQUIRES_X86_AVX;
5774 for (uint32_t mz = 0; mz < 9; mz++) {
5775 for (uint32_t channels = 16; channels < 128; channels += 24) {
5776 DWConvMicrokernelTester()
5777 .cr(8)
5778 .kr(9)
5779 .channels(channels)
5780 .input_offset(176)
5781 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005782 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005783 }
5784 }
5785 }
5786#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5787
5788
5789#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5790 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
5791 TEST_REQUIRES_X86_AVX;
5792 DWConvMicrokernelTester()
5793 .cr(16)
5794 .kr(9)
5795 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005796 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005797 }
5798
5799 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
5800 TEST_REQUIRES_X86_AVX;
5801 for (uint32_t channels = 32; channels < 256; channels += 48) {
5802 DWConvMicrokernelTester()
5803 .cr(16)
5804 .kr(9)
5805 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005807 }
5808 }
5809
5810 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
5811 TEST_REQUIRES_X86_AVX;
5812 for (uint32_t channels = 32; channels < 256; channels += 48) {
5813 DWConvMicrokernelTester()
5814 .cr(16)
5815 .kr(9)
5816 .channels(channels)
5817 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005818 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005819 }
5820 }
5821
5822 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
5823 TEST_REQUIRES_X86_AVX;
5824 for (uint32_t channels = 32; channels < 256; channels += 48) {
5825 DWConvMicrokernelTester()
5826 .cr(16)
5827 .kr(9)
5828 .channels(channels)
5829 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005831 }
5832 }
5833
5834 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
5835 TEST_REQUIRES_X86_AVX;
5836 for (uint32_t channels = 1; channels < 16; channels++) {
5837 DWConvMicrokernelTester()
5838 .cr(16)
5839 .kr(9)
5840 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005841 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005842 }
5843 }
5844
5845 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
5846 TEST_REQUIRES_X86_AVX;
5847 for (uint32_t channels = 17; channels < 32; channels++) {
5848 DWConvMicrokernelTester()
5849 .cr(16)
5850 .kr(9)
5851 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005852 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005853 }
5854 }
5855
5856 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
5857 TEST_REQUIRES_X86_AVX;
5858 for (uint32_t channels = 17; channels < 32; channels++) {
5859 DWConvMicrokernelTester()
5860 .cr(16)
5861 .kr(9)
5862 .channels(channels)
5863 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005864 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005865 }
5866 }
5867
5868 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
5869 TEST_REQUIRES_X86_AVX;
5870 for (uint32_t channels = 17; channels < 32; channels++) {
5871 DWConvMicrokernelTester()
5872 .cr(16)
5873 .kr(9)
5874 .channels(channels)
5875 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005877 }
5878 }
5879
5880 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
5881 TEST_REQUIRES_X86_AVX;
5882 for (size_t channels = 1; channels <= 80; channels += 15) {
5883 DWConvMicrokernelTester()
5884 .cr(16)
5885 .kr(9)
5886 .channels(channels)
5887 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005888 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005889 }
5890 }
5891
5892 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
5893 TEST_REQUIRES_X86_AVX;
5894 for (size_t channels = 1; channels <= 80; channels += 15) {
5895 for (size_t step = 2; step <= 9; step++) {
5896 DWConvMicrokernelTester()
5897 .cr(16)
5898 .kr(9)
5899 .channels(channels)
5900 .width(3)
5901 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08005902 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005903 }
5904 }
5905 }
5906
5907 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
5908 TEST_REQUIRES_X86_AVX;
5909 for (size_t channels = 1; channels <= 80; channels += 15) {
5910 DWConvMicrokernelTester()
5911 .cr(16)
5912 .kr(9)
5913 .channels(16)
5914 .width(5)
5915 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005916 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005917 }
5918 }
5919
5920 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
5921 TEST_REQUIRES_X86_AVX;
5922 for (size_t channels = 1; channels <= 80; channels += 15) {
5923 DWConvMicrokernelTester()
5924 .cr(16)
5925 .kr(9)
5926 .channels(channels)
5927 .width(3)
5928 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005929 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005930 }
5931 }
5932
5933 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
5934 TEST_REQUIRES_X86_AVX;
5935 for (size_t channels = 1; channels <= 80; channels += 15) {
5936 DWConvMicrokernelTester()
5937 .cr(16)
5938 .kr(9)
5939 .channels(channels)
5940 .width(3)
5941 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005942 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005943 }
5944 }
5945
5946 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
5947 TEST_REQUIRES_X86_AVX;
5948 for (uint32_t channels = 32; channels < 256; channels += 48) {
5949 DWConvMicrokernelTester()
5950 .cr(16)
5951 .kr(9)
5952 .channels(channels)
5953 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08005954 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005955 }
5956 }
5957
5958 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
5959 TEST_REQUIRES_X86_AVX;
5960 for (uint32_t mz = 0; mz < 9; mz++) {
5961 for (uint32_t channels = 32; channels < 256; channels += 48) {
5962 DWConvMicrokernelTester()
5963 .cr(16)
5964 .kr(9)
5965 .channels(channels)
5966 .input_offset(304)
5967 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005968 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005969 }
5970 }
5971 }
5972#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973
5974
5975#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5976 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_eq_24) {
5977 TEST_REQUIRES_X86_AVX;
5978 DWConvMicrokernelTester()
5979 .cr(24)
5980 .kr(9)
5981 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08005982 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005983 }
5984
5985 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24) {
5986 TEST_REQUIRES_X86_AVX;
5987 for (uint32_t channels = 48; channels < 384; channels += 72) {
5988 DWConvMicrokernelTester()
5989 .cr(24)
5990 .kr(9)
5991 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08005992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07005993 }
5994 }
5995
5996 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
5997 TEST_REQUIRES_X86_AVX;
5998 for (uint32_t channels = 48; channels < 384; channels += 72) {
5999 DWConvMicrokernelTester()
6000 .cr(24)
6001 .kr(9)
6002 .channels(channels)
6003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006004 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006005 }
6006 }
6007
6008 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
6009 TEST_REQUIRES_X86_AVX;
6010 for (uint32_t channels = 48; channels < 384; channels += 72) {
6011 DWConvMicrokernelTester()
6012 .cr(24)
6013 .kr(9)
6014 .channels(channels)
6015 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006016 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006017 }
6018 }
6019
6020 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_lt_24) {
6021 TEST_REQUIRES_X86_AVX;
6022 for (uint32_t channels = 1; channels < 24; channels++) {
6023 DWConvMicrokernelTester()
6024 .cr(24)
6025 .kr(9)
6026 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006027 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006028 }
6029 }
6030
6031 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24) {
6032 TEST_REQUIRES_X86_AVX;
6033 for (uint32_t channels = 25; channels < 48; channels++) {
6034 DWConvMicrokernelTester()
6035 .cr(24)
6036 .kr(9)
6037 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006038 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006039 }
6040 }
6041
6042 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
6043 TEST_REQUIRES_X86_AVX;
6044 for (uint32_t channels = 25; channels < 48; channels++) {
6045 DWConvMicrokernelTester()
6046 .cr(24)
6047 .kr(9)
6048 .channels(channels)
6049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006050 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006051 }
6052 }
6053
6054 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
6055 TEST_REQUIRES_X86_AVX;
6056 for (uint32_t channels = 25; channels < 48; channels++) {
6057 DWConvMicrokernelTester()
6058 .cr(24)
6059 .kr(9)
6060 .channels(channels)
6061 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006062 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006063 }
6064 }
6065
6066 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel) {
6067 TEST_REQUIRES_X86_AVX;
6068 for (size_t channels = 1; channels <= 120; channels += 23) {
6069 DWConvMicrokernelTester()
6070 .cr(24)
6071 .kr(9)
6072 .channels(channels)
6073 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006074 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006075 }
6076 }
6077
6078 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_step) {
6079 TEST_REQUIRES_X86_AVX;
6080 for (size_t channels = 1; channels <= 120; channels += 23) {
6081 for (size_t step = 2; step <= 9; step++) {
6082 DWConvMicrokernelTester()
6083 .cr(24)
6084 .kr(9)
6085 .channels(channels)
6086 .width(3)
6087 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006088 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006089 }
6090 }
6091 }
6092
6093 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
6094 TEST_REQUIRES_X86_AVX;
6095 for (size_t channels = 1; channels <= 120; channels += 23) {
6096 DWConvMicrokernelTester()
6097 .cr(24)
6098 .kr(9)
6099 .channels(24)
6100 .width(5)
6101 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08006102 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006103 }
6104 }
6105
6106 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmin) {
6107 TEST_REQUIRES_X86_AVX;
6108 for (size_t channels = 1; channels <= 120; channels += 23) {
6109 DWConvMicrokernelTester()
6110 .cr(24)
6111 .kr(9)
6112 .channels(channels)
6113 .width(3)
6114 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006115 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006116 }
6117 }
6118
6119 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmax) {
6120 TEST_REQUIRES_X86_AVX;
6121 for (size_t channels = 1; channels <= 120; channels += 23) {
6122 DWConvMicrokernelTester()
6123 .cr(24)
6124 .kr(9)
6125 .channels(channels)
6126 .width(3)
6127 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006128 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006129 }
6130 }
6131
6132 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, input_offset) {
6133 TEST_REQUIRES_X86_AVX;
6134 for (uint32_t channels = 48; channels < 384; channels += 72) {
6135 DWConvMicrokernelTester()
6136 .cr(24)
6137 .kr(9)
6138 .channels(channels)
6139 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08006140 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006141 }
6142 }
6143
6144 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, zero) {
6145 TEST_REQUIRES_X86_AVX;
6146 for (uint32_t mz = 0; mz < 9; mz++) {
6147 for (uint32_t channels = 48; channels < 384; channels += 72) {
6148 DWConvMicrokernelTester()
6149 .cr(24)
6150 .kr(9)
6151 .channels(channels)
6152 .input_offset(464)
6153 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006154 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006155 }
6156 }
6157 }
6158#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159
6160
6161#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -07006162 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_eq_8) {
6163 TEST_REQUIRES_X86_AVX;
6164 DWConvMicrokernelTester()
6165 .cr(8)
6166 .kr(9)
6167 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006168 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006169 }
6170
6171 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8) {
6172 TEST_REQUIRES_X86_AVX;
6173 for (uint32_t channels = 16; channels < 128; channels += 24) {
6174 DWConvMicrokernelTester()
6175 .cr(8)
6176 .kr(9)
6177 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006178 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006179 }
6180 }
6181
6182 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmin) {
6183 TEST_REQUIRES_X86_AVX;
6184 for (uint32_t channels = 16; channels < 128; channels += 24) {
6185 DWConvMicrokernelTester()
6186 .cr(8)
6187 .kr(9)
6188 .channels(channels)
6189 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006190 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006191 }
6192 }
6193
6194 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmax) {
6195 TEST_REQUIRES_X86_AVX;
6196 for (uint32_t channels = 16; channels < 128; channels += 24) {
6197 DWConvMicrokernelTester()
6198 .cr(8)
6199 .kr(9)
6200 .channels(channels)
6201 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006203 }
6204 }
6205
6206 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_lt_8) {
6207 TEST_REQUIRES_X86_AVX;
6208 for (uint32_t channels = 1; channels < 8; channels++) {
6209 DWConvMicrokernelTester()
6210 .cr(8)
6211 .kr(9)
6212 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006213 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006214 }
6215 }
6216
6217 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8) {
6218 TEST_REQUIRES_X86_AVX;
6219 for (uint32_t channels = 9; channels < 16; channels++) {
6220 DWConvMicrokernelTester()
6221 .cr(8)
6222 .kr(9)
6223 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006224 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006225 }
6226 }
6227
6228 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
6229 TEST_REQUIRES_X86_AVX;
6230 for (uint32_t channels = 9; channels < 16; channels++) {
6231 DWConvMicrokernelTester()
6232 .cr(8)
6233 .kr(9)
6234 .channels(channels)
6235 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006236 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006237 }
6238 }
6239
6240 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
6241 TEST_REQUIRES_X86_AVX;
6242 for (uint32_t channels = 9; channels < 16; channels++) {
6243 DWConvMicrokernelTester()
6244 .cr(8)
6245 .kr(9)
6246 .channels(channels)
6247 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006248 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006249 }
6250 }
6251
6252 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel) {
6253 TEST_REQUIRES_X86_AVX;
6254 for (size_t channels = 1; channels <= 40; channels += 7) {
6255 DWConvMicrokernelTester()
6256 .cr(8)
6257 .kr(9)
6258 .channels(channels)
6259 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006260 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006261 }
6262 }
6263
6264 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_step) {
6265 TEST_REQUIRES_X86_AVX;
6266 for (size_t channels = 1; channels <= 40; channels += 7) {
6267 for (size_t step = 2; step <= 9; step++) {
6268 DWConvMicrokernelTester()
6269 .cr(8)
6270 .kr(9)
6271 .channels(channels)
6272 .width(3)
6273 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006274 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006275 }
6276 }
6277 }
6278
6279 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
6280 TEST_REQUIRES_X86_AVX;
6281 for (size_t channels = 1; channels <= 40; channels += 7) {
6282 DWConvMicrokernelTester()
6283 .cr(8)
6284 .kr(9)
6285 .channels(8)
6286 .width(5)
6287 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006288 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006289 }
6290 }
6291
6292 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
6293 TEST_REQUIRES_X86_AVX;
6294 for (size_t channels = 1; channels <= 40; channels += 7) {
6295 DWConvMicrokernelTester()
6296 .cr(8)
6297 .kr(9)
6298 .channels(channels)
6299 .width(3)
6300 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006301 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006302 }
6303 }
6304
6305 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
6306 TEST_REQUIRES_X86_AVX;
6307 for (size_t channels = 1; channels <= 40; channels += 7) {
6308 DWConvMicrokernelTester()
6309 .cr(8)
6310 .kr(9)
6311 .channels(channels)
6312 .width(3)
6313 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006314 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006315 }
6316 }
6317
6318 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, input_offset) {
6319 TEST_REQUIRES_X86_AVX;
6320 for (uint32_t channels = 16; channels < 128; channels += 24) {
6321 DWConvMicrokernelTester()
6322 .cr(8)
6323 .kr(9)
6324 .channels(channels)
6325 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08006326 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006327 }
6328 }
6329
6330 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, zero) {
6331 TEST_REQUIRES_X86_AVX;
6332 for (uint32_t mz = 0; mz < 9; mz++) {
6333 for (uint32_t channels = 16; channels < 128; channels += 24) {
6334 DWConvMicrokernelTester()
6335 .cr(8)
6336 .kr(9)
6337 .channels(channels)
6338 .input_offset(176)
6339 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006341 }
6342 }
6343 }
6344#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6345
6346
6347#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6348 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_eq_16) {
6349 TEST_REQUIRES_X86_AVX;
6350 DWConvMicrokernelTester()
6351 .cr(16)
6352 .kr(9)
6353 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006354 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006355 }
6356
6357 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16) {
6358 TEST_REQUIRES_X86_AVX;
6359 for (uint32_t channels = 32; channels < 256; channels += 48) {
6360 DWConvMicrokernelTester()
6361 .cr(16)
6362 .kr(9)
6363 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006364 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006365 }
6366 }
6367
6368 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmin) {
6369 TEST_REQUIRES_X86_AVX;
6370 for (uint32_t channels = 32; channels < 256; channels += 48) {
6371 DWConvMicrokernelTester()
6372 .cr(16)
6373 .kr(9)
6374 .channels(channels)
6375 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006376 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006377 }
6378 }
6379
6380 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmax) {
6381 TEST_REQUIRES_X86_AVX;
6382 for (uint32_t channels = 32; channels < 256; channels += 48) {
6383 DWConvMicrokernelTester()
6384 .cr(16)
6385 .kr(9)
6386 .channels(channels)
6387 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006389 }
6390 }
6391
6392 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_lt_16) {
6393 TEST_REQUIRES_X86_AVX;
6394 for (uint32_t channels = 1; channels < 16; channels++) {
6395 DWConvMicrokernelTester()
6396 .cr(16)
6397 .kr(9)
6398 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006399 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006400 }
6401 }
6402
6403 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16) {
6404 TEST_REQUIRES_X86_AVX;
6405 for (uint32_t channels = 17; channels < 32; channels++) {
6406 DWConvMicrokernelTester()
6407 .cr(16)
6408 .kr(9)
6409 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006410 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006411 }
6412 }
6413
6414 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
6415 TEST_REQUIRES_X86_AVX;
6416 for (uint32_t channels = 17; channels < 32; channels++) {
6417 DWConvMicrokernelTester()
6418 .cr(16)
6419 .kr(9)
6420 .channels(channels)
6421 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006422 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006423 }
6424 }
6425
6426 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
6427 TEST_REQUIRES_X86_AVX;
6428 for (uint32_t channels = 17; channels < 32; channels++) {
6429 DWConvMicrokernelTester()
6430 .cr(16)
6431 .kr(9)
6432 .channels(channels)
6433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006434 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006435 }
6436 }
6437
6438 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel) {
6439 TEST_REQUIRES_X86_AVX;
6440 for (size_t channels = 1; channels <= 80; channels += 15) {
6441 DWConvMicrokernelTester()
6442 .cr(16)
6443 .kr(9)
6444 .channels(channels)
6445 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006446 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006447 }
6448 }
6449
6450 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_step) {
6451 TEST_REQUIRES_X86_AVX;
6452 for (size_t channels = 1; channels <= 80; channels += 15) {
6453 for (size_t step = 2; step <= 9; step++) {
6454 DWConvMicrokernelTester()
6455 .cr(16)
6456 .kr(9)
6457 .channels(channels)
6458 .width(3)
6459 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006460 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006461 }
6462 }
6463 }
6464
6465 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
6466 TEST_REQUIRES_X86_AVX;
6467 for (size_t channels = 1; channels <= 80; channels += 15) {
6468 DWConvMicrokernelTester()
6469 .cr(16)
6470 .kr(9)
6471 .channels(16)
6472 .width(5)
6473 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006474 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006475 }
6476 }
6477
6478 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
6479 TEST_REQUIRES_X86_AVX;
6480 for (size_t channels = 1; channels <= 80; channels += 15) {
6481 DWConvMicrokernelTester()
6482 .cr(16)
6483 .kr(9)
6484 .channels(channels)
6485 .width(3)
6486 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006487 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006488 }
6489 }
6490
6491 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
6492 TEST_REQUIRES_X86_AVX;
6493 for (size_t channels = 1; channels <= 80; channels += 15) {
6494 DWConvMicrokernelTester()
6495 .cr(16)
6496 .kr(9)
6497 .channels(channels)
6498 .width(3)
6499 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006500 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006501 }
6502 }
6503
6504 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, input_offset) {
6505 TEST_REQUIRES_X86_AVX;
6506 for (uint32_t channels = 32; channels < 256; channels += 48) {
6507 DWConvMicrokernelTester()
6508 .cr(16)
6509 .kr(9)
6510 .channels(channels)
6511 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08006512 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006513 }
6514 }
6515
6516 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, zero) {
6517 TEST_REQUIRES_X86_AVX;
6518 for (uint32_t mz = 0; mz < 9; mz++) {
6519 for (uint32_t channels = 32; channels < 256; channels += 48) {
6520 DWConvMicrokernelTester()
6521 .cr(16)
6522 .kr(9)
6523 .channels(channels)
6524 .input_offset(304)
6525 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006526 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006527 }
6528 }
6529 }
6530#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6531
6532
6533#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6534 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_eq_8) {
6535 TEST_REQUIRES_X86_XOP;
6536 DWConvMicrokernelTester()
6537 .cr(8)
6538 .kr(9)
6539 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006540 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006541 }
6542
6543 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8) {
6544 TEST_REQUIRES_X86_XOP;
6545 for (uint32_t channels = 16; channels < 128; channels += 24) {
6546 DWConvMicrokernelTester()
6547 .cr(8)
6548 .kr(9)
6549 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006551 }
6552 }
6553
6554 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmin) {
6555 TEST_REQUIRES_X86_XOP;
6556 for (uint32_t channels = 16; channels < 128; channels += 24) {
6557 DWConvMicrokernelTester()
6558 .cr(8)
6559 .kr(9)
6560 .channels(channels)
6561 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006562 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006563 }
6564 }
6565
6566 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmax) {
6567 TEST_REQUIRES_X86_XOP;
6568 for (uint32_t channels = 16; channels < 128; channels += 24) {
6569 DWConvMicrokernelTester()
6570 .cr(8)
6571 .kr(9)
6572 .channels(channels)
6573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006575 }
6576 }
6577
6578 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_lt_8) {
6579 TEST_REQUIRES_X86_XOP;
6580 for (uint32_t channels = 1; channels < 8; channels++) {
6581 DWConvMicrokernelTester()
6582 .cr(8)
6583 .kr(9)
6584 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006585 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006586 }
6587 }
6588
6589 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8) {
6590 TEST_REQUIRES_X86_XOP;
6591 for (uint32_t channels = 9; channels < 16; channels++) {
6592 DWConvMicrokernelTester()
6593 .cr(8)
6594 .kr(9)
6595 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006596 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006597 }
6598 }
6599
6600 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
6601 TEST_REQUIRES_X86_XOP;
6602 for (uint32_t channels = 9; channels < 16; channels++) {
6603 DWConvMicrokernelTester()
6604 .cr(8)
6605 .kr(9)
6606 .channels(channels)
6607 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006608 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006609 }
6610 }
6611
6612 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
6613 TEST_REQUIRES_X86_XOP;
6614 for (uint32_t channels = 9; channels < 16; channels++) {
6615 DWConvMicrokernelTester()
6616 .cr(8)
6617 .kr(9)
6618 .channels(channels)
6619 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006620 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006621 }
6622 }
6623
6624 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel) {
6625 TEST_REQUIRES_X86_XOP;
6626 for (size_t channels = 1; channels <= 40; channels += 7) {
6627 DWConvMicrokernelTester()
6628 .cr(8)
6629 .kr(9)
6630 .channels(channels)
6631 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006632 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006633 }
6634 }
6635
6636 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_step) {
6637 TEST_REQUIRES_X86_XOP;
6638 for (size_t channels = 1; channels <= 40; channels += 7) {
6639 for (size_t step = 2; step <= 9; step++) {
6640 DWConvMicrokernelTester()
6641 .cr(8)
6642 .kr(9)
6643 .channels(channels)
6644 .width(3)
6645 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006646 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006647 }
6648 }
6649 }
6650
6651 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
6652 TEST_REQUIRES_X86_XOP;
6653 for (size_t channels = 1; channels <= 40; channels += 7) {
6654 DWConvMicrokernelTester()
6655 .cr(8)
6656 .kr(9)
6657 .channels(8)
6658 .width(5)
6659 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006660 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006661 }
6662 }
6663
6664 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
6665 TEST_REQUIRES_X86_XOP;
6666 for (size_t channels = 1; channels <= 40; channels += 7) {
6667 DWConvMicrokernelTester()
6668 .cr(8)
6669 .kr(9)
6670 .channels(channels)
6671 .width(3)
6672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006673 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006674 }
6675 }
6676
6677 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
6678 TEST_REQUIRES_X86_XOP;
6679 for (size_t channels = 1; channels <= 40; channels += 7) {
6680 DWConvMicrokernelTester()
6681 .cr(8)
6682 .kr(9)
6683 .channels(channels)
6684 .width(3)
6685 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006686 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006687 }
6688 }
6689
6690 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, input_offset) {
6691 TEST_REQUIRES_X86_XOP;
6692 for (uint32_t channels = 16; channels < 128; channels += 24) {
6693 DWConvMicrokernelTester()
6694 .cr(8)
6695 .kr(9)
6696 .channels(channels)
6697 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08006698 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006699 }
6700 }
6701
6702 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, zero) {
6703 TEST_REQUIRES_X86_XOP;
6704 for (uint32_t mz = 0; mz < 9; mz++) {
6705 for (uint32_t channels = 16; channels < 128; channels += 24) {
6706 DWConvMicrokernelTester()
6707 .cr(8)
6708 .kr(9)
6709 .channels(channels)
6710 .input_offset(176)
6711 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006712 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006713 }
6714 }
6715 }
6716#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6717
6718
6719#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6720 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_eq_16) {
6721 TEST_REQUIRES_X86_XOP;
6722 DWConvMicrokernelTester()
6723 .cr(16)
6724 .kr(9)
6725 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006726 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006727 }
6728
6729 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16) {
6730 TEST_REQUIRES_X86_XOP;
6731 for (uint32_t channels = 32; channels < 256; channels += 48) {
6732 DWConvMicrokernelTester()
6733 .cr(16)
6734 .kr(9)
6735 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006736 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006737 }
6738 }
6739
6740 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmin) {
6741 TEST_REQUIRES_X86_XOP;
6742 for (uint32_t channels = 32; channels < 256; channels += 48) {
6743 DWConvMicrokernelTester()
6744 .cr(16)
6745 .kr(9)
6746 .channels(channels)
6747 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006748 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006749 }
6750 }
6751
6752 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmax) {
6753 TEST_REQUIRES_X86_XOP;
6754 for (uint32_t channels = 32; channels < 256; channels += 48) {
6755 DWConvMicrokernelTester()
6756 .cr(16)
6757 .kr(9)
6758 .channels(channels)
6759 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006761 }
6762 }
6763
6764 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_lt_16) {
6765 TEST_REQUIRES_X86_XOP;
6766 for (uint32_t channels = 1; channels < 16; channels++) {
6767 DWConvMicrokernelTester()
6768 .cr(16)
6769 .kr(9)
6770 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006771 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006772 }
6773 }
6774
6775 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16) {
6776 TEST_REQUIRES_X86_XOP;
6777 for (uint32_t channels = 17; channels < 32; channels++) {
6778 DWConvMicrokernelTester()
6779 .cr(16)
6780 .kr(9)
6781 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006782 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006783 }
6784 }
6785
6786 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
6787 TEST_REQUIRES_X86_XOP;
6788 for (uint32_t channels = 17; channels < 32; channels++) {
6789 DWConvMicrokernelTester()
6790 .cr(16)
6791 .kr(9)
6792 .channels(channels)
6793 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006794 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006795 }
6796 }
6797
6798 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
6799 TEST_REQUIRES_X86_XOP;
6800 for (uint32_t channels = 17; channels < 32; channels++) {
6801 DWConvMicrokernelTester()
6802 .cr(16)
6803 .kr(9)
6804 .channels(channels)
6805 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006807 }
6808 }
6809
6810 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel) {
6811 TEST_REQUIRES_X86_XOP;
6812 for (size_t channels = 1; channels <= 80; channels += 15) {
6813 DWConvMicrokernelTester()
6814 .cr(16)
6815 .kr(9)
6816 .channels(channels)
6817 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006818 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006819 }
6820 }
6821
6822 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_step) {
6823 TEST_REQUIRES_X86_XOP;
6824 for (size_t channels = 1; channels <= 80; channels += 15) {
6825 for (size_t step = 2; step <= 9; step++) {
6826 DWConvMicrokernelTester()
6827 .cr(16)
6828 .kr(9)
6829 .channels(channels)
6830 .width(3)
6831 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08006832 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006833 }
6834 }
6835 }
6836
6837 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
6838 TEST_REQUIRES_X86_XOP;
6839 for (size_t channels = 1; channels <= 80; channels += 15) {
6840 DWConvMicrokernelTester()
6841 .cr(16)
6842 .kr(9)
6843 .channels(16)
6844 .width(5)
6845 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006846 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006847 }
6848 }
6849
6850 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
6851 TEST_REQUIRES_X86_XOP;
6852 for (size_t channels = 1; channels <= 80; channels += 15) {
6853 DWConvMicrokernelTester()
6854 .cr(16)
6855 .kr(9)
6856 .channels(channels)
6857 .width(3)
6858 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006859 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006860 }
6861 }
6862
6863 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
6864 TEST_REQUIRES_X86_XOP;
6865 for (size_t channels = 1; channels <= 80; channels += 15) {
6866 DWConvMicrokernelTester()
6867 .cr(16)
6868 .kr(9)
6869 .channels(channels)
6870 .width(3)
6871 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006872 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006873 }
6874 }
6875
6876 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, input_offset) {
6877 TEST_REQUIRES_X86_XOP;
6878 for (uint32_t channels = 32; channels < 256; channels += 48) {
6879 DWConvMicrokernelTester()
6880 .cr(16)
6881 .kr(9)
6882 .channels(channels)
6883 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08006884 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006885 }
6886 }
6887
6888 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, zero) {
6889 TEST_REQUIRES_X86_XOP;
6890 for (uint32_t mz = 0; mz < 9; mz++) {
6891 for (uint32_t channels = 32; channels < 256; channels += 48) {
6892 DWConvMicrokernelTester()
6893 .cr(16)
6894 .kr(9)
6895 .channels(channels)
6896 .input_offset(304)
6897 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -07006899 }
6900 }
6901 }
6902#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6903
6904
6905#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -07006906 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_eq_16) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006907 TEST_REQUIRES_X86_AVX2;
6908 DWConvMicrokernelTester()
6909 .cr(16)
6910 .kr(9)
6911 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006912 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006913 }
6914
Marat Dukhan881ab022021-07-28 13:49:26 -07006915 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006916 TEST_REQUIRES_X86_AVX2;
6917 for (uint32_t channels = 32; channels < 256; channels += 48) {
6918 DWConvMicrokernelTester()
6919 .cr(16)
6920 .kr(9)
6921 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006922 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006923 }
6924 }
6925
Marat Dukhan881ab022021-07-28 13:49:26 -07006926 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006927 TEST_REQUIRES_X86_AVX2;
6928 for (uint32_t channels = 32; channels < 256; channels += 48) {
6929 DWConvMicrokernelTester()
6930 .cr(16)
6931 .kr(9)
6932 .channels(channels)
6933 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006934 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006935 }
6936 }
6937
Marat Dukhan881ab022021-07-28 13:49:26 -07006938 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006939 TEST_REQUIRES_X86_AVX2;
6940 for (uint32_t channels = 32; channels < 256; channels += 48) {
6941 DWConvMicrokernelTester()
6942 .cr(16)
6943 .kr(9)
6944 .channels(channels)
6945 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006947 }
6948 }
6949
Marat Dukhan881ab022021-07-28 13:49:26 -07006950 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_lt_16) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006951 TEST_REQUIRES_X86_AVX2;
6952 for (uint32_t channels = 1; channels < 16; channels++) {
6953 DWConvMicrokernelTester()
6954 .cr(16)
6955 .kr(9)
6956 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006957 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006958 }
6959 }
6960
Marat Dukhan881ab022021-07-28 13:49:26 -07006961 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006962 TEST_REQUIRES_X86_AVX2;
6963 for (uint32_t channels = 17; channels < 32; channels++) {
6964 DWConvMicrokernelTester()
6965 .cr(16)
6966 .kr(9)
6967 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08006968 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006969 }
6970 }
6971
Marat Dukhan881ab022021-07-28 13:49:26 -07006972 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006973 TEST_REQUIRES_X86_AVX2;
6974 for (uint32_t channels = 17; channels < 32; channels++) {
6975 DWConvMicrokernelTester()
6976 .cr(16)
6977 .kr(9)
6978 .channels(channels)
6979 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006980 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006981 }
6982 }
6983
Marat Dukhan881ab022021-07-28 13:49:26 -07006984 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006985 TEST_REQUIRES_X86_AVX2;
6986 for (uint32_t channels = 17; channels < 32; channels++) {
6987 DWConvMicrokernelTester()
6988 .cr(16)
6989 .kr(9)
6990 .channels(channels)
6991 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07006993 }
6994 }
6995
Marat Dukhan881ab022021-07-28 13:49:26 -07006996 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan98042f22021-06-15 00:43:13 -07006997 TEST_REQUIRES_X86_AVX2;
6998 for (size_t channels = 1; channels <= 80; channels += 15) {
6999 DWConvMicrokernelTester()
7000 .cr(16)
7001 .kr(9)
7002 .channels(channels)
7003 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007004 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007005 }
7006 }
7007
Marat Dukhan881ab022021-07-28 13:49:26 -07007008 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007009 TEST_REQUIRES_X86_AVX2;
7010 for (size_t channels = 1; channels <= 80; channels += 15) {
7011 for (size_t step = 2; step <= 9; step++) {
7012 DWConvMicrokernelTester()
7013 .cr(16)
7014 .kr(9)
7015 .channels(channels)
7016 .width(3)
7017 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007018 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007019 }
7020 }
7021 }
7022
Marat Dukhan881ab022021-07-28 13:49:26 -07007023 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007024 TEST_REQUIRES_X86_AVX2;
7025 for (size_t channels = 1; channels <= 80; channels += 15) {
7026 DWConvMicrokernelTester()
7027 .cr(16)
7028 .kr(9)
7029 .channels(16)
7030 .width(5)
7031 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007032 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007033 }
7034 }
7035
Marat Dukhan881ab022021-07-28 13:49:26 -07007036 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007037 TEST_REQUIRES_X86_AVX2;
7038 for (size_t channels = 1; channels <= 80; channels += 15) {
7039 DWConvMicrokernelTester()
7040 .cr(16)
7041 .kr(9)
7042 .channels(channels)
7043 .width(3)
7044 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007045 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007046 }
7047 }
7048
Marat Dukhan881ab022021-07-28 13:49:26 -07007049 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007050 TEST_REQUIRES_X86_AVX2;
7051 for (size_t channels = 1; channels <= 80; channels += 15) {
7052 DWConvMicrokernelTester()
7053 .cr(16)
7054 .kr(9)
7055 .channels(channels)
7056 .width(3)
7057 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007058 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007059 }
7060 }
7061
Marat Dukhan881ab022021-07-28 13:49:26 -07007062 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007063 TEST_REQUIRES_X86_AVX2;
7064 for (uint32_t channels = 32; channels < 256; channels += 48) {
7065 DWConvMicrokernelTester()
7066 .cr(16)
7067 .kr(9)
7068 .channels(channels)
7069 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08007070 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007071 }
7072 }
7073
Marat Dukhan881ab022021-07-28 13:49:26 -07007074 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007075 TEST_REQUIRES_X86_AVX2;
7076 for (uint32_t mz = 0; mz < 9; mz++) {
7077 for (uint32_t channels = 32; channels < 256; channels += 48) {
7078 DWConvMicrokernelTester()
7079 .cr(16)
7080 .kr(9)
7081 .channels(channels)
7082 .input_offset(304)
7083 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007084 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007085 }
7086 }
7087 }
7088#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7089
7090
7091#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -07007092 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_eq_32) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007093 TEST_REQUIRES_X86_AVX2;
7094 DWConvMicrokernelTester()
7095 .cr(32)
7096 .kr(9)
7097 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08007098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007099 }
7100
Marat Dukhan881ab022021-07-28 13:49:26 -07007101 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007102 TEST_REQUIRES_X86_AVX2;
7103 for (uint32_t channels = 64; channels < 512; channels += 96) {
7104 DWConvMicrokernelTester()
7105 .cr(32)
7106 .kr(9)
7107 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007108 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007109 }
7110 }
7111
Marat Dukhan881ab022021-07-28 13:49:26 -07007112 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007113 TEST_REQUIRES_X86_AVX2;
7114 for (uint32_t channels = 64; channels < 512; channels += 96) {
7115 DWConvMicrokernelTester()
7116 .cr(32)
7117 .kr(9)
7118 .channels(channels)
7119 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007120 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007121 }
7122 }
7123
Marat Dukhan881ab022021-07-28 13:49:26 -07007124 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007125 TEST_REQUIRES_X86_AVX2;
7126 for (uint32_t channels = 64; channels < 512; channels += 96) {
7127 DWConvMicrokernelTester()
7128 .cr(32)
7129 .kr(9)
7130 .channels(channels)
7131 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007132 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007133 }
7134 }
7135
Marat Dukhan881ab022021-07-28 13:49:26 -07007136 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_lt_32) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007137 TEST_REQUIRES_X86_AVX2;
7138 for (uint32_t channels = 1; channels < 32; channels++) {
7139 DWConvMicrokernelTester()
7140 .cr(32)
7141 .kr(9)
7142 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007143 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007144 }
7145 }
7146
Marat Dukhan881ab022021-07-28 13:49:26 -07007147 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007148 TEST_REQUIRES_X86_AVX2;
7149 for (uint32_t channels = 33; channels < 64; channels++) {
7150 DWConvMicrokernelTester()
7151 .cr(32)
7152 .kr(9)
7153 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007154 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007155 }
7156 }
7157
Marat Dukhan881ab022021-07-28 13:49:26 -07007158 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007159 TEST_REQUIRES_X86_AVX2;
7160 for (uint32_t channels = 33; channels < 64; channels++) {
7161 DWConvMicrokernelTester()
7162 .cr(32)
7163 .kr(9)
7164 .channels(channels)
7165 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007166 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007167 }
7168 }
7169
Marat Dukhan881ab022021-07-28 13:49:26 -07007170 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007171 TEST_REQUIRES_X86_AVX2;
7172 for (uint32_t channels = 33; channels < 64; channels++) {
7173 DWConvMicrokernelTester()
7174 .cr(32)
7175 .kr(9)
7176 .channels(channels)
7177 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007178 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007179 }
7180 }
7181
Marat Dukhan881ab022021-07-28 13:49:26 -07007182 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007183 TEST_REQUIRES_X86_AVX2;
7184 for (size_t channels = 1; channels <= 160; channels += 31) {
7185 DWConvMicrokernelTester()
7186 .cr(32)
7187 .kr(9)
7188 .channels(channels)
7189 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007190 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007191 }
7192 }
7193
Marat Dukhan881ab022021-07-28 13:49:26 -07007194 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007195 TEST_REQUIRES_X86_AVX2;
7196 for (size_t channels = 1; channels <= 160; channels += 31) {
7197 for (size_t step = 2; step <= 9; step++) {
7198 DWConvMicrokernelTester()
7199 .cr(32)
7200 .kr(9)
7201 .channels(channels)
7202 .width(3)
7203 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007204 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007205 }
7206 }
7207 }
7208
Marat Dukhan881ab022021-07-28 13:49:26 -07007209 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007210 TEST_REQUIRES_X86_AVX2;
7211 for (size_t channels = 1; channels <= 160; channels += 31) {
7212 DWConvMicrokernelTester()
7213 .cr(32)
7214 .kr(9)
7215 .channels(32)
7216 .width(5)
7217 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007218 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007219 }
7220 }
7221
Marat Dukhan881ab022021-07-28 13:49:26 -07007222 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007223 TEST_REQUIRES_X86_AVX2;
7224 for (size_t channels = 1; channels <= 160; channels += 31) {
7225 DWConvMicrokernelTester()
7226 .cr(32)
7227 .kr(9)
7228 .channels(channels)
7229 .width(3)
7230 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007231 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007232 }
7233 }
7234
Marat Dukhan881ab022021-07-28 13:49:26 -07007235 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007236 TEST_REQUIRES_X86_AVX2;
7237 for (size_t channels = 1; channels <= 160; channels += 31) {
7238 DWConvMicrokernelTester()
7239 .cr(32)
7240 .kr(9)
7241 .channels(channels)
7242 .width(3)
7243 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007244 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007245 }
7246 }
7247
Marat Dukhan881ab022021-07-28 13:49:26 -07007248 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007249 TEST_REQUIRES_X86_AVX2;
7250 for (uint32_t channels = 64; channels < 512; channels += 96) {
7251 DWConvMicrokernelTester()
7252 .cr(32)
7253 .kr(9)
7254 .channels(channels)
7255 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08007256 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007257 }
7258 }
7259
Marat Dukhan881ab022021-07-28 13:49:26 -07007260 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan98042f22021-06-15 00:43:13 -07007261 TEST_REQUIRES_X86_AVX2;
7262 for (uint32_t mz = 0; mz < 9; mz++) {
7263 for (uint32_t channels = 64; channels < 512; channels += 96) {
7264 DWConvMicrokernelTester()
7265 .cr(32)
7266 .kr(9)
7267 .channels(channels)
7268 .input_offset(592)
7269 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007270 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007271 }
7272 }
7273 }
7274#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7275
7276
7277#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7278 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_eq_16) {
7279 TEST_REQUIRES_X86_AVX2;
7280 DWConvMicrokernelTester()
7281 .cr(16)
7282 .kr(9)
7283 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007284 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007285 }
7286
7287 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16) {
7288 TEST_REQUIRES_X86_AVX2;
7289 for (uint32_t channels = 32; channels < 256; channels += 48) {
7290 DWConvMicrokernelTester()
7291 .cr(16)
7292 .kr(9)
7293 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007295 }
7296 }
7297
7298 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
7299 TEST_REQUIRES_X86_AVX2;
7300 for (uint32_t channels = 32; channels < 256; channels += 48) {
7301 DWConvMicrokernelTester()
7302 .cr(16)
7303 .kr(9)
7304 .channels(channels)
7305 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007306 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007307 }
7308 }
7309
7310 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
7311 TEST_REQUIRES_X86_AVX2;
7312 for (uint32_t channels = 32; channels < 256; channels += 48) {
7313 DWConvMicrokernelTester()
7314 .cr(16)
7315 .kr(9)
7316 .channels(channels)
7317 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007318 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007319 }
7320 }
7321
7322 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_lt_16) {
7323 TEST_REQUIRES_X86_AVX2;
7324 for (uint32_t channels = 1; channels < 16; channels++) {
7325 DWConvMicrokernelTester()
7326 .cr(16)
7327 .kr(9)
7328 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007329 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007330 }
7331 }
7332
7333 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16) {
7334 TEST_REQUIRES_X86_AVX2;
7335 for (uint32_t channels = 17; channels < 32; channels++) {
7336 DWConvMicrokernelTester()
7337 .cr(16)
7338 .kr(9)
7339 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007341 }
7342 }
7343
7344 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
7345 TEST_REQUIRES_X86_AVX2;
7346 for (uint32_t channels = 17; channels < 32; channels++) {
7347 DWConvMicrokernelTester()
7348 .cr(16)
7349 .kr(9)
7350 .channels(channels)
7351 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007352 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007353 }
7354 }
7355
7356 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
7357 TEST_REQUIRES_X86_AVX2;
7358 for (uint32_t channels = 17; channels < 32; channels++) {
7359 DWConvMicrokernelTester()
7360 .cr(16)
7361 .kr(9)
7362 .channels(channels)
7363 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007364 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007365 }
7366 }
7367
7368 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel) {
7369 TEST_REQUIRES_X86_AVX2;
7370 for (size_t channels = 1; channels <= 80; channels += 15) {
7371 DWConvMicrokernelTester()
7372 .cr(16)
7373 .kr(9)
7374 .channels(channels)
7375 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007376 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007377 }
7378 }
7379
7380 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
7381 TEST_REQUIRES_X86_AVX2;
7382 for (size_t channels = 1; channels <= 80; channels += 15) {
7383 for (size_t step = 2; step <= 9; step++) {
7384 DWConvMicrokernelTester()
7385 .cr(16)
7386 .kr(9)
7387 .channels(channels)
7388 .width(3)
7389 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007390 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007391 }
7392 }
7393 }
7394
7395 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
7396 TEST_REQUIRES_X86_AVX2;
7397 for (size_t channels = 1; channels <= 80; channels += 15) {
7398 DWConvMicrokernelTester()
7399 .cr(16)
7400 .kr(9)
7401 .channels(16)
7402 .width(5)
7403 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007404 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007405 }
7406 }
7407
7408 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
7409 TEST_REQUIRES_X86_AVX2;
7410 for (size_t channels = 1; channels <= 80; channels += 15) {
7411 DWConvMicrokernelTester()
7412 .cr(16)
7413 .kr(9)
7414 .channels(channels)
7415 .width(3)
7416 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007417 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007418 }
7419 }
7420
7421 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
7422 TEST_REQUIRES_X86_AVX2;
7423 for (size_t channels = 1; channels <= 80; channels += 15) {
7424 DWConvMicrokernelTester()
7425 .cr(16)
7426 .kr(9)
7427 .channels(channels)
7428 .width(3)
7429 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007430 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007431 }
7432 }
7433
7434 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, input_offset) {
7435 TEST_REQUIRES_X86_AVX2;
7436 for (uint32_t channels = 32; channels < 256; channels += 48) {
7437 DWConvMicrokernelTester()
7438 .cr(16)
7439 .kr(9)
7440 .channels(channels)
7441 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08007442 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007443 }
7444 }
7445
7446 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, zero) {
7447 TEST_REQUIRES_X86_AVX2;
7448 for (uint32_t mz = 0; mz < 9; mz++) {
7449 for (uint32_t channels = 32; channels < 256; channels += 48) {
7450 DWConvMicrokernelTester()
7451 .cr(16)
7452 .kr(9)
7453 .channels(channels)
7454 .input_offset(304)
7455 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007456 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007457 }
7458 }
7459 }
7460#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7461
7462
7463#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7464 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_eq_32) {
7465 TEST_REQUIRES_X86_AVX2;
7466 DWConvMicrokernelTester()
7467 .cr(32)
7468 .kr(9)
7469 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08007470 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007471 }
7472
7473 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32) {
7474 TEST_REQUIRES_X86_AVX2;
7475 for (uint32_t channels = 64; channels < 512; channels += 96) {
7476 DWConvMicrokernelTester()
7477 .cr(32)
7478 .kr(9)
7479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007481 }
7482 }
7483
7484 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
7485 TEST_REQUIRES_X86_AVX2;
7486 for (uint32_t channels = 64; channels < 512; channels += 96) {
7487 DWConvMicrokernelTester()
7488 .cr(32)
7489 .kr(9)
7490 .channels(channels)
7491 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007492 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007493 }
7494 }
7495
7496 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
7497 TEST_REQUIRES_X86_AVX2;
7498 for (uint32_t channels = 64; channels < 512; channels += 96) {
7499 DWConvMicrokernelTester()
7500 .cr(32)
7501 .kr(9)
7502 .channels(channels)
7503 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007504 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007505 }
7506 }
7507
7508 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_lt_32) {
7509 TEST_REQUIRES_X86_AVX2;
7510 for (uint32_t channels = 1; channels < 32; channels++) {
7511 DWConvMicrokernelTester()
7512 .cr(32)
7513 .kr(9)
7514 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007515 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007516 }
7517 }
7518
7519 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32) {
7520 TEST_REQUIRES_X86_AVX2;
7521 for (uint32_t channels = 33; channels < 64; channels++) {
7522 DWConvMicrokernelTester()
7523 .cr(32)
7524 .kr(9)
7525 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007526 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007527 }
7528 }
7529
7530 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
7531 TEST_REQUIRES_X86_AVX2;
7532 for (uint32_t channels = 33; channels < 64; channels++) {
7533 DWConvMicrokernelTester()
7534 .cr(32)
7535 .kr(9)
7536 .channels(channels)
7537 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007538 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007539 }
7540 }
7541
7542 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
7543 TEST_REQUIRES_X86_AVX2;
7544 for (uint32_t channels = 33; channels < 64; channels++) {
7545 DWConvMicrokernelTester()
7546 .cr(32)
7547 .kr(9)
7548 .channels(channels)
7549 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007551 }
7552 }
7553
7554 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel) {
7555 TEST_REQUIRES_X86_AVX2;
7556 for (size_t channels = 1; channels <= 160; channels += 31) {
7557 DWConvMicrokernelTester()
7558 .cr(32)
7559 .kr(9)
7560 .channels(channels)
7561 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007562 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007563 }
7564 }
7565
7566 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
7567 TEST_REQUIRES_X86_AVX2;
7568 for (size_t channels = 1; channels <= 160; channels += 31) {
7569 for (size_t step = 2; step <= 9; step++) {
7570 DWConvMicrokernelTester()
7571 .cr(32)
7572 .kr(9)
7573 .channels(channels)
7574 .width(3)
7575 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007576 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007577 }
7578 }
7579 }
7580
7581 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
7582 TEST_REQUIRES_X86_AVX2;
7583 for (size_t channels = 1; channels <= 160; channels += 31) {
7584 DWConvMicrokernelTester()
7585 .cr(32)
7586 .kr(9)
7587 .channels(32)
7588 .width(5)
7589 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007590 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007591 }
7592 }
7593
7594 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
7595 TEST_REQUIRES_X86_AVX2;
7596 for (size_t channels = 1; channels <= 160; channels += 31) {
7597 DWConvMicrokernelTester()
7598 .cr(32)
7599 .kr(9)
7600 .channels(channels)
7601 .width(3)
7602 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007603 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007604 }
7605 }
7606
7607 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
7608 TEST_REQUIRES_X86_AVX2;
7609 for (size_t channels = 1; channels <= 160; channels += 31) {
7610 DWConvMicrokernelTester()
7611 .cr(32)
7612 .kr(9)
7613 .channels(channels)
7614 .width(3)
7615 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007616 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007617 }
7618 }
7619
7620 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, input_offset) {
7621 TEST_REQUIRES_X86_AVX2;
7622 for (uint32_t channels = 64; channels < 512; channels += 96) {
7623 DWConvMicrokernelTester()
7624 .cr(32)
7625 .kr(9)
7626 .channels(channels)
7627 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08007628 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -07007629 }
7630 }
7631
7632 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, zero) {
7633 TEST_REQUIRES_X86_AVX2;
7634 for (uint32_t mz = 0; mz < 9; mz++) {
7635 for (uint32_t channels = 64; channels < 512; channels += 96) {
7636 DWConvMicrokernelTester()
7637 .cr(32)
7638 .kr(9)
7639 .channels(channels)
7640 .input_offset(592)
7641 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007642 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07007643 }
7644 }
7645 }
7646#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7647
7648
7649#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007650 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
7651 TEST_REQUIRES_X86_AVX2;
7652 DWConvMicrokernelTester()
7653 .cr(16)
7654 .kr(9)
7655 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007657 }
7658
7659 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
7660 TEST_REQUIRES_X86_AVX2;
7661 for (uint32_t channels = 32; channels < 256; channels += 48) {
7662 DWConvMicrokernelTester()
7663 .cr(16)
7664 .kr(9)
7665 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007666 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007667 }
7668 }
7669
7670 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
7671 TEST_REQUIRES_X86_AVX2;
7672 for (uint32_t channels = 32; channels < 256; channels += 48) {
7673 DWConvMicrokernelTester()
7674 .cr(16)
7675 .kr(9)
7676 .channels(channels)
7677 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007678 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007679 }
7680 }
7681
7682 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
7683 TEST_REQUIRES_X86_AVX2;
7684 for (uint32_t channels = 32; channels < 256; channels += 48) {
7685 DWConvMicrokernelTester()
7686 .cr(16)
7687 .kr(9)
7688 .channels(channels)
7689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007691 }
7692 }
7693
7694 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
7695 TEST_REQUIRES_X86_AVX2;
7696 for (uint32_t channels = 1; channels < 16; channels++) {
7697 DWConvMicrokernelTester()
7698 .cr(16)
7699 .kr(9)
7700 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007701 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007702 }
7703 }
7704
7705 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
7706 TEST_REQUIRES_X86_AVX2;
7707 for (uint32_t channels = 17; channels < 32; channels++) {
7708 DWConvMicrokernelTester()
7709 .cr(16)
7710 .kr(9)
7711 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007712 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007713 }
7714 }
7715
7716 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
7717 TEST_REQUIRES_X86_AVX2;
7718 for (uint32_t channels = 17; channels < 32; channels++) {
7719 DWConvMicrokernelTester()
7720 .cr(16)
7721 .kr(9)
7722 .channels(channels)
7723 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007724 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007725 }
7726 }
7727
7728 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
7729 TEST_REQUIRES_X86_AVX2;
7730 for (uint32_t channels = 17; channels < 32; channels++) {
7731 DWConvMicrokernelTester()
7732 .cr(16)
7733 .kr(9)
7734 .channels(channels)
7735 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007736 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007737 }
7738 }
7739
7740 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
7741 TEST_REQUIRES_X86_AVX2;
7742 for (size_t channels = 1; channels <= 80; channels += 15) {
7743 DWConvMicrokernelTester()
7744 .cr(16)
7745 .kr(9)
7746 .channels(channels)
7747 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007748 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007749 }
7750 }
7751
7752 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
7753 TEST_REQUIRES_X86_AVX2;
7754 for (size_t channels = 1; channels <= 80; channels += 15) {
7755 for (size_t step = 2; step <= 9; step++) {
7756 DWConvMicrokernelTester()
7757 .cr(16)
7758 .kr(9)
7759 .channels(channels)
7760 .width(3)
7761 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007762 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007763 }
7764 }
7765 }
7766
7767 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
7768 TEST_REQUIRES_X86_AVX2;
7769 for (size_t channels = 1; channels <= 80; channels += 15) {
7770 DWConvMicrokernelTester()
7771 .cr(16)
7772 .kr(9)
7773 .channels(16)
7774 .width(5)
7775 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007776 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007777 }
7778 }
7779
7780 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
7781 TEST_REQUIRES_X86_AVX2;
7782 for (size_t channels = 1; channels <= 80; channels += 15) {
7783 DWConvMicrokernelTester()
7784 .cr(16)
7785 .kr(9)
7786 .channels(channels)
7787 .width(3)
7788 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007790 }
7791 }
7792
7793 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
7794 TEST_REQUIRES_X86_AVX2;
7795 for (size_t channels = 1; channels <= 80; channels += 15) {
7796 DWConvMicrokernelTester()
7797 .cr(16)
7798 .kr(9)
7799 .channels(channels)
7800 .width(3)
7801 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007802 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007803 }
7804 }
7805
7806 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
7807 TEST_REQUIRES_X86_AVX2;
7808 for (uint32_t channels = 32; channels < 256; channels += 48) {
7809 DWConvMicrokernelTester()
7810 .cr(16)
7811 .kr(9)
7812 .channels(channels)
7813 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08007814 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007815 }
7816 }
7817
7818 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
7819 TEST_REQUIRES_X86_AVX2;
7820 for (uint32_t mz = 0; mz < 9; mz++) {
7821 for (uint32_t channels = 32; channels < 256; channels += 48) {
7822 DWConvMicrokernelTester()
7823 .cr(16)
7824 .kr(9)
7825 .channels(channels)
7826 .input_offset(304)
7827 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007828 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007829 }
7830 }
7831 }
7832#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7833
7834
7835#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7836 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
7837 TEST_REQUIRES_X86_AVX2;
7838 DWConvMicrokernelTester()
7839 .cr(32)
7840 .kr(9)
7841 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -08007842 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007843 }
7844
7845 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
7846 TEST_REQUIRES_X86_AVX2;
7847 for (uint32_t channels = 64; channels < 512; channels += 96) {
7848 DWConvMicrokernelTester()
7849 .cr(32)
7850 .kr(9)
7851 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007852 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007853 }
7854 }
7855
7856 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
7857 TEST_REQUIRES_X86_AVX2;
7858 for (uint32_t channels = 64; channels < 512; channels += 96) {
7859 DWConvMicrokernelTester()
7860 .cr(32)
7861 .kr(9)
7862 .channels(channels)
7863 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007864 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007865 }
7866 }
7867
7868 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
7869 TEST_REQUIRES_X86_AVX2;
7870 for (uint32_t channels = 64; channels < 512; channels += 96) {
7871 DWConvMicrokernelTester()
7872 .cr(32)
7873 .kr(9)
7874 .channels(channels)
7875 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007877 }
7878 }
7879
7880 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
7881 TEST_REQUIRES_X86_AVX2;
7882 for (uint32_t channels = 1; channels < 32; channels++) {
7883 DWConvMicrokernelTester()
7884 .cr(32)
7885 .kr(9)
7886 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007887 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007888 }
7889 }
7890
7891 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
7892 TEST_REQUIRES_X86_AVX2;
7893 for (uint32_t channels = 33; channels < 64; channels++) {
7894 DWConvMicrokernelTester()
7895 .cr(32)
7896 .kr(9)
7897 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08007898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007899 }
7900 }
7901
7902 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
7903 TEST_REQUIRES_X86_AVX2;
7904 for (uint32_t channels = 33; channels < 64; channels++) {
7905 DWConvMicrokernelTester()
7906 .cr(32)
7907 .kr(9)
7908 .channels(channels)
7909 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007910 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007911 }
7912 }
7913
7914 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
7915 TEST_REQUIRES_X86_AVX2;
7916 for (uint32_t channels = 33; channels < 64; channels++) {
7917 DWConvMicrokernelTester()
7918 .cr(32)
7919 .kr(9)
7920 .channels(channels)
7921 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007922 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007923 }
7924 }
7925
7926 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
7927 TEST_REQUIRES_X86_AVX2;
7928 for (size_t channels = 1; channels <= 160; channels += 31) {
7929 DWConvMicrokernelTester()
7930 .cr(32)
7931 .kr(9)
7932 .channels(channels)
7933 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007934 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007935 }
7936 }
7937
7938 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
7939 TEST_REQUIRES_X86_AVX2;
7940 for (size_t channels = 1; channels <= 160; channels += 31) {
7941 for (size_t step = 2; step <= 9; step++) {
7942 DWConvMicrokernelTester()
7943 .cr(32)
7944 .kr(9)
7945 .channels(channels)
7946 .width(3)
7947 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08007948 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007949 }
7950 }
7951 }
7952
7953 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
7954 TEST_REQUIRES_X86_AVX2;
7955 for (size_t channels = 1; channels <= 160; channels += 31) {
7956 DWConvMicrokernelTester()
7957 .cr(32)
7958 .kr(9)
7959 .channels(32)
7960 .width(5)
7961 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007962 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007963 }
7964 }
7965
7966 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
7967 TEST_REQUIRES_X86_AVX2;
7968 for (size_t channels = 1; channels <= 160; channels += 31) {
7969 DWConvMicrokernelTester()
7970 .cr(32)
7971 .kr(9)
7972 .channels(channels)
7973 .width(3)
7974 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007975 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007976 }
7977 }
7978
7979 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
7980 TEST_REQUIRES_X86_AVX2;
7981 for (size_t channels = 1; channels <= 160; channels += 31) {
7982 DWConvMicrokernelTester()
7983 .cr(32)
7984 .kr(9)
7985 .channels(channels)
7986 .width(3)
7987 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007988 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07007989 }
7990 }
7991
7992 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
7993 TEST_REQUIRES_X86_AVX2;
7994 for (uint32_t channels = 64; channels < 512; channels += 96) {
7995 DWConvMicrokernelTester()
7996 .cr(32)
7997 .kr(9)
7998 .channels(channels)
7999 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -08008000 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07008001 }
8002 }
8003
8004 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
8005 TEST_REQUIRES_X86_AVX2;
8006 for (uint32_t mz = 0; mz < 9; mz++) {
8007 for (uint32_t channels = 64; channels < 512; channels += 96) {
8008 DWConvMicrokernelTester()
8009 .cr(32)
8010 .kr(9)
8011 .channels(channels)
8012 .input_offset(592)
8013 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008014 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -07008015 }
8016 }
8017 }
8018#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8019
8020
8021#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -07008022 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
8023 TEST_REQUIRES_X86_SSE41;
8024 DWConvMicrokernelTester()
8025 .cr(8)
8026 .kr(9)
8027 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008028 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008029 }
8030
8031 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
8032 TEST_REQUIRES_X86_SSE41;
8033 for (uint32_t channels = 16; channels < 128; channels += 24) {
8034 DWConvMicrokernelTester()
8035 .cr(8)
8036 .kr(9)
8037 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008038 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008039 }
8040 }
8041
8042 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
8043 TEST_REQUIRES_X86_SSE41;
8044 for (uint32_t channels = 16; channels < 128; channels += 24) {
8045 DWConvMicrokernelTester()
8046 .cr(8)
8047 .kr(9)
8048 .channels(channels)
8049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008050 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008051 }
8052 }
8053
8054 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
8055 TEST_REQUIRES_X86_SSE41;
8056 for (uint32_t channels = 16; channels < 128; channels += 24) {
8057 DWConvMicrokernelTester()
8058 .cr(8)
8059 .kr(9)
8060 .channels(channels)
8061 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008062 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008063 }
8064 }
8065
8066 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
8067 TEST_REQUIRES_X86_SSE41;
8068 for (uint32_t channels = 1; channels < 8; channels++) {
8069 DWConvMicrokernelTester()
8070 .cr(8)
8071 .kr(9)
8072 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008073 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008074 }
8075 }
8076
8077 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
8078 TEST_REQUIRES_X86_SSE41;
8079 for (uint32_t channels = 9; channels < 16; channels++) {
8080 DWConvMicrokernelTester()
8081 .cr(8)
8082 .kr(9)
8083 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008084 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008085 }
8086 }
8087
8088 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
8089 TEST_REQUIRES_X86_SSE41;
8090 for (uint32_t channels = 9; channels < 16; channels++) {
8091 DWConvMicrokernelTester()
8092 .cr(8)
8093 .kr(9)
8094 .channels(channels)
8095 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008096 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008097 }
8098 }
8099
8100 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
8101 TEST_REQUIRES_X86_SSE41;
8102 for (uint32_t channels = 9; channels < 16; channels++) {
8103 DWConvMicrokernelTester()
8104 .cr(8)
8105 .kr(9)
8106 .channels(channels)
8107 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008108 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008109 }
8110 }
8111
8112 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
8113 TEST_REQUIRES_X86_SSE41;
8114 for (size_t channels = 1; channels <= 40; channels += 7) {
8115 DWConvMicrokernelTester()
8116 .cr(8)
8117 .kr(9)
8118 .channels(channels)
8119 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008120 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008121 }
8122 }
8123
8124 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
8125 TEST_REQUIRES_X86_SSE41;
8126 for (size_t channels = 1; channels <= 40; channels += 7) {
8127 for (size_t step = 2; step <= 9; step++) {
8128 DWConvMicrokernelTester()
8129 .cr(8)
8130 .kr(9)
8131 .channels(channels)
8132 .width(3)
8133 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008134 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008135 }
8136 }
8137 }
8138
8139 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
8140 TEST_REQUIRES_X86_SSE41;
8141 for (size_t channels = 1; channels <= 40; channels += 7) {
8142 DWConvMicrokernelTester()
8143 .cr(8)
8144 .kr(9)
8145 .channels(8)
8146 .width(5)
8147 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008148 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008149 }
8150 }
8151
8152 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
8153 TEST_REQUIRES_X86_SSE41;
8154 for (size_t channels = 1; channels <= 40; channels += 7) {
8155 DWConvMicrokernelTester()
8156 .cr(8)
8157 .kr(9)
8158 .channels(channels)
8159 .width(3)
8160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008161 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008162 }
8163 }
8164
8165 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
8166 TEST_REQUIRES_X86_SSE41;
8167 for (size_t channels = 1; channels <= 40; channels += 7) {
8168 DWConvMicrokernelTester()
8169 .cr(8)
8170 .kr(9)
8171 .channels(channels)
8172 .width(3)
8173 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008174 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008175 }
8176 }
8177
8178 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
8179 TEST_REQUIRES_X86_SSE41;
8180 for (uint32_t channels = 16; channels < 128; channels += 24) {
8181 DWConvMicrokernelTester()
8182 .cr(8)
8183 .kr(9)
8184 .channels(channels)
8185 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08008186 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008187 }
8188 }
8189
8190 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
8191 TEST_REQUIRES_X86_SSE41;
8192 for (uint32_t mz = 0; mz < 9; mz++) {
8193 for (uint32_t channels = 16; channels < 128; channels += 24) {
8194 DWConvMicrokernelTester()
8195 .cr(8)
8196 .kr(9)
8197 .channels(channels)
8198 .input_offset(176)
8199 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008200 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008201 }
8202 }
8203 }
8204#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8205
8206
8207#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8208 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
8209 TEST_REQUIRES_X86_SSE41;
8210 DWConvMicrokernelTester()
8211 .cr(16)
8212 .kr(9)
8213 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008214 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008215 }
8216
8217 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
8218 TEST_REQUIRES_X86_SSE41;
8219 for (uint32_t channels = 32; channels < 256; channels += 48) {
8220 DWConvMicrokernelTester()
8221 .cr(16)
8222 .kr(9)
8223 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008224 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008225 }
8226 }
8227
8228 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
8229 TEST_REQUIRES_X86_SSE41;
8230 for (uint32_t channels = 32; channels < 256; channels += 48) {
8231 DWConvMicrokernelTester()
8232 .cr(16)
8233 .kr(9)
8234 .channels(channels)
8235 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008236 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008237 }
8238 }
8239
8240 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
8241 TEST_REQUIRES_X86_SSE41;
8242 for (uint32_t channels = 32; channels < 256; channels += 48) {
8243 DWConvMicrokernelTester()
8244 .cr(16)
8245 .kr(9)
8246 .channels(channels)
8247 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008248 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008249 }
8250 }
8251
8252 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
8253 TEST_REQUIRES_X86_SSE41;
8254 for (uint32_t channels = 1; channels < 16; channels++) {
8255 DWConvMicrokernelTester()
8256 .cr(16)
8257 .kr(9)
8258 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008260 }
8261 }
8262
8263 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
8264 TEST_REQUIRES_X86_SSE41;
8265 for (uint32_t channels = 17; channels < 32; channels++) {
8266 DWConvMicrokernelTester()
8267 .cr(16)
8268 .kr(9)
8269 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008270 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008271 }
8272 }
8273
8274 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
8275 TEST_REQUIRES_X86_SSE41;
8276 for (uint32_t channels = 17; channels < 32; channels++) {
8277 DWConvMicrokernelTester()
8278 .cr(16)
8279 .kr(9)
8280 .channels(channels)
8281 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008282 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008283 }
8284 }
8285
8286 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
8287 TEST_REQUIRES_X86_SSE41;
8288 for (uint32_t channels = 17; channels < 32; channels++) {
8289 DWConvMicrokernelTester()
8290 .cr(16)
8291 .kr(9)
8292 .channels(channels)
8293 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008295 }
8296 }
8297
8298 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
8299 TEST_REQUIRES_X86_SSE41;
8300 for (size_t channels = 1; channels <= 80; channels += 15) {
8301 DWConvMicrokernelTester()
8302 .cr(16)
8303 .kr(9)
8304 .channels(channels)
8305 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008307 }
8308 }
8309
8310 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
8311 TEST_REQUIRES_X86_SSE41;
8312 for (size_t channels = 1; channels <= 80; channels += 15) {
8313 for (size_t step = 2; step <= 9; step++) {
8314 DWConvMicrokernelTester()
8315 .cr(16)
8316 .kr(9)
8317 .channels(channels)
8318 .width(3)
8319 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008320 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008321 }
8322 }
8323 }
8324
8325 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
8326 TEST_REQUIRES_X86_SSE41;
8327 for (size_t channels = 1; channels <= 80; channels += 15) {
8328 DWConvMicrokernelTester()
8329 .cr(16)
8330 .kr(9)
8331 .channels(16)
8332 .width(5)
8333 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008334 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008335 }
8336 }
8337
8338 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
8339 TEST_REQUIRES_X86_SSE41;
8340 for (size_t channels = 1; channels <= 80; channels += 15) {
8341 DWConvMicrokernelTester()
8342 .cr(16)
8343 .kr(9)
8344 .channels(channels)
8345 .width(3)
8346 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008347 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008348 }
8349 }
8350
8351 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
8352 TEST_REQUIRES_X86_SSE41;
8353 for (size_t channels = 1; channels <= 80; channels += 15) {
8354 DWConvMicrokernelTester()
8355 .cr(16)
8356 .kr(9)
8357 .channels(channels)
8358 .width(3)
8359 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008360 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008361 }
8362 }
8363
8364 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
8365 TEST_REQUIRES_X86_SSE41;
8366 for (uint32_t channels = 32; channels < 256; channels += 48) {
8367 DWConvMicrokernelTester()
8368 .cr(16)
8369 .kr(9)
8370 .channels(channels)
8371 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08008372 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008373 }
8374 }
8375
8376 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
8377 TEST_REQUIRES_X86_SSE41;
8378 for (uint32_t mz = 0; mz < 9; mz++) {
8379 for (uint32_t channels = 32; channels < 256; channels += 48) {
8380 DWConvMicrokernelTester()
8381 .cr(16)
8382 .kr(9)
8383 .channels(channels)
8384 .input_offset(304)
8385 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008386 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008387 }
8388 }
8389 }
8390#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8391
8392
8393#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8394 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_eq_24) {
8395 TEST_REQUIRES_X86_SSE41;
8396 DWConvMicrokernelTester()
8397 .cr(24)
8398 .kr(9)
8399 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08008400 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008401 }
8402
8403 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24) {
8404 TEST_REQUIRES_X86_SSE41;
8405 for (uint32_t channels = 48; channels < 384; channels += 72) {
8406 DWConvMicrokernelTester()
8407 .cr(24)
8408 .kr(9)
8409 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008410 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008411 }
8412 }
8413
8414 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
8415 TEST_REQUIRES_X86_SSE41;
8416 for (uint32_t channels = 48; channels < 384; channels += 72) {
8417 DWConvMicrokernelTester()
8418 .cr(24)
8419 .kr(9)
8420 .channels(channels)
8421 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008423 }
8424 }
8425
8426 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
8427 TEST_REQUIRES_X86_SSE41;
8428 for (uint32_t channels = 48; channels < 384; channels += 72) {
8429 DWConvMicrokernelTester()
8430 .cr(24)
8431 .kr(9)
8432 .channels(channels)
8433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008434 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008435 }
8436 }
8437
8438 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_lt_24) {
8439 TEST_REQUIRES_X86_SSE41;
8440 for (uint32_t channels = 1; channels < 24; channels++) {
8441 DWConvMicrokernelTester()
8442 .cr(24)
8443 .kr(9)
8444 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008445 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008446 }
8447 }
8448
8449 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24) {
8450 TEST_REQUIRES_X86_SSE41;
8451 for (uint32_t channels = 25; channels < 48; channels++) {
8452 DWConvMicrokernelTester()
8453 .cr(24)
8454 .kr(9)
8455 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008456 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008457 }
8458 }
8459
8460 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
8461 TEST_REQUIRES_X86_SSE41;
8462 for (uint32_t channels = 25; channels < 48; channels++) {
8463 DWConvMicrokernelTester()
8464 .cr(24)
8465 .kr(9)
8466 .channels(channels)
8467 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008468 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008469 }
8470 }
8471
8472 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
8473 TEST_REQUIRES_X86_SSE41;
8474 for (uint32_t channels = 25; channels < 48; channels++) {
8475 DWConvMicrokernelTester()
8476 .cr(24)
8477 .kr(9)
8478 .channels(channels)
8479 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008481 }
8482 }
8483
8484 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel) {
8485 TEST_REQUIRES_X86_SSE41;
8486 for (size_t channels = 1; channels <= 120; channels += 23) {
8487 DWConvMicrokernelTester()
8488 .cr(24)
8489 .kr(9)
8490 .channels(channels)
8491 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008492 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008493 }
8494 }
8495
8496 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_step) {
8497 TEST_REQUIRES_X86_SSE41;
8498 for (size_t channels = 1; channels <= 120; channels += 23) {
8499 for (size_t step = 2; step <= 9; step++) {
8500 DWConvMicrokernelTester()
8501 .cr(24)
8502 .kr(9)
8503 .channels(channels)
8504 .width(3)
8505 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008506 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008507 }
8508 }
8509 }
8510
8511 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
8512 TEST_REQUIRES_X86_SSE41;
8513 for (size_t channels = 1; channels <= 120; channels += 23) {
8514 DWConvMicrokernelTester()
8515 .cr(24)
8516 .kr(9)
8517 .channels(24)
8518 .width(5)
8519 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08008520 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008521 }
8522 }
8523
8524 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
8525 TEST_REQUIRES_X86_SSE41;
8526 for (size_t channels = 1; channels <= 120; channels += 23) {
8527 DWConvMicrokernelTester()
8528 .cr(24)
8529 .kr(9)
8530 .channels(channels)
8531 .width(3)
8532 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008533 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008534 }
8535 }
8536
8537 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
8538 TEST_REQUIRES_X86_SSE41;
8539 for (size_t channels = 1; channels <= 120; channels += 23) {
8540 DWConvMicrokernelTester()
8541 .cr(24)
8542 .kr(9)
8543 .channels(channels)
8544 .width(3)
8545 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008546 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008547 }
8548 }
8549
8550 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, input_offset) {
8551 TEST_REQUIRES_X86_SSE41;
8552 for (uint32_t channels = 48; channels < 384; channels += 72) {
8553 DWConvMicrokernelTester()
8554 .cr(24)
8555 .kr(9)
8556 .channels(channels)
8557 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08008558 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008559 }
8560 }
8561
8562 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, zero) {
8563 TEST_REQUIRES_X86_SSE41;
8564 for (uint32_t mz = 0; mz < 9; mz++) {
8565 for (uint32_t channels = 48; channels < 384; channels += 72) {
8566 DWConvMicrokernelTester()
8567 .cr(24)
8568 .kr(9)
8569 .channels(channels)
8570 .input_offset(464)
8571 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008572 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008573 }
8574 }
8575 }
8576#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8577
8578
8579#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8580 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
8581 TEST_REQUIRES_X86_AVX;
8582 DWConvMicrokernelTester()
8583 .cr(8)
8584 .kr(9)
8585 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008586 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008587 }
8588
8589 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
8590 TEST_REQUIRES_X86_AVX;
8591 for (uint32_t channels = 16; channels < 128; channels += 24) {
8592 DWConvMicrokernelTester()
8593 .cr(8)
8594 .kr(9)
8595 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008596 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008597 }
8598 }
8599
8600 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
8601 TEST_REQUIRES_X86_AVX;
8602 for (uint32_t channels = 16; channels < 128; channels += 24) {
8603 DWConvMicrokernelTester()
8604 .cr(8)
8605 .kr(9)
8606 .channels(channels)
8607 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008608 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008609 }
8610 }
8611
8612 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
8613 TEST_REQUIRES_X86_AVX;
8614 for (uint32_t channels = 16; channels < 128; channels += 24) {
8615 DWConvMicrokernelTester()
8616 .cr(8)
8617 .kr(9)
8618 .channels(channels)
8619 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008620 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008621 }
8622 }
8623
8624 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
8625 TEST_REQUIRES_X86_AVX;
8626 for (uint32_t channels = 1; channels < 8; channels++) {
8627 DWConvMicrokernelTester()
8628 .cr(8)
8629 .kr(9)
8630 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008631 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008632 }
8633 }
8634
8635 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
8636 TEST_REQUIRES_X86_AVX;
8637 for (uint32_t channels = 9; channels < 16; channels++) {
8638 DWConvMicrokernelTester()
8639 .cr(8)
8640 .kr(9)
8641 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008642 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008643 }
8644 }
8645
8646 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
8647 TEST_REQUIRES_X86_AVX;
8648 for (uint32_t channels = 9; channels < 16; channels++) {
8649 DWConvMicrokernelTester()
8650 .cr(8)
8651 .kr(9)
8652 .channels(channels)
8653 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008654 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008655 }
8656 }
8657
8658 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
8659 TEST_REQUIRES_X86_AVX;
8660 for (uint32_t channels = 9; channels < 16; channels++) {
8661 DWConvMicrokernelTester()
8662 .cr(8)
8663 .kr(9)
8664 .channels(channels)
8665 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008666 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008667 }
8668 }
8669
8670 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
8671 TEST_REQUIRES_X86_AVX;
8672 for (size_t channels = 1; channels <= 40; channels += 7) {
8673 DWConvMicrokernelTester()
8674 .cr(8)
8675 .kr(9)
8676 .channels(channels)
8677 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008678 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008679 }
8680 }
8681
8682 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
8683 TEST_REQUIRES_X86_AVX;
8684 for (size_t channels = 1; channels <= 40; channels += 7) {
8685 for (size_t step = 2; step <= 9; step++) {
8686 DWConvMicrokernelTester()
8687 .cr(8)
8688 .kr(9)
8689 .channels(channels)
8690 .width(3)
8691 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008692 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008693 }
8694 }
8695 }
8696
8697 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
8698 TEST_REQUIRES_X86_AVX;
8699 for (size_t channels = 1; channels <= 40; channels += 7) {
8700 DWConvMicrokernelTester()
8701 .cr(8)
8702 .kr(9)
8703 .channels(8)
8704 .width(5)
8705 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008706 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008707 }
8708 }
8709
8710 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
8711 TEST_REQUIRES_X86_AVX;
8712 for (size_t channels = 1; channels <= 40; channels += 7) {
8713 DWConvMicrokernelTester()
8714 .cr(8)
8715 .kr(9)
8716 .channels(channels)
8717 .width(3)
8718 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008719 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008720 }
8721 }
8722
8723 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
8724 TEST_REQUIRES_X86_AVX;
8725 for (size_t channels = 1; channels <= 40; channels += 7) {
8726 DWConvMicrokernelTester()
8727 .cr(8)
8728 .kr(9)
8729 .channels(channels)
8730 .width(3)
8731 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008732 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008733 }
8734 }
8735
8736 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
8737 TEST_REQUIRES_X86_AVX;
8738 for (uint32_t channels = 16; channels < 128; channels += 24) {
8739 DWConvMicrokernelTester()
8740 .cr(8)
8741 .kr(9)
8742 .channels(channels)
8743 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08008744 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008745 }
8746 }
8747
8748 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
8749 TEST_REQUIRES_X86_AVX;
8750 for (uint32_t mz = 0; mz < 9; mz++) {
8751 for (uint32_t channels = 16; channels < 128; channels += 24) {
8752 DWConvMicrokernelTester()
8753 .cr(8)
8754 .kr(9)
8755 .channels(channels)
8756 .input_offset(176)
8757 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008758 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008759 }
8760 }
8761 }
8762#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8763
8764
8765#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8766 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
8767 TEST_REQUIRES_X86_AVX;
8768 DWConvMicrokernelTester()
8769 .cr(16)
8770 .kr(9)
8771 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008772 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008773 }
8774
8775 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
8776 TEST_REQUIRES_X86_AVX;
8777 for (uint32_t channels = 32; channels < 256; channels += 48) {
8778 DWConvMicrokernelTester()
8779 .cr(16)
8780 .kr(9)
8781 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008782 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008783 }
8784 }
8785
8786 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
8787 TEST_REQUIRES_X86_AVX;
8788 for (uint32_t channels = 32; channels < 256; channels += 48) {
8789 DWConvMicrokernelTester()
8790 .cr(16)
8791 .kr(9)
8792 .channels(channels)
8793 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008794 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008795 }
8796 }
8797
8798 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
8799 TEST_REQUIRES_X86_AVX;
8800 for (uint32_t channels = 32; channels < 256; channels += 48) {
8801 DWConvMicrokernelTester()
8802 .cr(16)
8803 .kr(9)
8804 .channels(channels)
8805 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008807 }
8808 }
8809
8810 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
8811 TEST_REQUIRES_X86_AVX;
8812 for (uint32_t channels = 1; channels < 16; channels++) {
8813 DWConvMicrokernelTester()
8814 .cr(16)
8815 .kr(9)
8816 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008818 }
8819 }
8820
8821 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
8822 TEST_REQUIRES_X86_AVX;
8823 for (uint32_t channels = 17; channels < 32; channels++) {
8824 DWConvMicrokernelTester()
8825 .cr(16)
8826 .kr(9)
8827 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008828 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008829 }
8830 }
8831
8832 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
8833 TEST_REQUIRES_X86_AVX;
8834 for (uint32_t channels = 17; channels < 32; channels++) {
8835 DWConvMicrokernelTester()
8836 .cr(16)
8837 .kr(9)
8838 .channels(channels)
8839 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008840 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008841 }
8842 }
8843
8844 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
8845 TEST_REQUIRES_X86_AVX;
8846 for (uint32_t channels = 17; channels < 32; channels++) {
8847 DWConvMicrokernelTester()
8848 .cr(16)
8849 .kr(9)
8850 .channels(channels)
8851 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008852 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008853 }
8854 }
8855
8856 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
8857 TEST_REQUIRES_X86_AVX;
8858 for (size_t channels = 1; channels <= 80; channels += 15) {
8859 DWConvMicrokernelTester()
8860 .cr(16)
8861 .kr(9)
8862 .channels(channels)
8863 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008864 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008865 }
8866 }
8867
8868 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
8869 TEST_REQUIRES_X86_AVX;
8870 for (size_t channels = 1; channels <= 80; channels += 15) {
8871 for (size_t step = 2; step <= 9; step++) {
8872 DWConvMicrokernelTester()
8873 .cr(16)
8874 .kr(9)
8875 .channels(channels)
8876 .width(3)
8877 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08008878 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008879 }
8880 }
8881 }
8882
8883 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
8884 TEST_REQUIRES_X86_AVX;
8885 for (size_t channels = 1; channels <= 80; channels += 15) {
8886 DWConvMicrokernelTester()
8887 .cr(16)
8888 .kr(9)
8889 .channels(16)
8890 .width(5)
8891 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008892 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008893 }
8894 }
8895
8896 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
8897 TEST_REQUIRES_X86_AVX;
8898 for (size_t channels = 1; channels <= 80; channels += 15) {
8899 DWConvMicrokernelTester()
8900 .cr(16)
8901 .kr(9)
8902 .channels(channels)
8903 .width(3)
8904 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008905 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008906 }
8907 }
8908
8909 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
8910 TEST_REQUIRES_X86_AVX;
8911 for (size_t channels = 1; channels <= 80; channels += 15) {
8912 DWConvMicrokernelTester()
8913 .cr(16)
8914 .kr(9)
8915 .channels(channels)
8916 .width(3)
8917 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008918 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008919 }
8920 }
8921
8922 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
8923 TEST_REQUIRES_X86_AVX;
8924 for (uint32_t channels = 32; channels < 256; channels += 48) {
8925 DWConvMicrokernelTester()
8926 .cr(16)
8927 .kr(9)
8928 .channels(channels)
8929 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08008930 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008931 }
8932 }
8933
8934 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
8935 TEST_REQUIRES_X86_AVX;
8936 for (uint32_t mz = 0; mz < 9; mz++) {
8937 for (uint32_t channels = 32; channels < 256; channels += 48) {
8938 DWConvMicrokernelTester()
8939 .cr(16)
8940 .kr(9)
8941 .channels(channels)
8942 .input_offset(304)
8943 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008944 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008945 }
8946 }
8947 }
8948#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8949
8950
8951#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8952 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_eq_24) {
8953 TEST_REQUIRES_X86_AVX;
8954 DWConvMicrokernelTester()
8955 .cr(24)
8956 .kr(9)
8957 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08008958 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008959 }
8960
8961 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24) {
8962 TEST_REQUIRES_X86_AVX;
8963 for (uint32_t channels = 48; channels < 384; channels += 72) {
8964 DWConvMicrokernelTester()
8965 .cr(24)
8966 .kr(9)
8967 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08008968 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008969 }
8970 }
8971
8972 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
8973 TEST_REQUIRES_X86_AVX;
8974 for (uint32_t channels = 48; channels < 384; channels += 72) {
8975 DWConvMicrokernelTester()
8976 .cr(24)
8977 .kr(9)
8978 .channels(channels)
8979 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008980 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008981 }
8982 }
8983
8984 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
8985 TEST_REQUIRES_X86_AVX;
8986 for (uint32_t channels = 48; channels < 384; channels += 72) {
8987 DWConvMicrokernelTester()
8988 .cr(24)
8989 .kr(9)
8990 .channels(channels)
8991 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07008993 }
8994 }
8995
8996 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_lt_24) {
8997 TEST_REQUIRES_X86_AVX;
8998 for (uint32_t channels = 1; channels < 24; channels++) {
8999 DWConvMicrokernelTester()
9000 .cr(24)
9001 .kr(9)
9002 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009003 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009004 }
9005 }
9006
9007 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24) {
9008 TEST_REQUIRES_X86_AVX;
9009 for (uint32_t channels = 25; channels < 48; channels++) {
9010 DWConvMicrokernelTester()
9011 .cr(24)
9012 .kr(9)
9013 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009014 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009015 }
9016 }
9017
9018 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
9019 TEST_REQUIRES_X86_AVX;
9020 for (uint32_t channels = 25; channels < 48; channels++) {
9021 DWConvMicrokernelTester()
9022 .cr(24)
9023 .kr(9)
9024 .channels(channels)
9025 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009026 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009027 }
9028 }
9029
9030 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
9031 TEST_REQUIRES_X86_AVX;
9032 for (uint32_t channels = 25; channels < 48; channels++) {
9033 DWConvMicrokernelTester()
9034 .cr(24)
9035 .kr(9)
9036 .channels(channels)
9037 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009038 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009039 }
9040 }
9041
9042 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel) {
9043 TEST_REQUIRES_X86_AVX;
9044 for (size_t channels = 1; channels <= 120; channels += 23) {
9045 DWConvMicrokernelTester()
9046 .cr(24)
9047 .kr(9)
9048 .channels(channels)
9049 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009050 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009051 }
9052 }
9053
9054 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_step) {
9055 TEST_REQUIRES_X86_AVX;
9056 for (size_t channels = 1; channels <= 120; channels += 23) {
9057 for (size_t step = 2; step <= 9; step++) {
9058 DWConvMicrokernelTester()
9059 .cr(24)
9060 .kr(9)
9061 .channels(channels)
9062 .width(3)
9063 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009064 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009065 }
9066 }
9067 }
9068
9069 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
9070 TEST_REQUIRES_X86_AVX;
9071 for (size_t channels = 1; channels <= 120; channels += 23) {
9072 DWConvMicrokernelTester()
9073 .cr(24)
9074 .kr(9)
9075 .channels(24)
9076 .width(5)
9077 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009078 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009079 }
9080 }
9081
9082 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmin) {
9083 TEST_REQUIRES_X86_AVX;
9084 for (size_t channels = 1; channels <= 120; channels += 23) {
9085 DWConvMicrokernelTester()
9086 .cr(24)
9087 .kr(9)
9088 .channels(channels)
9089 .width(3)
9090 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009091 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009092 }
9093 }
9094
9095 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmax) {
9096 TEST_REQUIRES_X86_AVX;
9097 for (size_t channels = 1; channels <= 120; channels += 23) {
9098 DWConvMicrokernelTester()
9099 .cr(24)
9100 .kr(9)
9101 .channels(channels)
9102 .width(3)
9103 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009104 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009105 }
9106 }
9107
9108 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, input_offset) {
9109 TEST_REQUIRES_X86_AVX;
9110 for (uint32_t channels = 48; channels < 384; channels += 72) {
9111 DWConvMicrokernelTester()
9112 .cr(24)
9113 .kr(9)
9114 .channels(channels)
9115 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08009116 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009117 }
9118 }
9119
9120 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, zero) {
9121 TEST_REQUIRES_X86_AVX;
9122 for (uint32_t mz = 0; mz < 9; mz++) {
9123 for (uint32_t channels = 48; channels < 384; channels += 72) {
9124 DWConvMicrokernelTester()
9125 .cr(24)
9126 .kr(9)
9127 .channels(channels)
9128 .input_offset(464)
9129 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009130 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009131 }
9132 }
9133 }
9134#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9135
9136
9137#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9138 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
9139 TEST_REQUIRES_X86_XOP;
9140 DWConvMicrokernelTester()
9141 .cr(8)
9142 .kr(9)
9143 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009144 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009145 }
9146
9147 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
9148 TEST_REQUIRES_X86_XOP;
9149 for (uint32_t channels = 16; channels < 128; channels += 24) {
9150 DWConvMicrokernelTester()
9151 .cr(8)
9152 .kr(9)
9153 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009154 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009155 }
9156 }
9157
9158 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
9159 TEST_REQUIRES_X86_XOP;
9160 for (uint32_t channels = 16; channels < 128; channels += 24) {
9161 DWConvMicrokernelTester()
9162 .cr(8)
9163 .kr(9)
9164 .channels(channels)
9165 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009166 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009167 }
9168 }
9169
9170 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
9171 TEST_REQUIRES_X86_XOP;
9172 for (uint32_t channels = 16; channels < 128; channels += 24) {
9173 DWConvMicrokernelTester()
9174 .cr(8)
9175 .kr(9)
9176 .channels(channels)
9177 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009178 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009179 }
9180 }
9181
9182 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
9183 TEST_REQUIRES_X86_XOP;
9184 for (uint32_t channels = 1; channels < 8; channels++) {
9185 DWConvMicrokernelTester()
9186 .cr(8)
9187 .kr(9)
9188 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009189 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009190 }
9191 }
9192
9193 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
9194 TEST_REQUIRES_X86_XOP;
9195 for (uint32_t channels = 9; channels < 16; channels++) {
9196 DWConvMicrokernelTester()
9197 .cr(8)
9198 .kr(9)
9199 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009200 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009201 }
9202 }
9203
9204 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
9205 TEST_REQUIRES_X86_XOP;
9206 for (uint32_t channels = 9; channels < 16; channels++) {
9207 DWConvMicrokernelTester()
9208 .cr(8)
9209 .kr(9)
9210 .channels(channels)
9211 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009212 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009213 }
9214 }
9215
9216 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
9217 TEST_REQUIRES_X86_XOP;
9218 for (uint32_t channels = 9; channels < 16; channels++) {
9219 DWConvMicrokernelTester()
9220 .cr(8)
9221 .kr(9)
9222 .channels(channels)
9223 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009224 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009225 }
9226 }
9227
9228 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
9229 TEST_REQUIRES_X86_XOP;
9230 for (size_t channels = 1; channels <= 40; channels += 7) {
9231 DWConvMicrokernelTester()
9232 .cr(8)
9233 .kr(9)
9234 .channels(channels)
9235 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009236 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009237 }
9238 }
9239
9240 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
9241 TEST_REQUIRES_X86_XOP;
9242 for (size_t channels = 1; channels <= 40; channels += 7) {
9243 for (size_t step = 2; step <= 9; step++) {
9244 DWConvMicrokernelTester()
9245 .cr(8)
9246 .kr(9)
9247 .channels(channels)
9248 .width(3)
9249 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009250 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009251 }
9252 }
9253 }
9254
9255 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
9256 TEST_REQUIRES_X86_XOP;
9257 for (size_t channels = 1; channels <= 40; channels += 7) {
9258 DWConvMicrokernelTester()
9259 .cr(8)
9260 .kr(9)
9261 .channels(8)
9262 .width(5)
9263 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009264 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009265 }
9266 }
9267
9268 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
9269 TEST_REQUIRES_X86_XOP;
9270 for (size_t channels = 1; channels <= 40; channels += 7) {
9271 DWConvMicrokernelTester()
9272 .cr(8)
9273 .kr(9)
9274 .channels(channels)
9275 .width(3)
9276 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009277 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009278 }
9279 }
9280
9281 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
9282 TEST_REQUIRES_X86_XOP;
9283 for (size_t channels = 1; channels <= 40; channels += 7) {
9284 DWConvMicrokernelTester()
9285 .cr(8)
9286 .kr(9)
9287 .channels(channels)
9288 .width(3)
9289 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009290 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009291 }
9292 }
9293
9294 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
9295 TEST_REQUIRES_X86_XOP;
9296 for (uint32_t channels = 16; channels < 128; channels += 24) {
9297 DWConvMicrokernelTester()
9298 .cr(8)
9299 .kr(9)
9300 .channels(channels)
9301 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08009302 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009303 }
9304 }
9305
9306 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
9307 TEST_REQUIRES_X86_XOP;
9308 for (uint32_t mz = 0; mz < 9; mz++) {
9309 for (uint32_t channels = 16; channels < 128; channels += 24) {
9310 DWConvMicrokernelTester()
9311 .cr(8)
9312 .kr(9)
9313 .channels(channels)
9314 .input_offset(176)
9315 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009316 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009317 }
9318 }
9319 }
9320#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9321
9322
9323#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9324 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
9325 TEST_REQUIRES_X86_XOP;
9326 DWConvMicrokernelTester()
9327 .cr(16)
9328 .kr(9)
9329 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009331 }
9332
9333 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
9334 TEST_REQUIRES_X86_XOP;
9335 for (uint32_t channels = 32; channels < 256; channels += 48) {
9336 DWConvMicrokernelTester()
9337 .cr(16)
9338 .kr(9)
9339 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009341 }
9342 }
9343
9344 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
9345 TEST_REQUIRES_X86_XOP;
9346 for (uint32_t channels = 32; channels < 256; channels += 48) {
9347 DWConvMicrokernelTester()
9348 .cr(16)
9349 .kr(9)
9350 .channels(channels)
9351 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009352 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009353 }
9354 }
9355
9356 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
9357 TEST_REQUIRES_X86_XOP;
9358 for (uint32_t channels = 32; channels < 256; channels += 48) {
9359 DWConvMicrokernelTester()
9360 .cr(16)
9361 .kr(9)
9362 .channels(channels)
9363 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009364 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009365 }
9366 }
9367
9368 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
9369 TEST_REQUIRES_X86_XOP;
9370 for (uint32_t channels = 1; channels < 16; channels++) {
9371 DWConvMicrokernelTester()
9372 .cr(16)
9373 .kr(9)
9374 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009375 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009376 }
9377 }
9378
9379 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
9380 TEST_REQUIRES_X86_XOP;
9381 for (uint32_t channels = 17; channels < 32; channels++) {
9382 DWConvMicrokernelTester()
9383 .cr(16)
9384 .kr(9)
9385 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009386 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009387 }
9388 }
9389
9390 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
9391 TEST_REQUIRES_X86_XOP;
9392 for (uint32_t channels = 17; channels < 32; channels++) {
9393 DWConvMicrokernelTester()
9394 .cr(16)
9395 .kr(9)
9396 .channels(channels)
9397 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009398 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009399 }
9400 }
9401
9402 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
9403 TEST_REQUIRES_X86_XOP;
9404 for (uint32_t channels = 17; channels < 32; channels++) {
9405 DWConvMicrokernelTester()
9406 .cr(16)
9407 .kr(9)
9408 .channels(channels)
9409 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009410 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009411 }
9412 }
9413
9414 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
9415 TEST_REQUIRES_X86_XOP;
9416 for (size_t channels = 1; channels <= 80; channels += 15) {
9417 DWConvMicrokernelTester()
9418 .cr(16)
9419 .kr(9)
9420 .channels(channels)
9421 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009422 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009423 }
9424 }
9425
9426 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
9427 TEST_REQUIRES_X86_XOP;
9428 for (size_t channels = 1; channels <= 80; channels += 15) {
9429 for (size_t step = 2; step <= 9; step++) {
9430 DWConvMicrokernelTester()
9431 .cr(16)
9432 .kr(9)
9433 .channels(channels)
9434 .width(3)
9435 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009436 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009437 }
9438 }
9439 }
9440
9441 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
9442 TEST_REQUIRES_X86_XOP;
9443 for (size_t channels = 1; channels <= 80; channels += 15) {
9444 DWConvMicrokernelTester()
9445 .cr(16)
9446 .kr(9)
9447 .channels(16)
9448 .width(5)
9449 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009450 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009451 }
9452 }
9453
9454 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
9455 TEST_REQUIRES_X86_XOP;
9456 for (size_t channels = 1; channels <= 80; channels += 15) {
9457 DWConvMicrokernelTester()
9458 .cr(16)
9459 .kr(9)
9460 .channels(channels)
9461 .width(3)
9462 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009463 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009464 }
9465 }
9466
9467 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
9468 TEST_REQUIRES_X86_XOP;
9469 for (size_t channels = 1; channels <= 80; channels += 15) {
9470 DWConvMicrokernelTester()
9471 .cr(16)
9472 .kr(9)
9473 .channels(channels)
9474 .width(3)
9475 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009476 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009477 }
9478 }
9479
9480 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
9481 TEST_REQUIRES_X86_XOP;
9482 for (uint32_t channels = 32; channels < 256; channels += 48) {
9483 DWConvMicrokernelTester()
9484 .cr(16)
9485 .kr(9)
9486 .channels(channels)
9487 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -08009488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009489 }
9490 }
9491
9492 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
9493 TEST_REQUIRES_X86_XOP;
9494 for (uint32_t mz = 0; mz < 9; mz++) {
9495 for (uint32_t channels = 32; channels < 256; channels += 48) {
9496 DWConvMicrokernelTester()
9497 .cr(16)
9498 .kr(9)
9499 .channels(channels)
9500 .input_offset(304)
9501 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009502 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009503 }
9504 }
9505 }
9506#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9507
9508
9509#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9510 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_eq_24) {
9511 TEST_REQUIRES_X86_XOP;
9512 DWConvMicrokernelTester()
9513 .cr(24)
9514 .kr(9)
9515 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -08009516 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009517 }
9518
9519 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24) {
9520 TEST_REQUIRES_X86_XOP;
9521 for (uint32_t channels = 48; channels < 384; channels += 72) {
9522 DWConvMicrokernelTester()
9523 .cr(24)
9524 .kr(9)
9525 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009526 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009527 }
9528 }
9529
9530 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
9531 TEST_REQUIRES_X86_XOP;
9532 for (uint32_t channels = 48; channels < 384; channels += 72) {
9533 DWConvMicrokernelTester()
9534 .cr(24)
9535 .kr(9)
9536 .channels(channels)
9537 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009538 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009539 }
9540 }
9541
9542 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
9543 TEST_REQUIRES_X86_XOP;
9544 for (uint32_t channels = 48; channels < 384; channels += 72) {
9545 DWConvMicrokernelTester()
9546 .cr(24)
9547 .kr(9)
9548 .channels(channels)
9549 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009551 }
9552 }
9553
9554 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_lt_24) {
9555 TEST_REQUIRES_X86_XOP;
9556 for (uint32_t channels = 1; channels < 24; channels++) {
9557 DWConvMicrokernelTester()
9558 .cr(24)
9559 .kr(9)
9560 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009561 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009562 }
9563 }
9564
9565 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24) {
9566 TEST_REQUIRES_X86_XOP;
9567 for (uint32_t channels = 25; channels < 48; channels++) {
9568 DWConvMicrokernelTester()
9569 .cr(24)
9570 .kr(9)
9571 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009572 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009573 }
9574 }
9575
9576 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
9577 TEST_REQUIRES_X86_XOP;
9578 for (uint32_t channels = 25; channels < 48; channels++) {
9579 DWConvMicrokernelTester()
9580 .cr(24)
9581 .kr(9)
9582 .channels(channels)
9583 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009584 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009585 }
9586 }
9587
9588 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
9589 TEST_REQUIRES_X86_XOP;
9590 for (uint32_t channels = 25; channels < 48; channels++) {
9591 DWConvMicrokernelTester()
9592 .cr(24)
9593 .kr(9)
9594 .channels(channels)
9595 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009596 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009597 }
9598 }
9599
9600 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel) {
9601 TEST_REQUIRES_X86_XOP;
9602 for (size_t channels = 1; channels <= 120; channels += 23) {
9603 DWConvMicrokernelTester()
9604 .cr(24)
9605 .kr(9)
9606 .channels(channels)
9607 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009608 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009609 }
9610 }
9611
9612 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_step) {
9613 TEST_REQUIRES_X86_XOP;
9614 for (size_t channels = 1; channels <= 120; channels += 23) {
9615 for (size_t step = 2; step <= 9; step++) {
9616 DWConvMicrokernelTester()
9617 .cr(24)
9618 .kr(9)
9619 .channels(channels)
9620 .width(3)
9621 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009622 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009623 }
9624 }
9625 }
9626
9627 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
9628 TEST_REQUIRES_X86_XOP;
9629 for (size_t channels = 1; channels <= 120; channels += 23) {
9630 DWConvMicrokernelTester()
9631 .cr(24)
9632 .kr(9)
9633 .channels(24)
9634 .width(5)
9635 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009636 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009637 }
9638 }
9639
9640 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmin) {
9641 TEST_REQUIRES_X86_XOP;
9642 for (size_t channels = 1; channels <= 120; channels += 23) {
9643 DWConvMicrokernelTester()
9644 .cr(24)
9645 .kr(9)
9646 .channels(channels)
9647 .width(3)
9648 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009649 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009650 }
9651 }
9652
9653 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmax) {
9654 TEST_REQUIRES_X86_XOP;
9655 for (size_t channels = 1; channels <= 120; channels += 23) {
9656 DWConvMicrokernelTester()
9657 .cr(24)
9658 .kr(9)
9659 .channels(channels)
9660 .width(3)
9661 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009662 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009663 }
9664 }
9665
9666 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, input_offset) {
9667 TEST_REQUIRES_X86_XOP;
9668 for (uint32_t channels = 48; channels < 384; channels += 72) {
9669 DWConvMicrokernelTester()
9670 .cr(24)
9671 .kr(9)
9672 .channels(channels)
9673 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -08009674 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009675 }
9676 }
9677
9678 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, zero) {
9679 TEST_REQUIRES_X86_XOP;
9680 for (uint32_t mz = 0; mz < 9; mz++) {
9681 for (uint32_t channels = 48; channels < 384; channels += 72) {
9682 DWConvMicrokernelTester()
9683 .cr(24)
9684 .kr(9)
9685 .channels(channels)
9686 .input_offset(464)
9687 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009688 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009689 }
9690 }
9691 }
9692#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9693
9694
9695#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9696 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
9697 TEST_REQUIRES_X86_AVX2;
9698 DWConvMicrokernelTester()
9699 .cr(8)
9700 .kr(9)
9701 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009702 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009703 }
9704
9705 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
9706 TEST_REQUIRES_X86_AVX2;
9707 for (uint32_t channels = 16; channels < 128; channels += 24) {
9708 DWConvMicrokernelTester()
9709 .cr(8)
9710 .kr(9)
9711 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009712 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009713 }
9714 }
9715
9716 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
9717 TEST_REQUIRES_X86_AVX2;
9718 for (uint32_t channels = 16; channels < 128; channels += 24) {
9719 DWConvMicrokernelTester()
9720 .cr(8)
9721 .kr(9)
9722 .channels(channels)
9723 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009724 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009725 }
9726 }
9727
9728 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
9729 TEST_REQUIRES_X86_AVX2;
9730 for (uint32_t channels = 16; channels < 128; channels += 24) {
9731 DWConvMicrokernelTester()
9732 .cr(8)
9733 .kr(9)
9734 .channels(channels)
9735 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009736 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009737 }
9738 }
9739
9740 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
9741 TEST_REQUIRES_X86_AVX2;
9742 for (uint32_t channels = 1; channels < 8; channels++) {
9743 DWConvMicrokernelTester()
9744 .cr(8)
9745 .kr(9)
9746 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009747 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009748 }
9749 }
9750
9751 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
9752 TEST_REQUIRES_X86_AVX2;
9753 for (uint32_t channels = 9; channels < 16; channels++) {
9754 DWConvMicrokernelTester()
9755 .cr(8)
9756 .kr(9)
9757 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009758 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009759 }
9760 }
9761
9762 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
9763 TEST_REQUIRES_X86_AVX2;
9764 for (uint32_t channels = 9; channels < 16; channels++) {
9765 DWConvMicrokernelTester()
9766 .cr(8)
9767 .kr(9)
9768 .channels(channels)
9769 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009770 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009771 }
9772 }
9773
9774 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
9775 TEST_REQUIRES_X86_AVX2;
9776 for (uint32_t channels = 9; channels < 16; channels++) {
9777 DWConvMicrokernelTester()
9778 .cr(8)
9779 .kr(9)
9780 .channels(channels)
9781 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009782 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009783 }
9784 }
9785
9786 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
9787 TEST_REQUIRES_X86_AVX2;
9788 for (size_t channels = 1; channels <= 40; channels += 7) {
9789 DWConvMicrokernelTester()
9790 .cr(8)
9791 .kr(9)
9792 .channels(channels)
9793 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009794 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009795 }
9796 }
9797
9798 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
9799 TEST_REQUIRES_X86_AVX2;
9800 for (size_t channels = 1; channels <= 40; channels += 7) {
9801 for (size_t step = 2; step <= 9; step++) {
9802 DWConvMicrokernelTester()
9803 .cr(8)
9804 .kr(9)
9805 .channels(channels)
9806 .width(3)
9807 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009808 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009809 }
9810 }
9811 }
9812
9813 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
9814 TEST_REQUIRES_X86_AVX2;
9815 for (size_t channels = 1; channels <= 40; channels += 7) {
9816 DWConvMicrokernelTester()
9817 .cr(8)
9818 .kr(9)
9819 .channels(8)
9820 .width(5)
9821 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009822 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009823 }
9824 }
9825
9826 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
9827 TEST_REQUIRES_X86_AVX2;
9828 for (size_t channels = 1; channels <= 40; channels += 7) {
9829 DWConvMicrokernelTester()
9830 .cr(8)
9831 .kr(9)
9832 .channels(channels)
9833 .width(3)
9834 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009835 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009836 }
9837 }
9838
9839 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
9840 TEST_REQUIRES_X86_AVX2;
9841 for (size_t channels = 1; channels <= 40; channels += 7) {
9842 DWConvMicrokernelTester()
9843 .cr(8)
9844 .kr(9)
9845 .channels(channels)
9846 .width(3)
9847 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009848 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009849 }
9850 }
9851
9852 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
9853 TEST_REQUIRES_X86_AVX2;
9854 for (uint32_t channels = 16; channels < 128; channels += 24) {
9855 DWConvMicrokernelTester()
9856 .cr(8)
9857 .kr(9)
9858 .channels(channels)
9859 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -08009860 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009861 }
9862 }
9863
9864 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
9865 TEST_REQUIRES_X86_AVX2;
9866 for (uint32_t mz = 0; mz < 9; mz++) {
9867 for (uint32_t channels = 16; channels < 128; channels += 24) {
9868 DWConvMicrokernelTester()
9869 .cr(8)
9870 .kr(9)
9871 .channels(channels)
9872 .input_offset(176)
9873 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009874 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009875 }
9876 }
9877 }
9878#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9879
9880
9881#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9882 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
9883 TEST_REQUIRES_X86_AVX2;
9884 DWConvMicrokernelTester()
9885 .cr(16)
9886 .kr(9)
9887 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009888 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009889 }
9890
9891 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
9892 TEST_REQUIRES_X86_AVX2;
9893 for (uint32_t channels = 32; channels < 256; channels += 48) {
9894 DWConvMicrokernelTester()
9895 .cr(16)
9896 .kr(9)
9897 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009899 }
9900 }
9901
9902 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
9903 TEST_REQUIRES_X86_AVX2;
9904 for (uint32_t channels = 32; channels < 256; channels += 48) {
9905 DWConvMicrokernelTester()
9906 .cr(16)
9907 .kr(9)
9908 .channels(channels)
9909 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009910 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009911 }
9912 }
9913
9914 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
9915 TEST_REQUIRES_X86_AVX2;
9916 for (uint32_t channels = 32; channels < 256; channels += 48) {
9917 DWConvMicrokernelTester()
9918 .cr(16)
9919 .kr(9)
9920 .channels(channels)
9921 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009922 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009923 }
9924 }
9925
9926 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
9927 TEST_REQUIRES_X86_AVX2;
9928 for (uint32_t channels = 1; channels < 16; channels++) {
9929 DWConvMicrokernelTester()
9930 .cr(16)
9931 .kr(9)
9932 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009933 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009934 }
9935 }
9936
9937 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
9938 TEST_REQUIRES_X86_AVX2;
9939 for (uint32_t channels = 17; channels < 32; channels++) {
9940 DWConvMicrokernelTester()
9941 .cr(16)
9942 .kr(9)
9943 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -08009944 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009945 }
9946 }
9947
9948 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
9949 TEST_REQUIRES_X86_AVX2;
9950 for (uint32_t channels = 17; channels < 32; channels++) {
9951 DWConvMicrokernelTester()
9952 .cr(16)
9953 .kr(9)
9954 .channels(channels)
9955 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009956 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009957 }
9958 }
9959
9960 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
9961 TEST_REQUIRES_X86_AVX2;
9962 for (uint32_t channels = 17; channels < 32; channels++) {
9963 DWConvMicrokernelTester()
9964 .cr(16)
9965 .kr(9)
9966 .channels(channels)
9967 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009968 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009969 }
9970 }
9971
9972 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
9973 TEST_REQUIRES_X86_AVX2;
9974 for (size_t channels = 1; channels <= 80; channels += 15) {
9975 DWConvMicrokernelTester()
9976 .cr(16)
9977 .kr(9)
9978 .channels(channels)
9979 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009980 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009981 }
9982 }
9983
9984 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
9985 TEST_REQUIRES_X86_AVX2;
9986 for (size_t channels = 1; channels <= 80; channels += 15) {
9987 for (size_t step = 2; step <= 9; step++) {
9988 DWConvMicrokernelTester()
9989 .cr(16)
9990 .kr(9)
9991 .channels(channels)
9992 .width(3)
9993 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -08009994 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -07009995 }
9996 }
9997 }
9998
9999 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
10000 TEST_REQUIRES_X86_AVX2;
10001 for (size_t channels = 1; channels <= 80; channels += 15) {
10002 DWConvMicrokernelTester()
10003 .cr(16)
10004 .kr(9)
10005 .channels(16)
10006 .width(5)
10007 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010009 }
10010 }
10011
10012 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
10013 TEST_REQUIRES_X86_AVX2;
10014 for (size_t channels = 1; channels <= 80; channels += 15) {
10015 DWConvMicrokernelTester()
10016 .cr(16)
10017 .kr(9)
10018 .channels(channels)
10019 .width(3)
10020 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010021 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010022 }
10023 }
10024
10025 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
10026 TEST_REQUIRES_X86_AVX2;
10027 for (size_t channels = 1; channels <= 80; channels += 15) {
10028 DWConvMicrokernelTester()
10029 .cr(16)
10030 .kr(9)
10031 .channels(channels)
10032 .width(3)
10033 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010034 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010035 }
10036 }
10037
10038 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
10039 TEST_REQUIRES_X86_AVX2;
10040 for (uint32_t channels = 32; channels < 256; channels += 48) {
10041 DWConvMicrokernelTester()
10042 .cr(16)
10043 .kr(9)
10044 .channels(channels)
10045 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080010046 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010047 }
10048 }
10049
10050 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
10051 TEST_REQUIRES_X86_AVX2;
10052 for (uint32_t mz = 0; mz < 9; mz++) {
10053 for (uint32_t channels = 32; channels < 256; channels += 48) {
10054 DWConvMicrokernelTester()
10055 .cr(16)
10056 .kr(9)
10057 .channels(channels)
10058 .input_offset(304)
10059 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010060 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010061 }
10062 }
10063 }
10064#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10065
10066
10067#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10068 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
10069 TEST_REQUIRES_X86_AVX2;
10070 DWConvMicrokernelTester()
10071 .cr(24)
10072 .kr(9)
10073 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080010074 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010075 }
10076
10077 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
10078 TEST_REQUIRES_X86_AVX2;
10079 for (uint32_t channels = 48; channels < 384; channels += 72) {
10080 DWConvMicrokernelTester()
10081 .cr(24)
10082 .kr(9)
10083 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010084 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010085 }
10086 }
10087
10088 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
10089 TEST_REQUIRES_X86_AVX2;
10090 for (uint32_t channels = 48; channels < 384; channels += 72) {
10091 DWConvMicrokernelTester()
10092 .cr(24)
10093 .kr(9)
10094 .channels(channels)
10095 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010096 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010097 }
10098 }
10099
10100 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
10101 TEST_REQUIRES_X86_AVX2;
10102 for (uint32_t channels = 48; channels < 384; channels += 72) {
10103 DWConvMicrokernelTester()
10104 .cr(24)
10105 .kr(9)
10106 .channels(channels)
10107 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010108 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010109 }
10110 }
10111
10112 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
10113 TEST_REQUIRES_X86_AVX2;
10114 for (uint32_t channels = 1; channels < 24; channels++) {
10115 DWConvMicrokernelTester()
10116 .cr(24)
10117 .kr(9)
10118 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010119 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010120 }
10121 }
10122
10123 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
10124 TEST_REQUIRES_X86_AVX2;
10125 for (uint32_t channels = 25; channels < 48; channels++) {
10126 DWConvMicrokernelTester()
10127 .cr(24)
10128 .kr(9)
10129 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010130 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010131 }
10132 }
10133
10134 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
10135 TEST_REQUIRES_X86_AVX2;
10136 for (uint32_t channels = 25; channels < 48; channels++) {
10137 DWConvMicrokernelTester()
10138 .cr(24)
10139 .kr(9)
10140 .channels(channels)
10141 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010142 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010143 }
10144 }
10145
10146 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
10147 TEST_REQUIRES_X86_AVX2;
10148 for (uint32_t channels = 25; channels < 48; channels++) {
10149 DWConvMicrokernelTester()
10150 .cr(24)
10151 .kr(9)
10152 .channels(channels)
10153 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010154 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010155 }
10156 }
10157
10158 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
10159 TEST_REQUIRES_X86_AVX2;
10160 for (size_t channels = 1; channels <= 120; channels += 23) {
10161 DWConvMicrokernelTester()
10162 .cr(24)
10163 .kr(9)
10164 .channels(channels)
10165 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010166 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010167 }
10168 }
10169
10170 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
10171 TEST_REQUIRES_X86_AVX2;
10172 for (size_t channels = 1; channels <= 120; channels += 23) {
10173 for (size_t step = 2; step <= 9; step++) {
10174 DWConvMicrokernelTester()
10175 .cr(24)
10176 .kr(9)
10177 .channels(channels)
10178 .width(3)
10179 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010180 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010181 }
10182 }
10183 }
10184
10185 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
10186 TEST_REQUIRES_X86_AVX2;
10187 for (size_t channels = 1; channels <= 120; channels += 23) {
10188 DWConvMicrokernelTester()
10189 .cr(24)
10190 .kr(9)
10191 .channels(24)
10192 .width(5)
10193 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080010194 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010195 }
10196 }
10197
10198 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
10199 TEST_REQUIRES_X86_AVX2;
10200 for (size_t channels = 1; channels <= 120; channels += 23) {
10201 DWConvMicrokernelTester()
10202 .cr(24)
10203 .kr(9)
10204 .channels(channels)
10205 .width(3)
10206 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010207 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010208 }
10209 }
10210
10211 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
10212 TEST_REQUIRES_X86_AVX2;
10213 for (size_t channels = 1; channels <= 120; channels += 23) {
10214 DWConvMicrokernelTester()
10215 .cr(24)
10216 .kr(9)
10217 .channels(channels)
10218 .width(3)
10219 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010220 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010221 }
10222 }
10223
10224 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
10225 TEST_REQUIRES_X86_AVX2;
10226 for (uint32_t channels = 48; channels < 384; channels += 72) {
10227 DWConvMicrokernelTester()
10228 .cr(24)
10229 .kr(9)
10230 .channels(channels)
10231 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080010232 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010233 }
10234 }
10235
10236 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
10237 TEST_REQUIRES_X86_AVX2;
10238 for (uint32_t mz = 0; mz < 9; mz++) {
10239 for (uint32_t channels = 48; channels < 384; channels += 72) {
10240 DWConvMicrokernelTester()
10241 .cr(24)
10242 .kr(9)
10243 .channels(channels)
10244 .input_offset(464)
10245 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010246 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010247 }
10248 }
10249 }
10250#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10251
10252
10253#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10254 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
10255 TEST_REQUIRES_X86_AVX2;
10256 DWConvMicrokernelTester()
10257 .cr(32)
10258 .kr(9)
10259 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080010260 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010261 }
10262
10263 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
10264 TEST_REQUIRES_X86_AVX2;
10265 for (uint32_t channels = 64; channels < 512; channels += 96) {
10266 DWConvMicrokernelTester()
10267 .cr(32)
10268 .kr(9)
10269 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010270 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010271 }
10272 }
10273
10274 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
10275 TEST_REQUIRES_X86_AVX2;
10276 for (uint32_t channels = 64; channels < 512; channels += 96) {
10277 DWConvMicrokernelTester()
10278 .cr(32)
10279 .kr(9)
10280 .channels(channels)
10281 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010282 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010283 }
10284 }
10285
10286 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
10287 TEST_REQUIRES_X86_AVX2;
10288 for (uint32_t channels = 64; channels < 512; channels += 96) {
10289 DWConvMicrokernelTester()
10290 .cr(32)
10291 .kr(9)
10292 .channels(channels)
10293 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010295 }
10296 }
10297
10298 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
10299 TEST_REQUIRES_X86_AVX2;
10300 for (uint32_t channels = 1; channels < 32; channels++) {
10301 DWConvMicrokernelTester()
10302 .cr(32)
10303 .kr(9)
10304 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010305 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010306 }
10307 }
10308
10309 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
10310 TEST_REQUIRES_X86_AVX2;
10311 for (uint32_t channels = 33; channels < 64; channels++) {
10312 DWConvMicrokernelTester()
10313 .cr(32)
10314 .kr(9)
10315 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010316 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010317 }
10318 }
10319
10320 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
10321 TEST_REQUIRES_X86_AVX2;
10322 for (uint32_t channels = 33; channels < 64; channels++) {
10323 DWConvMicrokernelTester()
10324 .cr(32)
10325 .kr(9)
10326 .channels(channels)
10327 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010328 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010329 }
10330 }
10331
10332 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
10333 TEST_REQUIRES_X86_AVX2;
10334 for (uint32_t channels = 33; channels < 64; channels++) {
10335 DWConvMicrokernelTester()
10336 .cr(32)
10337 .kr(9)
10338 .channels(channels)
10339 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010341 }
10342 }
10343
10344 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
10345 TEST_REQUIRES_X86_AVX2;
10346 for (size_t channels = 1; channels <= 160; channels += 31) {
10347 DWConvMicrokernelTester()
10348 .cr(32)
10349 .kr(9)
10350 .channels(channels)
10351 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010352 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010353 }
10354 }
10355
10356 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
10357 TEST_REQUIRES_X86_AVX2;
10358 for (size_t channels = 1; channels <= 160; channels += 31) {
10359 for (size_t step = 2; step <= 9; step++) {
10360 DWConvMicrokernelTester()
10361 .cr(32)
10362 .kr(9)
10363 .channels(channels)
10364 .width(3)
10365 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010366 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010367 }
10368 }
10369 }
10370
10371 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
10372 TEST_REQUIRES_X86_AVX2;
10373 for (size_t channels = 1; channels <= 160; channels += 31) {
10374 DWConvMicrokernelTester()
10375 .cr(32)
10376 .kr(9)
10377 .channels(32)
10378 .width(5)
10379 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010380 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010381 }
10382 }
10383
10384 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
10385 TEST_REQUIRES_X86_AVX2;
10386 for (size_t channels = 1; channels <= 160; channels += 31) {
10387 DWConvMicrokernelTester()
10388 .cr(32)
10389 .kr(9)
10390 .channels(channels)
10391 .width(3)
10392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010393 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010394 }
10395 }
10396
10397 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
10398 TEST_REQUIRES_X86_AVX2;
10399 for (size_t channels = 1; channels <= 160; channels += 31) {
10400 DWConvMicrokernelTester()
10401 .cr(32)
10402 .kr(9)
10403 .channels(channels)
10404 .width(3)
10405 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010406 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010407 }
10408 }
10409
10410 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
10411 TEST_REQUIRES_X86_AVX2;
10412 for (uint32_t channels = 64; channels < 512; channels += 96) {
10413 DWConvMicrokernelTester()
10414 .cr(32)
10415 .kr(9)
10416 .channels(channels)
10417 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080010418 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010419 }
10420 }
10421
10422 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
10423 TEST_REQUIRES_X86_AVX2;
10424 for (uint32_t mz = 0; mz < 9; mz++) {
10425 for (uint32_t channels = 64; channels < 512; channels += 96) {
10426 DWConvMicrokernelTester()
10427 .cr(32)
10428 .kr(9)
10429 .channels(channels)
10430 .input_offset(592)
10431 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010432 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010433 }
10434 }
10435 }
10436#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10437
10438
10439#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10440 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
10441 TEST_REQUIRES_X86_AVX512SKX;
10442 DWConvMicrokernelTester()
10443 .cr(16)
10444 .kr(9)
10445 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010446 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010447 }
10448
10449 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
10450 TEST_REQUIRES_X86_AVX512SKX;
10451 for (uint32_t channels = 32; channels < 256; channels += 48) {
10452 DWConvMicrokernelTester()
10453 .cr(16)
10454 .kr(9)
10455 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010456 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010457 }
10458 }
10459
10460 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
10461 TEST_REQUIRES_X86_AVX512SKX;
10462 for (uint32_t channels = 32; channels < 256; channels += 48) {
10463 DWConvMicrokernelTester()
10464 .cr(16)
10465 .kr(9)
10466 .channels(channels)
10467 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010468 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010469 }
10470 }
10471
10472 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
10473 TEST_REQUIRES_X86_AVX512SKX;
10474 for (uint32_t channels = 32; channels < 256; channels += 48) {
10475 DWConvMicrokernelTester()
10476 .cr(16)
10477 .kr(9)
10478 .channels(channels)
10479 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010481 }
10482 }
10483
10484 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
10485 TEST_REQUIRES_X86_AVX512SKX;
10486 for (uint32_t channels = 1; channels < 16; channels++) {
10487 DWConvMicrokernelTester()
10488 .cr(16)
10489 .kr(9)
10490 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010491 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010492 }
10493 }
10494
10495 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
10496 TEST_REQUIRES_X86_AVX512SKX;
10497 for (uint32_t channels = 17; channels < 32; channels++) {
10498 DWConvMicrokernelTester()
10499 .cr(16)
10500 .kr(9)
10501 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010502 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010503 }
10504 }
10505
10506 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
10507 TEST_REQUIRES_X86_AVX512SKX;
10508 for (uint32_t channels = 17; channels < 32; channels++) {
10509 DWConvMicrokernelTester()
10510 .cr(16)
10511 .kr(9)
10512 .channels(channels)
10513 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010514 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010515 }
10516 }
10517
10518 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
10519 TEST_REQUIRES_X86_AVX512SKX;
10520 for (uint32_t channels = 17; channels < 32; channels++) {
10521 DWConvMicrokernelTester()
10522 .cr(16)
10523 .kr(9)
10524 .channels(channels)
10525 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010526 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010527 }
10528 }
10529
10530 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
10531 TEST_REQUIRES_X86_AVX512SKX;
10532 for (size_t channels = 1; channels <= 80; channels += 15) {
10533 DWConvMicrokernelTester()
10534 .cr(16)
10535 .kr(9)
10536 .channels(channels)
10537 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010538 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010539 }
10540 }
10541
10542 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
10543 TEST_REQUIRES_X86_AVX512SKX;
10544 for (size_t channels = 1; channels <= 80; channels += 15) {
10545 for (size_t step = 2; step <= 9; step++) {
10546 DWConvMicrokernelTester()
10547 .cr(16)
10548 .kr(9)
10549 .channels(channels)
10550 .width(3)
10551 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010552 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010553 }
10554 }
10555 }
10556
10557 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
10558 TEST_REQUIRES_X86_AVX512SKX;
10559 for (size_t channels = 1; channels <= 80; channels += 15) {
10560 DWConvMicrokernelTester()
10561 .cr(16)
10562 .kr(9)
10563 .channels(16)
10564 .width(5)
10565 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010566 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010567 }
10568 }
10569
10570 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
10571 TEST_REQUIRES_X86_AVX512SKX;
10572 for (size_t channels = 1; channels <= 80; channels += 15) {
10573 DWConvMicrokernelTester()
10574 .cr(16)
10575 .kr(9)
10576 .channels(channels)
10577 .width(3)
10578 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010579 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010580 }
10581 }
10582
10583 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
10584 TEST_REQUIRES_X86_AVX512SKX;
10585 for (size_t channels = 1; channels <= 80; channels += 15) {
10586 DWConvMicrokernelTester()
10587 .cr(16)
10588 .kr(9)
10589 .channels(channels)
10590 .width(3)
10591 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010592 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010593 }
10594 }
10595
10596 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
10597 TEST_REQUIRES_X86_AVX512SKX;
10598 for (uint32_t channels = 32; channels < 256; channels += 48) {
10599 DWConvMicrokernelTester()
10600 .cr(16)
10601 .kr(9)
10602 .channels(channels)
10603 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080010604 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010605 }
10606 }
10607
10608 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
10609 TEST_REQUIRES_X86_AVX512SKX;
10610 for (uint32_t mz = 0; mz < 9; mz++) {
10611 for (uint32_t channels = 32; channels < 256; channels += 48) {
10612 DWConvMicrokernelTester()
10613 .cr(16)
10614 .kr(9)
10615 .channels(channels)
10616 .input_offset(304)
10617 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010618 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010619 }
10620 }
10621 }
10622#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10623
10624
10625#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10626 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
10627 TEST_REQUIRES_X86_AVX512SKX;
10628 DWConvMicrokernelTester()
10629 .cr(32)
10630 .kr(9)
10631 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080010632 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010633 }
10634
10635 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
10636 TEST_REQUIRES_X86_AVX512SKX;
10637 for (uint32_t channels = 64; channels < 512; channels += 96) {
10638 DWConvMicrokernelTester()
10639 .cr(32)
10640 .kr(9)
10641 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010642 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010643 }
10644 }
10645
10646 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
10647 TEST_REQUIRES_X86_AVX512SKX;
10648 for (uint32_t channels = 64; channels < 512; channels += 96) {
10649 DWConvMicrokernelTester()
10650 .cr(32)
10651 .kr(9)
10652 .channels(channels)
10653 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010654 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010655 }
10656 }
10657
10658 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
10659 TEST_REQUIRES_X86_AVX512SKX;
10660 for (uint32_t channels = 64; channels < 512; channels += 96) {
10661 DWConvMicrokernelTester()
10662 .cr(32)
10663 .kr(9)
10664 .channels(channels)
10665 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010666 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010667 }
10668 }
10669
10670 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
10671 TEST_REQUIRES_X86_AVX512SKX;
10672 for (uint32_t channels = 1; channels < 32; channels++) {
10673 DWConvMicrokernelTester()
10674 .cr(32)
10675 .kr(9)
10676 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010677 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010678 }
10679 }
10680
10681 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
10682 TEST_REQUIRES_X86_AVX512SKX;
10683 for (uint32_t channels = 33; channels < 64; channels++) {
10684 DWConvMicrokernelTester()
10685 .cr(32)
10686 .kr(9)
10687 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010688 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010689 }
10690 }
10691
10692 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
10693 TEST_REQUIRES_X86_AVX512SKX;
10694 for (uint32_t channels = 33; channels < 64; channels++) {
10695 DWConvMicrokernelTester()
10696 .cr(32)
10697 .kr(9)
10698 .channels(channels)
10699 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010700 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010701 }
10702 }
10703
10704 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
10705 TEST_REQUIRES_X86_AVX512SKX;
10706 for (uint32_t channels = 33; channels < 64; channels++) {
10707 DWConvMicrokernelTester()
10708 .cr(32)
10709 .kr(9)
10710 .channels(channels)
10711 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010712 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010713 }
10714 }
10715
10716 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
10717 TEST_REQUIRES_X86_AVX512SKX;
10718 for (size_t channels = 1; channels <= 160; channels += 31) {
10719 DWConvMicrokernelTester()
10720 .cr(32)
10721 .kr(9)
10722 .channels(channels)
10723 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010724 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010725 }
10726 }
10727
10728 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
10729 TEST_REQUIRES_X86_AVX512SKX;
10730 for (size_t channels = 1; channels <= 160; channels += 31) {
10731 for (size_t step = 2; step <= 9; step++) {
10732 DWConvMicrokernelTester()
10733 .cr(32)
10734 .kr(9)
10735 .channels(channels)
10736 .width(3)
10737 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010738 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010739 }
10740 }
10741 }
10742
10743 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
10744 TEST_REQUIRES_X86_AVX512SKX;
10745 for (size_t channels = 1; channels <= 160; channels += 31) {
10746 DWConvMicrokernelTester()
10747 .cr(32)
10748 .kr(9)
10749 .channels(32)
10750 .width(5)
10751 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010752 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010753 }
10754 }
10755
10756 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
10757 TEST_REQUIRES_X86_AVX512SKX;
10758 for (size_t channels = 1; channels <= 160; channels += 31) {
10759 DWConvMicrokernelTester()
10760 .cr(32)
10761 .kr(9)
10762 .channels(channels)
10763 .width(3)
10764 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010765 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010766 }
10767 }
10768
10769 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
10770 TEST_REQUIRES_X86_AVX512SKX;
10771 for (size_t channels = 1; channels <= 160; channels += 31) {
10772 DWConvMicrokernelTester()
10773 .cr(32)
10774 .kr(9)
10775 .channels(channels)
10776 .width(3)
10777 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010778 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010779 }
10780 }
10781
10782 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
10783 TEST_REQUIRES_X86_AVX512SKX;
10784 for (uint32_t channels = 64; channels < 512; channels += 96) {
10785 DWConvMicrokernelTester()
10786 .cr(32)
10787 .kr(9)
10788 .channels(channels)
10789 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080010790 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010791 }
10792 }
10793
10794 TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
10795 TEST_REQUIRES_X86_AVX512SKX;
10796 for (uint32_t mz = 0; mz < 9; mz++) {
10797 for (uint32_t channels = 64; channels < 512; channels += 96) {
10798 DWConvMicrokernelTester()
10799 .cr(32)
10800 .kr(9)
10801 .channels(channels)
10802 .input_offset(592)
10803 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010804 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070010805 }
10806 }
10807 }
10808#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10809
10810
Marat Dukhan4c617792021-12-21 15:47:58 -080010811#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070010812 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
10813 DWConvMicrokernelTester()
10814 .cr(8)
10815 .kr(9)
10816 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010818 }
10819
10820 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
10821 for (uint32_t channels = 16; channels < 128; channels += 24) {
10822 DWConvMicrokernelTester()
10823 .cr(8)
10824 .kr(9)
10825 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010826 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010827 }
10828 }
10829
10830 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
10831 for (uint32_t channels = 16; channels < 128; channels += 24) {
10832 DWConvMicrokernelTester()
10833 .cr(8)
10834 .kr(9)
10835 .channels(channels)
10836 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010838 }
10839 }
10840
10841 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
10842 for (uint32_t channels = 16; channels < 128; channels += 24) {
10843 DWConvMicrokernelTester()
10844 .cr(8)
10845 .kr(9)
10846 .channels(channels)
10847 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010848 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010849 }
10850 }
10851
10852 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
10853 for (uint32_t channels = 1; channels < 8; channels++) {
10854 DWConvMicrokernelTester()
10855 .cr(8)
10856 .kr(9)
10857 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010858 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010859 }
10860 }
10861
10862 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
10863 for (uint32_t channels = 9; channels < 16; channels++) {
10864 DWConvMicrokernelTester()
10865 .cr(8)
10866 .kr(9)
10867 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010868 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010869 }
10870 }
10871
10872 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
10873 for (uint32_t channels = 9; channels < 16; channels++) {
10874 DWConvMicrokernelTester()
10875 .cr(8)
10876 .kr(9)
10877 .channels(channels)
10878 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010879 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010880 }
10881 }
10882
10883 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
10884 for (uint32_t channels = 9; channels < 16; channels++) {
10885 DWConvMicrokernelTester()
10886 .cr(8)
10887 .kr(9)
10888 .channels(channels)
10889 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010890 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010891 }
10892 }
10893
10894 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
10895 for (size_t channels = 1; channels <= 40; channels += 7) {
10896 DWConvMicrokernelTester()
10897 .cr(8)
10898 .kr(9)
10899 .channels(channels)
10900 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010901 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010902 }
10903 }
10904
10905 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
10906 for (size_t channels = 1; channels <= 40; channels += 7) {
10907 for (size_t step = 2; step <= 9; step++) {
10908 DWConvMicrokernelTester()
10909 .cr(8)
10910 .kr(9)
10911 .channels(channels)
10912 .width(3)
10913 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080010914 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010915 }
10916 }
10917 }
10918
10919 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
10920 for (size_t channels = 1; channels <= 40; channels += 7) {
10921 DWConvMicrokernelTester()
10922 .cr(8)
10923 .kr(9)
10924 .channels(8)
10925 .width(5)
10926 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010927 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010928 }
10929 }
10930
10931 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
10932 for (size_t channels = 1; channels <= 40; channels += 7) {
10933 DWConvMicrokernelTester()
10934 .cr(8)
10935 .kr(9)
10936 .channels(channels)
10937 .width(3)
10938 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010939 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010940 }
10941 }
10942
10943 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
10944 for (size_t channels = 1; channels <= 40; channels += 7) {
10945 DWConvMicrokernelTester()
10946 .cr(8)
10947 .kr(9)
10948 .channels(channels)
10949 .width(3)
10950 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010951 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010952 }
10953 }
10954
10955 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
10956 for (uint32_t channels = 16; channels < 128; channels += 24) {
10957 DWConvMicrokernelTester()
10958 .cr(8)
10959 .kr(9)
10960 .channels(channels)
10961 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080010962 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010963 }
10964 }
10965
10966 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
10967 for (uint32_t mz = 0; mz < 9; mz++) {
10968 for (uint32_t channels = 16; channels < 128; channels += 24) {
10969 DWConvMicrokernelTester()
10970 .cr(8)
10971 .kr(9)
10972 .channels(channels)
10973 .input_offset(176)
10974 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010975 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010976 }
10977 }
10978 }
Marat Dukhan4c617792021-12-21 15:47:58 -080010979#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070010980
10981
Marat Dukhan4c617792021-12-21 15:47:58 -080010982#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070010983 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
10984 DWConvMicrokernelTester()
10985 .cr(16)
10986 .kr(9)
10987 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010988 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010989 }
10990
10991 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
10992 for (uint32_t channels = 32; channels < 256; channels += 48) {
10993 DWConvMicrokernelTester()
10994 .cr(16)
10995 .kr(9)
10996 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080010997 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070010998 }
10999 }
11000
11001 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
11002 for (uint32_t channels = 32; channels < 256; channels += 48) {
11003 DWConvMicrokernelTester()
11004 .cr(16)
11005 .kr(9)
11006 .channels(channels)
11007 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011008 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011009 }
11010 }
11011
11012 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
11013 for (uint32_t channels = 32; channels < 256; channels += 48) {
11014 DWConvMicrokernelTester()
11015 .cr(16)
11016 .kr(9)
11017 .channels(channels)
11018 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011019 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011020 }
11021 }
11022
11023 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
11024 for (uint32_t channels = 1; channels < 16; channels++) {
11025 DWConvMicrokernelTester()
11026 .cr(16)
11027 .kr(9)
11028 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011029 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011030 }
11031 }
11032
11033 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
11034 for (uint32_t channels = 17; channels < 32; channels++) {
11035 DWConvMicrokernelTester()
11036 .cr(16)
11037 .kr(9)
11038 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011039 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011040 }
11041 }
11042
11043 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
11044 for (uint32_t channels = 17; channels < 32; channels++) {
11045 DWConvMicrokernelTester()
11046 .cr(16)
11047 .kr(9)
11048 .channels(channels)
11049 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011050 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011051 }
11052 }
11053
11054 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
11055 for (uint32_t channels = 17; channels < 32; channels++) {
11056 DWConvMicrokernelTester()
11057 .cr(16)
11058 .kr(9)
11059 .channels(channels)
11060 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011061 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011062 }
11063 }
11064
11065 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
11066 for (size_t channels = 1; channels <= 80; channels += 15) {
11067 DWConvMicrokernelTester()
11068 .cr(16)
11069 .kr(9)
11070 .channels(channels)
11071 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011072 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011073 }
11074 }
11075
11076 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
11077 for (size_t channels = 1; channels <= 80; channels += 15) {
11078 for (size_t step = 2; step <= 9; step++) {
11079 DWConvMicrokernelTester()
11080 .cr(16)
11081 .kr(9)
11082 .channels(channels)
11083 .width(3)
11084 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011085 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011086 }
11087 }
11088 }
11089
11090 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
11091 for (size_t channels = 1; channels <= 80; channels += 15) {
11092 DWConvMicrokernelTester()
11093 .cr(16)
11094 .kr(9)
11095 .channels(16)
11096 .width(5)
11097 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011099 }
11100 }
11101
11102 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
11103 for (size_t channels = 1; channels <= 80; channels += 15) {
11104 DWConvMicrokernelTester()
11105 .cr(16)
11106 .kr(9)
11107 .channels(channels)
11108 .width(3)
11109 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011110 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011111 }
11112 }
11113
11114 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
11115 for (size_t channels = 1; channels <= 80; channels += 15) {
11116 DWConvMicrokernelTester()
11117 .cr(16)
11118 .kr(9)
11119 .channels(channels)
11120 .width(3)
11121 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011122 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011123 }
11124 }
11125
11126 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
11127 for (uint32_t channels = 32; channels < 256; channels += 48) {
11128 DWConvMicrokernelTester()
11129 .cr(16)
11130 .kr(9)
11131 .channels(channels)
11132 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011133 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011134 }
11135 }
11136
11137 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
11138 for (uint32_t mz = 0; mz < 9; mz++) {
11139 for (uint32_t channels = 32; channels < 256; channels += 48) {
11140 DWConvMicrokernelTester()
11141 .cr(16)
11142 .kr(9)
11143 .channels(channels)
11144 .input_offset(304)
11145 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011146 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011147 }
11148 }
11149 }
Marat Dukhan4c617792021-12-21 15:47:58 -080011150#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070011151
11152
Marat Dukhan4c617792021-12-21 15:47:58 -080011153#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070011154 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
11155 DWConvMicrokernelTester()
11156 .cr(24)
11157 .kr(9)
11158 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080011159 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011160 }
11161
11162 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
11163 for (uint32_t channels = 48; channels < 384; channels += 72) {
11164 DWConvMicrokernelTester()
11165 .cr(24)
11166 .kr(9)
11167 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011168 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011169 }
11170 }
11171
11172 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
11173 for (uint32_t channels = 48; channels < 384; channels += 72) {
11174 DWConvMicrokernelTester()
11175 .cr(24)
11176 .kr(9)
11177 .channels(channels)
11178 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011179 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011180 }
11181 }
11182
11183 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
11184 for (uint32_t channels = 48; channels < 384; channels += 72) {
11185 DWConvMicrokernelTester()
11186 .cr(24)
11187 .kr(9)
11188 .channels(channels)
11189 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011190 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011191 }
11192 }
11193
11194 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
11195 for (uint32_t channels = 1; channels < 24; channels++) {
11196 DWConvMicrokernelTester()
11197 .cr(24)
11198 .kr(9)
11199 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011200 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011201 }
11202 }
11203
11204 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
11205 for (uint32_t channels = 25; channels < 48; channels++) {
11206 DWConvMicrokernelTester()
11207 .cr(24)
11208 .kr(9)
11209 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011210 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011211 }
11212 }
11213
11214 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
11215 for (uint32_t channels = 25; channels < 48; channels++) {
11216 DWConvMicrokernelTester()
11217 .cr(24)
11218 .kr(9)
11219 .channels(channels)
11220 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011221 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011222 }
11223 }
11224
11225 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
11226 for (uint32_t channels = 25; channels < 48; channels++) {
11227 DWConvMicrokernelTester()
11228 .cr(24)
11229 .kr(9)
11230 .channels(channels)
11231 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011232 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011233 }
11234 }
11235
11236 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
11237 for (size_t channels = 1; channels <= 120; channels += 23) {
11238 DWConvMicrokernelTester()
11239 .cr(24)
11240 .kr(9)
11241 .channels(channels)
11242 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011243 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011244 }
11245 }
11246
11247 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
11248 for (size_t channels = 1; channels <= 120; channels += 23) {
11249 for (size_t step = 2; step <= 9; step++) {
11250 DWConvMicrokernelTester()
11251 .cr(24)
11252 .kr(9)
11253 .channels(channels)
11254 .width(3)
11255 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011256 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011257 }
11258 }
11259 }
11260
11261 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
11262 for (size_t channels = 1; channels <= 120; channels += 23) {
11263 DWConvMicrokernelTester()
11264 .cr(24)
11265 .kr(9)
11266 .channels(24)
11267 .width(5)
11268 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080011269 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011270 }
11271 }
11272
11273 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
11274 for (size_t channels = 1; channels <= 120; channels += 23) {
11275 DWConvMicrokernelTester()
11276 .cr(24)
11277 .kr(9)
11278 .channels(channels)
11279 .width(3)
11280 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011281 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011282 }
11283 }
11284
11285 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
11286 for (size_t channels = 1; channels <= 120; channels += 23) {
11287 DWConvMicrokernelTester()
11288 .cr(24)
11289 .kr(9)
11290 .channels(channels)
11291 .width(3)
11292 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011293 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011294 }
11295 }
11296
11297 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
11298 for (uint32_t channels = 48; channels < 384; channels += 72) {
11299 DWConvMicrokernelTester()
11300 .cr(24)
11301 .kr(9)
11302 .channels(channels)
11303 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080011304 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011305 }
11306 }
11307
11308 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
11309 for (uint32_t mz = 0; mz < 9; mz++) {
11310 for (uint32_t channels = 48; channels < 384; channels += 72) {
11311 DWConvMicrokernelTester()
11312 .cr(24)
11313 .kr(9)
11314 .channels(channels)
11315 .input_offset(464)
11316 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011317 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070011318 }
11319 }
11320 }
Marat Dukhan4c617792021-12-21 15:47:58 -080011321#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070011322
11323
Marat Dukhan4c617792021-12-21 15:47:58 -080011324#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011325 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_eq_8) {
11326 DWConvMicrokernelTester()
11327 .cr(8)
11328 .kr(9)
11329 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011331 }
11332
11333 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8) {
11334 for (uint32_t channels = 16; channels < 128; channels += 24) {
11335 DWConvMicrokernelTester()
11336 .cr(8)
11337 .kr(9)
11338 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011339 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011340 }
11341 }
11342
11343 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
11344 for (uint32_t channels = 16; channels < 128; channels += 24) {
11345 DWConvMicrokernelTester()
11346 .cr(8)
11347 .kr(9)
11348 .channels(channels)
11349 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011350 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011351 }
11352 }
11353
11354 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
11355 for (uint32_t channels = 16; channels < 128; channels += 24) {
11356 DWConvMicrokernelTester()
11357 .cr(8)
11358 .kr(9)
11359 .channels(channels)
11360 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011362 }
11363 }
11364
11365 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_lt_8) {
11366 for (uint32_t channels = 1; channels < 8; channels++) {
11367 DWConvMicrokernelTester()
11368 .cr(8)
11369 .kr(9)
11370 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011371 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011372 }
11373 }
11374
11375 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8) {
11376 for (uint32_t channels = 9; channels < 16; channels++) {
11377 DWConvMicrokernelTester()
11378 .cr(8)
11379 .kr(9)
11380 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011381 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011382 }
11383 }
11384
11385 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
11386 for (uint32_t channels = 9; channels < 16; channels++) {
11387 DWConvMicrokernelTester()
11388 .cr(8)
11389 .kr(9)
11390 .channels(channels)
11391 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011392 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011393 }
11394 }
11395
11396 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
11397 for (uint32_t channels = 9; channels < 16; channels++) {
11398 DWConvMicrokernelTester()
11399 .cr(8)
11400 .kr(9)
11401 .channels(channels)
11402 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011403 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011404 }
11405 }
11406
11407 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel) {
11408 for (size_t channels = 1; channels <= 40; channels += 7) {
11409 DWConvMicrokernelTester()
11410 .cr(8)
11411 .kr(9)
11412 .channels(channels)
11413 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011414 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011415 }
11416 }
11417
11418 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
11419 for (size_t channels = 1; channels <= 40; channels += 7) {
11420 for (size_t step = 2; step <= 9; step++) {
11421 DWConvMicrokernelTester()
11422 .cr(8)
11423 .kr(9)
11424 .channels(channels)
11425 .width(3)
11426 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011427 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011428 }
11429 }
11430 }
11431
11432 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
11433 for (size_t channels = 1; channels <= 40; channels += 7) {
11434 DWConvMicrokernelTester()
11435 .cr(8)
11436 .kr(9)
11437 .channels(8)
11438 .width(5)
11439 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011440 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011441 }
11442 }
11443
11444 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
11445 for (size_t channels = 1; channels <= 40; channels += 7) {
11446 DWConvMicrokernelTester()
11447 .cr(8)
11448 .kr(9)
11449 .channels(channels)
11450 .width(3)
11451 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011452 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011453 }
11454 }
11455
11456 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
11457 for (size_t channels = 1; channels <= 40; channels += 7) {
11458 DWConvMicrokernelTester()
11459 .cr(8)
11460 .kr(9)
11461 .channels(channels)
11462 .width(3)
11463 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011464 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011465 }
11466 }
11467
11468 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, input_offset) {
11469 for (uint32_t channels = 16; channels < 128; channels += 24) {
11470 DWConvMicrokernelTester()
11471 .cr(8)
11472 .kr(9)
11473 .channels(channels)
11474 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080011475 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011476 }
11477 }
11478
11479 TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, zero) {
11480 for (uint32_t mz = 0; mz < 9; mz++) {
11481 for (uint32_t channels = 16; channels < 128; channels += 24) {
11482 DWConvMicrokernelTester()
11483 .cr(8)
11484 .kr(9)
11485 .channels(channels)
11486 .input_offset(176)
11487 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011489 }
11490 }
11491 }
Marat Dukhan4c617792021-12-21 15:47:58 -080011492#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011493
11494
Marat Dukhan4c617792021-12-21 15:47:58 -080011495#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011496 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_eq_16) {
11497 DWConvMicrokernelTester()
11498 .cr(16)
11499 .kr(9)
11500 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011502 }
11503
11504 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16) {
11505 for (uint32_t channels = 32; channels < 256; channels += 48) {
11506 DWConvMicrokernelTester()
11507 .cr(16)
11508 .kr(9)
11509 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011510 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011511 }
11512 }
11513
11514 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
11515 for (uint32_t channels = 32; channels < 256; channels += 48) {
11516 DWConvMicrokernelTester()
11517 .cr(16)
11518 .kr(9)
11519 .channels(channels)
11520 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011521 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011522 }
11523 }
11524
11525 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
11526 for (uint32_t channels = 32; channels < 256; channels += 48) {
11527 DWConvMicrokernelTester()
11528 .cr(16)
11529 .kr(9)
11530 .channels(channels)
11531 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011532 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011533 }
11534 }
11535
11536 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_lt_16) {
11537 for (uint32_t channels = 1; channels < 16; channels++) {
11538 DWConvMicrokernelTester()
11539 .cr(16)
11540 .kr(9)
11541 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011542 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011543 }
11544 }
11545
11546 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16) {
11547 for (uint32_t channels = 17; channels < 32; channels++) {
11548 DWConvMicrokernelTester()
11549 .cr(16)
11550 .kr(9)
11551 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011552 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011553 }
11554 }
11555
11556 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
11557 for (uint32_t channels = 17; channels < 32; channels++) {
11558 DWConvMicrokernelTester()
11559 .cr(16)
11560 .kr(9)
11561 .channels(channels)
11562 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011563 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011564 }
11565 }
11566
11567 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
11568 for (uint32_t channels = 17; channels < 32; channels++) {
11569 DWConvMicrokernelTester()
11570 .cr(16)
11571 .kr(9)
11572 .channels(channels)
11573 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011574 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011575 }
11576 }
11577
11578 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel) {
11579 for (size_t channels = 1; channels <= 80; channels += 15) {
11580 DWConvMicrokernelTester()
11581 .cr(16)
11582 .kr(9)
11583 .channels(channels)
11584 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011585 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011586 }
11587 }
11588
11589 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
11590 for (size_t channels = 1; channels <= 80; channels += 15) {
11591 for (size_t step = 2; step <= 9; step++) {
11592 DWConvMicrokernelTester()
11593 .cr(16)
11594 .kr(9)
11595 .channels(channels)
11596 .width(3)
11597 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011598 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011599 }
11600 }
11601 }
11602
11603 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
11604 for (size_t channels = 1; channels <= 80; channels += 15) {
11605 DWConvMicrokernelTester()
11606 .cr(16)
11607 .kr(9)
11608 .channels(16)
11609 .width(5)
11610 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011611 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011612 }
11613 }
11614
11615 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
11616 for (size_t channels = 1; channels <= 80; channels += 15) {
11617 DWConvMicrokernelTester()
11618 .cr(16)
11619 .kr(9)
11620 .channels(channels)
11621 .width(3)
11622 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011623 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011624 }
11625 }
11626
11627 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
11628 for (size_t channels = 1; channels <= 80; channels += 15) {
11629 DWConvMicrokernelTester()
11630 .cr(16)
11631 .kr(9)
11632 .channels(channels)
11633 .width(3)
11634 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011635 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011636 }
11637 }
11638
11639 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, input_offset) {
11640 for (uint32_t channels = 32; channels < 256; channels += 48) {
11641 DWConvMicrokernelTester()
11642 .cr(16)
11643 .kr(9)
11644 .channels(channels)
11645 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080011646 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011647 }
11648 }
11649
11650 TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, zero) {
11651 for (uint32_t mz = 0; mz < 9; mz++) {
11652 for (uint32_t channels = 32; channels < 256; channels += 48) {
11653 DWConvMicrokernelTester()
11654 .cr(16)
11655 .kr(9)
11656 .channels(channels)
11657 .input_offset(304)
11658 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011659 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011660 }
11661 }
11662 }
Marat Dukhan4c617792021-12-21 15:47:58 -080011663#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011664
11665
Marat Dukhan4c617792021-12-21 15:47:58 -080011666#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011667 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_eq_24) {
11668 DWConvMicrokernelTester()
11669 .cr(24)
11670 .kr(9)
11671 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080011672 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011673 }
11674
11675 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24) {
11676 for (uint32_t channels = 48; channels < 384; channels += 72) {
11677 DWConvMicrokernelTester()
11678 .cr(24)
11679 .kr(9)
11680 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011681 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011682 }
11683 }
11684
11685 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
11686 for (uint32_t channels = 48; channels < 384; channels += 72) {
11687 DWConvMicrokernelTester()
11688 .cr(24)
11689 .kr(9)
11690 .channels(channels)
11691 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011692 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011693 }
11694 }
11695
11696 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
11697 for (uint32_t channels = 48; channels < 384; channels += 72) {
11698 DWConvMicrokernelTester()
11699 .cr(24)
11700 .kr(9)
11701 .channels(channels)
11702 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011703 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011704 }
11705 }
11706
11707 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_lt_24) {
11708 for (uint32_t channels = 1; channels < 24; channels++) {
11709 DWConvMicrokernelTester()
11710 .cr(24)
11711 .kr(9)
11712 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011713 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011714 }
11715 }
11716
11717 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24) {
11718 for (uint32_t channels = 25; channels < 48; channels++) {
11719 DWConvMicrokernelTester()
11720 .cr(24)
11721 .kr(9)
11722 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011723 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011724 }
11725 }
11726
11727 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
11728 for (uint32_t channels = 25; channels < 48; channels++) {
11729 DWConvMicrokernelTester()
11730 .cr(24)
11731 .kr(9)
11732 .channels(channels)
11733 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011734 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011735 }
11736 }
11737
11738 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
11739 for (uint32_t channels = 25; channels < 48; channels++) {
11740 DWConvMicrokernelTester()
11741 .cr(24)
11742 .kr(9)
11743 .channels(channels)
11744 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011745 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011746 }
11747 }
11748
11749 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel) {
11750 for (size_t channels = 1; channels <= 120; channels += 23) {
11751 DWConvMicrokernelTester()
11752 .cr(24)
11753 .kr(9)
11754 .channels(channels)
11755 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011756 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011757 }
11758 }
11759
11760 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
11761 for (size_t channels = 1; channels <= 120; channels += 23) {
11762 for (size_t step = 2; step <= 9; step++) {
11763 DWConvMicrokernelTester()
11764 .cr(24)
11765 .kr(9)
11766 .channels(channels)
11767 .width(3)
11768 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011770 }
11771 }
11772 }
11773
11774 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
11775 for (size_t channels = 1; channels <= 120; channels += 23) {
11776 DWConvMicrokernelTester()
11777 .cr(24)
11778 .kr(9)
11779 .channels(24)
11780 .width(5)
11781 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080011782 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011783 }
11784 }
11785
11786 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
11787 for (size_t channels = 1; channels <= 120; channels += 23) {
11788 DWConvMicrokernelTester()
11789 .cr(24)
11790 .kr(9)
11791 .channels(channels)
11792 .width(3)
11793 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011794 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011795 }
11796 }
11797
11798 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
11799 for (size_t channels = 1; channels <= 120; channels += 23) {
11800 DWConvMicrokernelTester()
11801 .cr(24)
11802 .kr(9)
11803 .channels(channels)
11804 .width(3)
11805 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011807 }
11808 }
11809
11810 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, input_offset) {
11811 for (uint32_t channels = 48; channels < 384; channels += 72) {
11812 DWConvMicrokernelTester()
11813 .cr(24)
11814 .kr(9)
11815 .channels(channels)
11816 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080011817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011818 }
11819 }
11820
11821 TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, zero) {
11822 for (uint32_t mz = 0; mz < 9; mz++) {
11823 for (uint32_t channels = 48; channels < 384; channels += 72) {
11824 DWConvMicrokernelTester()
11825 .cr(24)
11826 .kr(9)
11827 .channels(channels)
11828 .input_offset(464)
11829 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070011831 }
11832 }
11833 }
Marat Dukhan4c617792021-12-21 15:47:58 -080011834#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070011835
11836
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011837#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11838 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
11839 DWConvMicrokernelTester()
11840 .cr(1)
11841 .kr(9)
11842 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011843 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011844 }
11845
11846 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
11847 for (uint32_t channels = 2; channels < 10; channels++) {
11848 DWConvMicrokernelTester()
11849 .cr(1)
11850 .kr(9)
11851 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011852 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011853 }
11854 }
11855
11856 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
11857 for (uint32_t channels = 2; channels < 10; channels++) {
11858 DWConvMicrokernelTester()
11859 .cr(1)
11860 .kr(9)
11861 .channels(channels)
11862 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011864 }
11865 }
11866
11867 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
11868 for (uint32_t channels = 2; channels < 10; channels++) {
11869 DWConvMicrokernelTester()
11870 .cr(1)
11871 .kr(9)
11872 .channels(channels)
11873 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011874 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011875 }
11876 }
11877
11878 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
11879 for (size_t channels = 1; channels <= 5; channels += 1) {
11880 DWConvMicrokernelTester()
11881 .cr(1)
11882 .kr(9)
11883 .channels(channels)
11884 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011886 }
11887 }
11888
11889 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
11890 for (size_t channels = 1; channels <= 5; channels += 1) {
11891 for (size_t step = 2; step <= 9; step++) {
11892 DWConvMicrokernelTester()
11893 .cr(1)
11894 .kr(9)
11895 .channels(channels)
11896 .width(3)
11897 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080011898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011899 }
11900 }
11901 }
11902
11903 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
11904 for (size_t channels = 1; channels <= 5; channels += 1) {
11905 DWConvMicrokernelTester()
11906 .cr(1)
11907 .kr(9)
11908 .channels(1)
11909 .width(5)
11910 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011911 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011912 }
11913 }
11914
11915 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
11916 for (size_t channels = 1; channels <= 5; channels += 1) {
11917 DWConvMicrokernelTester()
11918 .cr(1)
11919 .kr(9)
11920 .channels(channels)
11921 .width(3)
11922 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011923 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011924 }
11925 }
11926
11927 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
11928 for (size_t channels = 1; channels <= 5; channels += 1) {
11929 DWConvMicrokernelTester()
11930 .cr(1)
11931 .kr(9)
11932 .channels(channels)
11933 .width(3)
11934 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011935 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011936 }
11937 }
11938
11939 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
11940 for (uint32_t channels = 2; channels < 16; channels += 3) {
11941 DWConvMicrokernelTester()
11942 .cr(1)
11943 .kr(9)
11944 .channels(channels)
11945 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080011946 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011947 }
11948 }
11949
11950 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
11951 for (uint32_t mz = 0; mz < 9; mz++) {
11952 for (uint32_t channels = 2; channels < 16; channels += 3) {
11953 DWConvMicrokernelTester()
11954 .cr(1)
11955 .kr(9)
11956 .channels(channels)
11957 .input_offset(48)
11958 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011959 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011960 }
11961 }
11962 }
11963#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11964
11965
11966#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11967 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
11968 DWConvMicrokernelTester()
11969 .cr(2)
11970 .kr(9)
11971 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080011972 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011973 }
11974
11975 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
11976 for (uint32_t channels = 4; channels < 32; channels += 6) {
11977 DWConvMicrokernelTester()
11978 .cr(2)
11979 .kr(9)
11980 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080011981 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011982 }
11983 }
11984
11985 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
11986 for (uint32_t channels = 4; channels < 32; channels += 6) {
11987 DWConvMicrokernelTester()
11988 .cr(2)
11989 .kr(9)
11990 .channels(channels)
11991 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011992 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080011993 }
11994 }
11995
11996 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
11997 for (uint32_t channels = 4; channels < 32; channels += 6) {
11998 DWConvMicrokernelTester()
11999 .cr(2)
12000 .kr(9)
12001 .channels(channels)
12002 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012003 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012004 }
12005 }
12006
12007 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
12008 for (uint32_t channels = 1; channels < 2; channels++) {
12009 DWConvMicrokernelTester()
12010 .cr(2)
12011 .kr(9)
12012 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012013 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012014 }
12015 }
12016
12017 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
12018 for (uint32_t channels = 3; channels < 4; channels++) {
12019 DWConvMicrokernelTester()
12020 .cr(2)
12021 .kr(9)
12022 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012023 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012024 }
12025 }
12026
12027 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
12028 for (uint32_t channels = 3; channels < 4; channels++) {
12029 DWConvMicrokernelTester()
12030 .cr(2)
12031 .kr(9)
12032 .channels(channels)
12033 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012034 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012035 }
12036 }
12037
12038 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
12039 for (uint32_t channels = 3; channels < 4; channels++) {
12040 DWConvMicrokernelTester()
12041 .cr(2)
12042 .kr(9)
12043 .channels(channels)
12044 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012045 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012046 }
12047 }
12048
12049 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
12050 for (size_t channels = 1; channels <= 10; channels += 1) {
12051 DWConvMicrokernelTester()
12052 .cr(2)
12053 .kr(9)
12054 .channels(channels)
12055 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012056 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012057 }
12058 }
12059
12060 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
12061 for (size_t channels = 1; channels <= 10; channels += 1) {
12062 for (size_t step = 2; step <= 9; step++) {
12063 DWConvMicrokernelTester()
12064 .cr(2)
12065 .kr(9)
12066 .channels(channels)
12067 .width(3)
12068 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012069 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012070 }
12071 }
12072 }
12073
12074 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
12075 for (size_t channels = 1; channels <= 10; channels += 1) {
12076 DWConvMicrokernelTester()
12077 .cr(2)
12078 .kr(9)
12079 .channels(2)
12080 .width(5)
12081 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080012082 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012083 }
12084 }
12085
12086 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
12087 for (size_t channels = 1; channels <= 10; channels += 1) {
12088 DWConvMicrokernelTester()
12089 .cr(2)
12090 .kr(9)
12091 .channels(channels)
12092 .width(3)
12093 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012094 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012095 }
12096 }
12097
12098 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
12099 for (size_t channels = 1; channels <= 10; channels += 1) {
12100 DWConvMicrokernelTester()
12101 .cr(2)
12102 .kr(9)
12103 .channels(channels)
12104 .width(3)
12105 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012106 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012107 }
12108 }
12109
12110 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
12111 for (uint32_t channels = 4; channels < 32; channels += 6) {
12112 DWConvMicrokernelTester()
12113 .cr(2)
12114 .kr(9)
12115 .channels(channels)
12116 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080012117 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012118 }
12119 }
12120
12121 TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
12122 for (uint32_t mz = 0; mz < 9; mz++) {
12123 for (uint32_t channels = 4; channels < 32; channels += 6) {
12124 DWConvMicrokernelTester()
12125 .cr(2)
12126 .kr(9)
12127 .channels(channels)
12128 .input_offset(80)
12129 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012130 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012131 }
12132 }
12133 }
12134#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12135
12136
12137#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12138 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
12139 DWConvMicrokernelTester()
12140 .cr(4)
12141 .kr(9)
12142 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080012143 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012144 }
12145
12146 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
12147 for (uint32_t channels = 8; channels < 64; channels += 12) {
12148 DWConvMicrokernelTester()
12149 .cr(4)
12150 .kr(9)
12151 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012152 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012153 }
12154 }
12155
12156 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
12157 for (uint32_t channels = 8; channels < 64; channels += 12) {
12158 DWConvMicrokernelTester()
12159 .cr(4)
12160 .kr(9)
12161 .channels(channels)
12162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012163 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012164 }
12165 }
12166
12167 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
12168 for (uint32_t channels = 8; channels < 64; channels += 12) {
12169 DWConvMicrokernelTester()
12170 .cr(4)
12171 .kr(9)
12172 .channels(channels)
12173 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012174 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012175 }
12176 }
12177
12178 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
12179 for (uint32_t channels = 1; channels < 4; channels++) {
12180 DWConvMicrokernelTester()
12181 .cr(4)
12182 .kr(9)
12183 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012184 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012185 }
12186 }
12187
12188 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
12189 for (uint32_t channels = 5; channels < 8; channels++) {
12190 DWConvMicrokernelTester()
12191 .cr(4)
12192 .kr(9)
12193 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012194 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012195 }
12196 }
12197
12198 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
12199 for (uint32_t channels = 5; channels < 8; channels++) {
12200 DWConvMicrokernelTester()
12201 .cr(4)
12202 .kr(9)
12203 .channels(channels)
12204 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012205 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012206 }
12207 }
12208
12209 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
12210 for (uint32_t channels = 5; channels < 8; channels++) {
12211 DWConvMicrokernelTester()
12212 .cr(4)
12213 .kr(9)
12214 .channels(channels)
12215 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012216 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012217 }
12218 }
12219
12220 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
12221 for (size_t channels = 1; channels <= 20; channels += 3) {
12222 DWConvMicrokernelTester()
12223 .cr(4)
12224 .kr(9)
12225 .channels(channels)
12226 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012227 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012228 }
12229 }
12230
12231 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
12232 for (size_t channels = 1; channels <= 20; channels += 3) {
12233 for (size_t step = 2; step <= 9; step++) {
12234 DWConvMicrokernelTester()
12235 .cr(4)
12236 .kr(9)
12237 .channels(channels)
12238 .width(3)
12239 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012240 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012241 }
12242 }
12243 }
12244
12245 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
12246 for (size_t channels = 1; channels <= 20; channels += 3) {
12247 DWConvMicrokernelTester()
12248 .cr(4)
12249 .kr(9)
12250 .channels(4)
12251 .width(5)
12252 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080012253 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012254 }
12255 }
12256
12257 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
12258 for (size_t channels = 1; channels <= 20; channels += 3) {
12259 DWConvMicrokernelTester()
12260 .cr(4)
12261 .kr(9)
12262 .channels(channels)
12263 .width(3)
12264 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012265 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012266 }
12267 }
12268
12269 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
12270 for (size_t channels = 1; channels <= 20; channels += 3) {
12271 DWConvMicrokernelTester()
12272 .cr(4)
12273 .kr(9)
12274 .channels(channels)
12275 .width(3)
12276 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012277 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012278 }
12279 }
12280
12281 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
12282 for (uint32_t channels = 8; channels < 64; channels += 12) {
12283 DWConvMicrokernelTester()
12284 .cr(4)
12285 .kr(9)
12286 .channels(channels)
12287 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080012288 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012289 }
12290 }
12291
12292 TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
12293 for (uint32_t mz = 0; mz < 9; mz++) {
12294 for (uint32_t channels = 8; channels < 64; channels += 12) {
12295 DWConvMicrokernelTester()
12296 .cr(4)
12297 .kr(9)
12298 .channels(channels)
12299 .input_offset(112)
12300 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012301 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080012302 }
12303 }
12304 }
12305#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12306
12307
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012308TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan57547062021-06-30 16:53:29 -070012309 DWConvMicrokernelTester()
12310 .cr(1)
12311 .kr(9)
12312 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012313 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012314}
12315
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012316TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan57547062021-06-30 16:53:29 -070012317 for (uint32_t channels = 2; channels < 10; channels++) {
12318 DWConvMicrokernelTester()
12319 .cr(1)
12320 .kr(9)
12321 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012322 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012323 }
12324}
12325
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012326TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012327 for (uint32_t channels = 2; channels < 10; channels++) {
12328 DWConvMicrokernelTester()
12329 .cr(1)
12330 .kr(9)
12331 .channels(channels)
12332 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012333 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012334 }
12335}
12336
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012337TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012338 for (uint32_t channels = 2; channels < 10; channels++) {
12339 DWConvMicrokernelTester()
12340 .cr(1)
12341 .kr(9)
12342 .channels(channels)
12343 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012344 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012345 }
12346}
12347
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012348TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070012349 for (size_t channels = 1; channels <= 5; channels += 1) {
12350 DWConvMicrokernelTester()
12351 .cr(1)
12352 .kr(9)
12353 .channels(channels)
12354 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012355 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012356 }
12357}
12358
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012359TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070012360 for (size_t channels = 1; channels <= 5; channels += 1) {
12361 for (size_t step = 2; step <= 9; step++) {
12362 DWConvMicrokernelTester()
12363 .cr(1)
12364 .kr(9)
12365 .channels(channels)
12366 .width(3)
12367 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012368 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012369 }
12370 }
12371}
12372
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012373TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070012374 for (size_t channels = 1; channels <= 5; channels += 1) {
12375 DWConvMicrokernelTester()
12376 .cr(1)
12377 .kr(9)
12378 .channels(1)
12379 .width(5)
12380 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012381 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012382 }
12383}
12384
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012385TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012386 for (size_t channels = 1; channels <= 5; channels += 1) {
12387 DWConvMicrokernelTester()
12388 .cr(1)
12389 .kr(9)
12390 .channels(channels)
12391 .width(3)
12392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012393 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012394 }
12395}
12396
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012397TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012398 for (size_t channels = 1; channels <= 5; channels += 1) {
12399 DWConvMicrokernelTester()
12400 .cr(1)
12401 .kr(9)
12402 .channels(channels)
12403 .width(3)
12404 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012405 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012406 }
12407}
12408
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012409TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070012410 for (uint32_t channels = 2; channels < 16; channels += 3) {
12411 DWConvMicrokernelTester()
12412 .cr(1)
12413 .kr(9)
12414 .channels(channels)
12415 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080012416 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012417 }
12418}
12419
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012420TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070012421 for (uint32_t mz = 0; mz < 9; mz++) {
12422 for (uint32_t channels = 2; channels < 16; channels += 3) {
12423 DWConvMicrokernelTester()
12424 .cr(1)
12425 .kr(9)
12426 .channels(channels)
12427 .input_offset(48)
12428 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012429 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012430 }
12431 }
12432}
12433
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012434TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070012435 DWConvMicrokernelTester()
12436 .cr(2)
12437 .kr(9)
12438 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080012439 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012440}
12441
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012442TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070012443 for (uint32_t channels = 4; channels < 32; channels += 6) {
12444 DWConvMicrokernelTester()
12445 .cr(2)
12446 .kr(9)
12447 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012448 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012449 }
12450}
12451
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012452TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012453 for (uint32_t channels = 4; channels < 32; channels += 6) {
12454 DWConvMicrokernelTester()
12455 .cr(2)
12456 .kr(9)
12457 .channels(channels)
12458 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012459 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012460 }
12461}
12462
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012463TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012464 for (uint32_t channels = 4; channels < 32; channels += 6) {
12465 DWConvMicrokernelTester()
12466 .cr(2)
12467 .kr(9)
12468 .channels(channels)
12469 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012470 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012471 }
12472}
12473
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012474TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070012475 for (uint32_t channels = 1; channels < 2; channels++) {
12476 DWConvMicrokernelTester()
12477 .cr(2)
12478 .kr(9)
12479 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012480 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012481 }
12482}
12483
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012484TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070012485 for (uint32_t channels = 3; channels < 4; channels++) {
12486 DWConvMicrokernelTester()
12487 .cr(2)
12488 .kr(9)
12489 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012490 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012491 }
12492}
12493
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012494TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012495 for (uint32_t channels = 3; channels < 4; channels++) {
12496 DWConvMicrokernelTester()
12497 .cr(2)
12498 .kr(9)
12499 .channels(channels)
12500 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012502 }
12503}
12504
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012505TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012506 for (uint32_t channels = 3; channels < 4; channels++) {
12507 DWConvMicrokernelTester()
12508 .cr(2)
12509 .kr(9)
12510 .channels(channels)
12511 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012512 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012513 }
12514}
12515
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012516TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070012517 for (size_t channels = 1; channels <= 10; channels += 1) {
12518 DWConvMicrokernelTester()
12519 .cr(2)
12520 .kr(9)
12521 .channels(channels)
12522 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012524 }
12525}
12526
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012527TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070012528 for (size_t channels = 1; channels <= 10; channels += 1) {
12529 for (size_t step = 2; step <= 9; step++) {
12530 DWConvMicrokernelTester()
12531 .cr(2)
12532 .kr(9)
12533 .channels(channels)
12534 .width(3)
12535 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012536 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012537 }
12538 }
12539}
12540
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012541TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070012542 for (size_t channels = 1; channels <= 10; channels += 1) {
12543 DWConvMicrokernelTester()
12544 .cr(2)
12545 .kr(9)
12546 .channels(2)
12547 .width(5)
12548 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080012549 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012550 }
12551}
12552
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012553TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012554 for (size_t channels = 1; channels <= 10; channels += 1) {
12555 DWConvMicrokernelTester()
12556 .cr(2)
12557 .kr(9)
12558 .channels(channels)
12559 .width(3)
12560 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012561 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012562 }
12563}
12564
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012565TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012566 for (size_t channels = 1; channels <= 10; channels += 1) {
12567 DWConvMicrokernelTester()
12568 .cr(2)
12569 .kr(9)
12570 .channels(channels)
12571 .width(3)
12572 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012573 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012574 }
12575}
12576
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012577TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070012578 for (uint32_t channels = 4; channels < 32; channels += 6) {
12579 DWConvMicrokernelTester()
12580 .cr(2)
12581 .kr(9)
12582 .channels(channels)
12583 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080012584 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012585 }
12586}
12587
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012588TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070012589 for (uint32_t mz = 0; mz < 9; mz++) {
12590 for (uint32_t channels = 4; channels < 32; channels += 6) {
12591 DWConvMicrokernelTester()
12592 .cr(2)
12593 .kr(9)
12594 .channels(channels)
12595 .input_offset(80)
12596 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012597 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012598 }
12599 }
12600}
12601
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012602TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070012603 DWConvMicrokernelTester()
12604 .cr(4)
12605 .kr(9)
12606 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080012607 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012608}
12609
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012610TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070012611 for (uint32_t channels = 8; channels < 64; channels += 12) {
12612 DWConvMicrokernelTester()
12613 .cr(4)
12614 .kr(9)
12615 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012616 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012617 }
12618}
12619
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012620TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012621 for (uint32_t channels = 8; channels < 64; channels += 12) {
12622 DWConvMicrokernelTester()
12623 .cr(4)
12624 .kr(9)
12625 .channels(channels)
12626 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012627 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012628 }
12629}
12630
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012631TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012632 for (uint32_t channels = 8; channels < 64; channels += 12) {
12633 DWConvMicrokernelTester()
12634 .cr(4)
12635 .kr(9)
12636 .channels(channels)
12637 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012638 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012639 }
12640}
12641
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012642TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070012643 for (uint32_t channels = 1; channels < 4; channels++) {
12644 DWConvMicrokernelTester()
12645 .cr(4)
12646 .kr(9)
12647 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012648 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012649 }
12650}
12651
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012652TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070012653 for (uint32_t channels = 5; channels < 8; channels++) {
12654 DWConvMicrokernelTester()
12655 .cr(4)
12656 .kr(9)
12657 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012658 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012659 }
12660}
12661
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012662TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012663 for (uint32_t channels = 5; channels < 8; channels++) {
12664 DWConvMicrokernelTester()
12665 .cr(4)
12666 .kr(9)
12667 .channels(channels)
12668 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012669 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012670 }
12671}
12672
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012673TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012674 for (uint32_t channels = 5; channels < 8; channels++) {
12675 DWConvMicrokernelTester()
12676 .cr(4)
12677 .kr(9)
12678 .channels(channels)
12679 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012680 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012681 }
12682}
12683
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012684TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070012685 for (size_t channels = 1; channels <= 20; channels += 3) {
12686 DWConvMicrokernelTester()
12687 .cr(4)
12688 .kr(9)
12689 .channels(channels)
12690 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012691 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012692 }
12693}
12694
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012695TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070012696 for (size_t channels = 1; channels <= 20; channels += 3) {
12697 for (size_t step = 2; step <= 9; step++) {
12698 DWConvMicrokernelTester()
12699 .cr(4)
12700 .kr(9)
12701 .channels(channels)
12702 .width(3)
12703 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012704 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012705 }
12706 }
12707}
12708
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012709TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070012710 for (size_t channels = 1; channels <= 20; channels += 3) {
12711 DWConvMicrokernelTester()
12712 .cr(4)
12713 .kr(9)
12714 .channels(4)
12715 .width(5)
12716 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080012717 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012718 }
12719}
12720
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012721TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070012722 for (size_t channels = 1; channels <= 20; channels += 3) {
12723 DWConvMicrokernelTester()
12724 .cr(4)
12725 .kr(9)
12726 .channels(channels)
12727 .width(3)
12728 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012729 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012730 }
12731}
12732
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012733TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070012734 for (size_t channels = 1; channels <= 20; channels += 3) {
12735 DWConvMicrokernelTester()
12736 .cr(4)
12737 .kr(9)
12738 .channels(channels)
12739 .width(3)
12740 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012741 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012742 }
12743}
12744
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012745TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070012746 for (uint32_t channels = 8; channels < 64; channels += 12) {
12747 DWConvMicrokernelTester()
12748 .cr(4)
12749 .kr(9)
12750 .channels(channels)
12751 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080012752 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012753 }
12754}
12755
Marat Dukhan2ac722e2022-01-04 01:54:20 -080012756TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070012757 for (uint32_t mz = 0; mz < 9; mz++) {
12758 for (uint32_t channels = 8; channels < 64; channels += 12) {
12759 DWConvMicrokernelTester()
12760 .cr(4)
12761 .kr(9)
12762 .channels(channels)
12763 .input_offset(112)
12764 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012765 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070012766 }
12767 }
12768}
12769
Marat Dukhan272d4d92022-01-04 15:07:14 -080012770TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
12771 DWConvMicrokernelTester()
12772 .cr(1)
12773 .kr(9)
12774 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012775 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012776}
12777
12778TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
12779 for (uint32_t channels = 2; channels < 10; channels++) {
12780 DWConvMicrokernelTester()
12781 .cr(1)
12782 .kr(9)
12783 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012784 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012785 }
12786}
12787
12788TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
12789 for (uint32_t channels = 2; channels < 10; channels++) {
12790 DWConvMicrokernelTester()
12791 .cr(1)
12792 .kr(9)
12793 .channels(channels)
12794 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012795 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012796 }
12797}
12798
12799TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
12800 for (uint32_t channels = 2; channels < 10; channels++) {
12801 DWConvMicrokernelTester()
12802 .cr(1)
12803 .kr(9)
12804 .channels(channels)
12805 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012806 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012807 }
12808}
12809
12810TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
12811 for (size_t channels = 1; channels <= 5; channels += 1) {
12812 DWConvMicrokernelTester()
12813 .cr(1)
12814 .kr(9)
12815 .channels(channels)
12816 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012818 }
12819}
12820
12821TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
12822 for (size_t channels = 1; channels <= 5; channels += 1) {
12823 for (size_t step = 2; step <= 9; step++) {
12824 DWConvMicrokernelTester()
12825 .cr(1)
12826 .kr(9)
12827 .channels(channels)
12828 .width(3)
12829 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012831 }
12832 }
12833}
12834
12835TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
12836 for (size_t channels = 1; channels <= 5; channels += 1) {
12837 DWConvMicrokernelTester()
12838 .cr(1)
12839 .kr(9)
12840 .channels(1)
12841 .width(5)
12842 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012843 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012844 }
12845}
12846
12847TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
12848 for (size_t channels = 1; channels <= 5; channels += 1) {
12849 DWConvMicrokernelTester()
12850 .cr(1)
12851 .kr(9)
12852 .channels(channels)
12853 .width(3)
12854 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012855 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012856 }
12857}
12858
12859TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
12860 for (size_t channels = 1; channels <= 5; channels += 1) {
12861 DWConvMicrokernelTester()
12862 .cr(1)
12863 .kr(9)
12864 .channels(channels)
12865 .width(3)
12866 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012867 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012868 }
12869}
12870
12871TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
12872 for (uint32_t channels = 2; channels < 16; channels += 3) {
12873 DWConvMicrokernelTester()
12874 .cr(1)
12875 .kr(9)
12876 .channels(channels)
12877 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080012878 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012879 }
12880}
12881
12882TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
12883 for (uint32_t mz = 0; mz < 9; mz++) {
12884 for (uint32_t channels = 2; channels < 16; channels += 3) {
12885 DWConvMicrokernelTester()
12886 .cr(1)
12887 .kr(9)
12888 .channels(channels)
12889 .input_offset(48)
12890 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012891 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012892 }
12893 }
12894}
12895
12896TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
12897 DWConvMicrokernelTester()
12898 .cr(2)
12899 .kr(9)
12900 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080012901 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012902}
12903
12904TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
12905 for (uint32_t channels = 4; channels < 32; channels += 6) {
12906 DWConvMicrokernelTester()
12907 .cr(2)
12908 .kr(9)
12909 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012910 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012911 }
12912}
12913
12914TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
12915 for (uint32_t channels = 4; channels < 32; channels += 6) {
12916 DWConvMicrokernelTester()
12917 .cr(2)
12918 .kr(9)
12919 .channels(channels)
12920 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012921 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012922 }
12923}
12924
12925TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
12926 for (uint32_t channels = 4; channels < 32; channels += 6) {
12927 DWConvMicrokernelTester()
12928 .cr(2)
12929 .kr(9)
12930 .channels(channels)
12931 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012932 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012933 }
12934}
12935
12936TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
12937 for (uint32_t channels = 1; channels < 2; channels++) {
12938 DWConvMicrokernelTester()
12939 .cr(2)
12940 .kr(9)
12941 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012942 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012943 }
12944}
12945
12946TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
12947 for (uint32_t channels = 3; channels < 4; channels++) {
12948 DWConvMicrokernelTester()
12949 .cr(2)
12950 .kr(9)
12951 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080012952 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012953 }
12954}
12955
12956TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
12957 for (uint32_t channels = 3; channels < 4; channels++) {
12958 DWConvMicrokernelTester()
12959 .cr(2)
12960 .kr(9)
12961 .channels(channels)
12962 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012963 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012964 }
12965}
12966
12967TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
12968 for (uint32_t channels = 3; channels < 4; channels++) {
12969 DWConvMicrokernelTester()
12970 .cr(2)
12971 .kr(9)
12972 .channels(channels)
12973 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012974 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012975 }
12976}
12977
12978TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
12979 for (size_t channels = 1; channels <= 10; channels += 1) {
12980 DWConvMicrokernelTester()
12981 .cr(2)
12982 .kr(9)
12983 .channels(channels)
12984 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012985 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012986 }
12987}
12988
12989TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
12990 for (size_t channels = 1; channels <= 10; channels += 1) {
12991 for (size_t step = 2; step <= 9; step++) {
12992 DWConvMicrokernelTester()
12993 .cr(2)
12994 .kr(9)
12995 .channels(channels)
12996 .width(3)
12997 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080012998 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080012999 }
13000 }
13001}
13002
13003TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
13004 for (size_t channels = 1; channels <= 10; channels += 1) {
13005 DWConvMicrokernelTester()
13006 .cr(2)
13007 .kr(9)
13008 .channels(2)
13009 .width(5)
13010 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080013011 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013012 }
13013}
13014
13015TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
13016 for (size_t channels = 1; channels <= 10; channels += 1) {
13017 DWConvMicrokernelTester()
13018 .cr(2)
13019 .kr(9)
13020 .channels(channels)
13021 .width(3)
13022 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013023 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013024 }
13025}
13026
13027TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
13028 for (size_t channels = 1; channels <= 10; channels += 1) {
13029 DWConvMicrokernelTester()
13030 .cr(2)
13031 .kr(9)
13032 .channels(channels)
13033 .width(3)
13034 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013035 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013036 }
13037}
13038
13039TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
13040 for (uint32_t channels = 4; channels < 32; channels += 6) {
13041 DWConvMicrokernelTester()
13042 .cr(2)
13043 .kr(9)
13044 .channels(channels)
13045 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080013046 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013047 }
13048}
13049
13050TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
13051 for (uint32_t mz = 0; mz < 9; mz++) {
13052 for (uint32_t channels = 4; channels < 32; channels += 6) {
13053 DWConvMicrokernelTester()
13054 .cr(2)
13055 .kr(9)
13056 .channels(channels)
13057 .input_offset(80)
13058 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013060 }
13061 }
13062}
13063
13064TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
13065 DWConvMicrokernelTester()
13066 .cr(4)
13067 .kr(9)
13068 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080013069 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013070}
13071
13072TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
13073 for (uint32_t channels = 8; channels < 64; channels += 12) {
13074 DWConvMicrokernelTester()
13075 .cr(4)
13076 .kr(9)
13077 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013078 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013079 }
13080}
13081
13082TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
13083 for (uint32_t channels = 8; channels < 64; channels += 12) {
13084 DWConvMicrokernelTester()
13085 .cr(4)
13086 .kr(9)
13087 .channels(channels)
13088 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013089 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013090 }
13091}
13092
13093TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
13094 for (uint32_t channels = 8; channels < 64; channels += 12) {
13095 DWConvMicrokernelTester()
13096 .cr(4)
13097 .kr(9)
13098 .channels(channels)
13099 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013100 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013101 }
13102}
13103
13104TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
13105 for (uint32_t channels = 1; channels < 4; channels++) {
13106 DWConvMicrokernelTester()
13107 .cr(4)
13108 .kr(9)
13109 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013110 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013111 }
13112}
13113
13114TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
13115 for (uint32_t channels = 5; channels < 8; channels++) {
13116 DWConvMicrokernelTester()
13117 .cr(4)
13118 .kr(9)
13119 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013120 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013121 }
13122}
13123
13124TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
13125 for (uint32_t channels = 5; channels < 8; channels++) {
13126 DWConvMicrokernelTester()
13127 .cr(4)
13128 .kr(9)
13129 .channels(channels)
13130 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013131 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013132 }
13133}
13134
13135TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
13136 for (uint32_t channels = 5; channels < 8; channels++) {
13137 DWConvMicrokernelTester()
13138 .cr(4)
13139 .kr(9)
13140 .channels(channels)
13141 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013142 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013143 }
13144}
13145
13146TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
13147 for (size_t channels = 1; channels <= 20; channels += 3) {
13148 DWConvMicrokernelTester()
13149 .cr(4)
13150 .kr(9)
13151 .channels(channels)
13152 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013153 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013154 }
13155}
13156
13157TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
13158 for (size_t channels = 1; channels <= 20; channels += 3) {
13159 for (size_t step = 2; step <= 9; step++) {
13160 DWConvMicrokernelTester()
13161 .cr(4)
13162 .kr(9)
13163 .channels(channels)
13164 .width(3)
13165 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013166 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013167 }
13168 }
13169}
13170
13171TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
13172 for (size_t channels = 1; channels <= 20; channels += 3) {
13173 DWConvMicrokernelTester()
13174 .cr(4)
13175 .kr(9)
13176 .channels(4)
13177 .width(5)
13178 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080013179 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013180 }
13181}
13182
13183TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
13184 for (size_t channels = 1; channels <= 20; channels += 3) {
13185 DWConvMicrokernelTester()
13186 .cr(4)
13187 .kr(9)
13188 .channels(channels)
13189 .width(3)
13190 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013191 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013192 }
13193}
13194
13195TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
13196 for (size_t channels = 1; channels <= 20; channels += 3) {
13197 DWConvMicrokernelTester()
13198 .cr(4)
13199 .kr(9)
13200 .channels(channels)
13201 .width(3)
13202 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013203 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013204 }
13205}
13206
13207TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
13208 for (uint32_t channels = 8; channels < 64; channels += 12) {
13209 DWConvMicrokernelTester()
13210 .cr(4)
13211 .kr(9)
13212 .channels(channels)
13213 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080013214 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013215 }
13216}
13217
13218TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
13219 for (uint32_t mz = 0; mz < 9; mz++) {
13220 for (uint32_t channels = 8; channels < 64; channels += 12) {
13221 DWConvMicrokernelTester()
13222 .cr(4)
13223 .kr(9)
13224 .channels(channels)
13225 .input_offset(112)
13226 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013227 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013228 }
13229 }
13230}
13231
13232TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
13233 DWConvMicrokernelTester()
13234 .cr(1)
13235 .kr(9)
13236 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013237 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013238}
13239
13240TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
13241 for (uint32_t channels = 2; channels < 10; channels++) {
13242 DWConvMicrokernelTester()
13243 .cr(1)
13244 .kr(9)
13245 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013246 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013247 }
13248}
13249
13250TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
13251 for (uint32_t channels = 2; channels < 10; channels++) {
13252 DWConvMicrokernelTester()
13253 .cr(1)
13254 .kr(9)
13255 .channels(channels)
13256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013257 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013258 }
13259}
13260
13261TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
13262 for (uint32_t channels = 2; channels < 10; channels++) {
13263 DWConvMicrokernelTester()
13264 .cr(1)
13265 .kr(9)
13266 .channels(channels)
13267 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013268 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013269 }
13270}
13271
13272TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
13273 for (size_t channels = 1; channels <= 5; channels += 1) {
13274 DWConvMicrokernelTester()
13275 .cr(1)
13276 .kr(9)
13277 .channels(channels)
13278 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013279 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013280 }
13281}
13282
13283TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
13284 for (size_t channels = 1; channels <= 5; channels += 1) {
13285 for (size_t step = 2; step <= 9; step++) {
13286 DWConvMicrokernelTester()
13287 .cr(1)
13288 .kr(9)
13289 .channels(channels)
13290 .width(3)
13291 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013292 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013293 }
13294 }
13295}
13296
13297TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
13298 for (size_t channels = 1; channels <= 5; channels += 1) {
13299 DWConvMicrokernelTester()
13300 .cr(1)
13301 .kr(9)
13302 .channels(1)
13303 .width(5)
13304 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013305 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013306 }
13307}
13308
13309TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
13310 for (size_t channels = 1; channels <= 5; channels += 1) {
13311 DWConvMicrokernelTester()
13312 .cr(1)
13313 .kr(9)
13314 .channels(channels)
13315 .width(3)
13316 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013317 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013318 }
13319}
13320
13321TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
13322 for (size_t channels = 1; channels <= 5; channels += 1) {
13323 DWConvMicrokernelTester()
13324 .cr(1)
13325 .kr(9)
13326 .channels(channels)
13327 .width(3)
13328 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013329 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013330 }
13331}
13332
13333TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
13334 for (uint32_t channels = 2; channels < 16; channels += 3) {
13335 DWConvMicrokernelTester()
13336 .cr(1)
13337 .kr(9)
13338 .channels(channels)
13339 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080013340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013341 }
13342}
13343
13344TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
13345 for (uint32_t mz = 0; mz < 9; mz++) {
13346 for (uint32_t channels = 2; channels < 16; channels += 3) {
13347 DWConvMicrokernelTester()
13348 .cr(1)
13349 .kr(9)
13350 .channels(channels)
13351 .input_offset(48)
13352 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013353 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013354 }
13355 }
13356}
13357
13358TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
13359 DWConvMicrokernelTester()
13360 .cr(2)
13361 .kr(9)
13362 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080013363 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013364}
13365
13366TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
13367 for (uint32_t channels = 4; channels < 32; channels += 6) {
13368 DWConvMicrokernelTester()
13369 .cr(2)
13370 .kr(9)
13371 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013372 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013373 }
13374}
13375
13376TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
13377 for (uint32_t channels = 4; channels < 32; channels += 6) {
13378 DWConvMicrokernelTester()
13379 .cr(2)
13380 .kr(9)
13381 .channels(channels)
13382 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013383 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013384 }
13385}
13386
13387TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
13388 for (uint32_t channels = 4; channels < 32; channels += 6) {
13389 DWConvMicrokernelTester()
13390 .cr(2)
13391 .kr(9)
13392 .channels(channels)
13393 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013394 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013395 }
13396}
13397
13398TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
13399 for (uint32_t channels = 1; channels < 2; channels++) {
13400 DWConvMicrokernelTester()
13401 .cr(2)
13402 .kr(9)
13403 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013404 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013405 }
13406}
13407
13408TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
13409 for (uint32_t channels = 3; channels < 4; channels++) {
13410 DWConvMicrokernelTester()
13411 .cr(2)
13412 .kr(9)
13413 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013414 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013415 }
13416}
13417
13418TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
13419 for (uint32_t channels = 3; channels < 4; channels++) {
13420 DWConvMicrokernelTester()
13421 .cr(2)
13422 .kr(9)
13423 .channels(channels)
13424 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013425 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013426 }
13427}
13428
13429TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
13430 for (uint32_t channels = 3; channels < 4; channels++) {
13431 DWConvMicrokernelTester()
13432 .cr(2)
13433 .kr(9)
13434 .channels(channels)
13435 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013436 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013437 }
13438}
13439
13440TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
13441 for (size_t channels = 1; channels <= 10; channels += 1) {
13442 DWConvMicrokernelTester()
13443 .cr(2)
13444 .kr(9)
13445 .channels(channels)
13446 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013447 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013448 }
13449}
13450
13451TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
13452 for (size_t channels = 1; channels <= 10; channels += 1) {
13453 for (size_t step = 2; step <= 9; step++) {
13454 DWConvMicrokernelTester()
13455 .cr(2)
13456 .kr(9)
13457 .channels(channels)
13458 .width(3)
13459 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013460 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013461 }
13462 }
13463}
13464
13465TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
13466 for (size_t channels = 1; channels <= 10; channels += 1) {
13467 DWConvMicrokernelTester()
13468 .cr(2)
13469 .kr(9)
13470 .channels(2)
13471 .width(5)
13472 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080013473 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013474 }
13475}
13476
13477TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
13478 for (size_t channels = 1; channels <= 10; channels += 1) {
13479 DWConvMicrokernelTester()
13480 .cr(2)
13481 .kr(9)
13482 .channels(channels)
13483 .width(3)
13484 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013485 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013486 }
13487}
13488
13489TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
13490 for (size_t channels = 1; channels <= 10; channels += 1) {
13491 DWConvMicrokernelTester()
13492 .cr(2)
13493 .kr(9)
13494 .channels(channels)
13495 .width(3)
13496 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013497 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013498 }
13499}
13500
13501TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
13502 for (uint32_t channels = 4; channels < 32; channels += 6) {
13503 DWConvMicrokernelTester()
13504 .cr(2)
13505 .kr(9)
13506 .channels(channels)
13507 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080013508 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013509 }
13510}
13511
13512TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
13513 for (uint32_t mz = 0; mz < 9; mz++) {
13514 for (uint32_t channels = 4; channels < 32; channels += 6) {
13515 DWConvMicrokernelTester()
13516 .cr(2)
13517 .kr(9)
13518 .channels(channels)
13519 .input_offset(80)
13520 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013521 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013522 }
13523 }
13524}
13525
13526TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
13527 DWConvMicrokernelTester()
13528 .cr(4)
13529 .kr(9)
13530 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080013531 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013532}
13533
13534TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
13535 for (uint32_t channels = 8; channels < 64; channels += 12) {
13536 DWConvMicrokernelTester()
13537 .cr(4)
13538 .kr(9)
13539 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013540 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013541 }
13542}
13543
13544TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
13545 for (uint32_t channels = 8; channels < 64; channels += 12) {
13546 DWConvMicrokernelTester()
13547 .cr(4)
13548 .kr(9)
13549 .channels(channels)
13550 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013551 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013552 }
13553}
13554
13555TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
13556 for (uint32_t channels = 8; channels < 64; channels += 12) {
13557 DWConvMicrokernelTester()
13558 .cr(4)
13559 .kr(9)
13560 .channels(channels)
13561 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013563 }
13564}
13565
13566TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
13567 for (uint32_t channels = 1; channels < 4; channels++) {
13568 DWConvMicrokernelTester()
13569 .cr(4)
13570 .kr(9)
13571 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013572 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013573 }
13574}
13575
13576TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
13577 for (uint32_t channels = 5; channels < 8; channels++) {
13578 DWConvMicrokernelTester()
13579 .cr(4)
13580 .kr(9)
13581 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013582 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013583 }
13584}
13585
13586TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
13587 for (uint32_t channels = 5; channels < 8; channels++) {
13588 DWConvMicrokernelTester()
13589 .cr(4)
13590 .kr(9)
13591 .channels(channels)
13592 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013594 }
13595}
13596
13597TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
13598 for (uint32_t channels = 5; channels < 8; channels++) {
13599 DWConvMicrokernelTester()
13600 .cr(4)
13601 .kr(9)
13602 .channels(channels)
13603 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013604 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013605 }
13606}
13607
13608TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
13609 for (size_t channels = 1; channels <= 20; channels += 3) {
13610 DWConvMicrokernelTester()
13611 .cr(4)
13612 .kr(9)
13613 .channels(channels)
13614 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013615 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013616 }
13617}
13618
13619TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
13620 for (size_t channels = 1; channels <= 20; channels += 3) {
13621 for (size_t step = 2; step <= 9; step++) {
13622 DWConvMicrokernelTester()
13623 .cr(4)
13624 .kr(9)
13625 .channels(channels)
13626 .width(3)
13627 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013628 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013629 }
13630 }
13631}
13632
13633TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
13634 for (size_t channels = 1; channels <= 20; channels += 3) {
13635 DWConvMicrokernelTester()
13636 .cr(4)
13637 .kr(9)
13638 .channels(4)
13639 .width(5)
13640 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080013641 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013642 }
13643}
13644
13645TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
13646 for (size_t channels = 1; channels <= 20; channels += 3) {
13647 DWConvMicrokernelTester()
13648 .cr(4)
13649 .kr(9)
13650 .channels(channels)
13651 .width(3)
13652 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013653 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013654 }
13655}
13656
13657TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
13658 for (size_t channels = 1; channels <= 20; channels += 3) {
13659 DWConvMicrokernelTester()
13660 .cr(4)
13661 .kr(9)
13662 .channels(channels)
13663 .width(3)
13664 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013665 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013666 }
13667}
13668
13669TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
13670 for (uint32_t channels = 8; channels < 64; channels += 12) {
13671 DWConvMicrokernelTester()
13672 .cr(4)
13673 .kr(9)
13674 .channels(channels)
13675 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080013676 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013677 }
13678}
13679
13680TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
13681 for (uint32_t mz = 0; mz < 9; mz++) {
13682 for (uint32_t channels = 8; channels < 64; channels += 12) {
13683 DWConvMicrokernelTester()
13684 .cr(4)
13685 .kr(9)
13686 .channels(channels)
13687 .input_offset(112)
13688 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013689 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080013690 }
13691 }
13692}
13693
Marat Dukhan59af5812021-06-29 18:09:57 -070013694#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013695 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_eq_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013696 TEST_REQUIRES_ARM_NEON;
13697 DWConvMicrokernelTester()
13698 .cr(8)
13699 .kr(25)
13700 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013701 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013702 }
13703
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013704 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013705 TEST_REQUIRES_ARM_NEON;
13706 for (uint32_t channels = 16; channels < 128; channels += 24) {
13707 DWConvMicrokernelTester()
13708 .cr(8)
13709 .kr(25)
13710 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013711 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013712 }
13713 }
13714
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013715 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013716 TEST_REQUIRES_ARM_NEON;
13717 for (uint32_t channels = 16; channels < 128; channels += 24) {
13718 DWConvMicrokernelTester()
13719 .cr(8)
13720 .kr(25)
13721 .channels(channels)
13722 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013723 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013724 }
13725 }
13726
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013727 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013728 TEST_REQUIRES_ARM_NEON;
13729 for (uint32_t channels = 16; channels < 128; channels += 24) {
13730 DWConvMicrokernelTester()
13731 .cr(8)
13732 .kr(25)
13733 .channels(channels)
13734 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013735 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013736 }
13737 }
13738
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013739 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_lt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013740 TEST_REQUIRES_ARM_NEON;
13741 for (uint32_t channels = 1; channels < 8; channels++) {
13742 DWConvMicrokernelTester()
13743 .cr(8)
13744 .kr(25)
13745 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013746 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013747 }
13748 }
13749
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013750 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013751 TEST_REQUIRES_ARM_NEON;
13752 for (uint32_t channels = 9; channels < 16; channels++) {
13753 DWConvMicrokernelTester()
13754 .cr(8)
13755 .kr(25)
13756 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013757 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013758 }
13759 }
13760
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013761 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013762 TEST_REQUIRES_ARM_NEON;
13763 for (uint32_t channels = 9; channels < 16; channels++) {
13764 DWConvMicrokernelTester()
13765 .cr(8)
13766 .kr(25)
13767 .channels(channels)
13768 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013770 }
13771 }
13772
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013773 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013774 TEST_REQUIRES_ARM_NEON;
13775 for (uint32_t channels = 9; channels < 16; channels++) {
13776 DWConvMicrokernelTester()
13777 .cr(8)
13778 .kr(25)
13779 .channels(channels)
13780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013781 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013782 }
13783 }
13784
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013785 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013786 TEST_REQUIRES_ARM_NEON;
13787 for (size_t channels = 1; channels <= 40; channels += 7) {
13788 DWConvMicrokernelTester()
13789 .cr(8)
13790 .kr(25)
13791 .channels(channels)
13792 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013793 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013794 }
13795 }
13796
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013797 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013798 TEST_REQUIRES_ARM_NEON;
13799 for (size_t channels = 1; channels <= 40; channels += 7) {
13800 for (size_t step = 2; step <= 25; step++) {
13801 DWConvMicrokernelTester()
13802 .cr(8)
13803 .kr(25)
13804 .channels(channels)
13805 .width(3)
13806 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013807 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013808 }
13809 }
13810 }
13811
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013812 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013813 TEST_REQUIRES_ARM_NEON;
13814 for (size_t channels = 1; channels <= 40; channels += 7) {
13815 DWConvMicrokernelTester()
13816 .cr(8)
13817 .kr(25)
13818 .channels(8)
13819 .width(5)
13820 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013821 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013822 }
13823 }
13824
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013825 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013826 TEST_REQUIRES_ARM_NEON;
13827 for (size_t channels = 1; channels <= 40; channels += 7) {
13828 DWConvMicrokernelTester()
13829 .cr(8)
13830 .kr(25)
13831 .channels(channels)
13832 .width(3)
13833 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013834 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013835 }
13836 }
13837
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013838 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013839 TEST_REQUIRES_ARM_NEON;
13840 for (size_t channels = 1; channels <= 40; channels += 7) {
13841 DWConvMicrokernelTester()
13842 .cr(8)
13843 .kr(25)
13844 .channels(channels)
13845 .width(3)
13846 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013847 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013848 }
13849 }
13850
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013851 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013852 TEST_REQUIRES_ARM_NEON;
13853 for (uint32_t channels = 16; channels < 128; channels += 24) {
13854 DWConvMicrokernelTester()
13855 .cr(8)
13856 .kr(25)
13857 .channels(channels)
13858 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080013859 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013860 }
13861 }
13862
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013863 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013864 TEST_REQUIRES_ARM_NEON;
13865 for (uint32_t mz = 0; mz < 25; mz++) {
13866 for (uint32_t channels = 16; channels < 128; channels += 24) {
13867 DWConvMicrokernelTester()
13868 .cr(8)
13869 .kr(25)
13870 .channels(channels)
13871 .input_offset(176)
13872 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013873 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013874 }
13875 }
13876 }
13877#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13878
13879
13880#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013881 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_eq_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013882 TEST_REQUIRES_ARM_NEON;
13883 DWConvMicrokernelTester()
13884 .cr(16)
13885 .kr(25)
13886 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013887 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013888 }
13889
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013890 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013891 TEST_REQUIRES_ARM_NEON;
13892 for (uint32_t channels = 32; channels < 256; channels += 48) {
13893 DWConvMicrokernelTester()
13894 .cr(16)
13895 .kr(25)
13896 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013897 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013898 }
13899 }
13900
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013901 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013902 TEST_REQUIRES_ARM_NEON;
13903 for (uint32_t channels = 32; channels < 256; channels += 48) {
13904 DWConvMicrokernelTester()
13905 .cr(16)
13906 .kr(25)
13907 .channels(channels)
13908 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013909 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013910 }
13911 }
13912
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013913 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013914 TEST_REQUIRES_ARM_NEON;
13915 for (uint32_t channels = 32; channels < 256; channels += 48) {
13916 DWConvMicrokernelTester()
13917 .cr(16)
13918 .kr(25)
13919 .channels(channels)
13920 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013921 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013922 }
13923 }
13924
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013925 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_lt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013926 TEST_REQUIRES_ARM_NEON;
13927 for (uint32_t channels = 1; channels < 16; channels++) {
13928 DWConvMicrokernelTester()
13929 .cr(16)
13930 .kr(25)
13931 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013932 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013933 }
13934 }
13935
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013936 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013937 TEST_REQUIRES_ARM_NEON;
13938 for (uint32_t channels = 17; channels < 32; channels++) {
13939 DWConvMicrokernelTester()
13940 .cr(16)
13941 .kr(25)
13942 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080013943 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013944 }
13945 }
13946
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013947 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013948 TEST_REQUIRES_ARM_NEON;
13949 for (uint32_t channels = 17; channels < 32; channels++) {
13950 DWConvMicrokernelTester()
13951 .cr(16)
13952 .kr(25)
13953 .channels(channels)
13954 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013955 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013956 }
13957 }
13958
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013959 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013960 TEST_REQUIRES_ARM_NEON;
13961 for (uint32_t channels = 17; channels < 32; channels++) {
13962 DWConvMicrokernelTester()
13963 .cr(16)
13964 .kr(25)
13965 .channels(channels)
13966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013967 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013968 }
13969 }
13970
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013971 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013972 TEST_REQUIRES_ARM_NEON;
13973 for (size_t channels = 1; channels <= 80; channels += 15) {
13974 DWConvMicrokernelTester()
13975 .cr(16)
13976 .kr(25)
13977 .channels(channels)
13978 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013979 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013980 }
13981 }
13982
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013983 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_step) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013984 TEST_REQUIRES_ARM_NEON;
13985 for (size_t channels = 1; channels <= 80; channels += 15) {
13986 for (size_t step = 2; step <= 25; step++) {
13987 DWConvMicrokernelTester()
13988 .cr(16)
13989 .kr(25)
13990 .channels(channels)
13991 .width(3)
13992 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080013993 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013994 }
13995 }
13996 }
13997
Marat Dukhan5f2939f2021-07-23 13:38:32 -070013998 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070013999 TEST_REQUIRES_ARM_NEON;
14000 for (size_t channels = 1; channels <= 80; channels += 15) {
14001 DWConvMicrokernelTester()
14002 .cr(16)
14003 .kr(25)
14004 .channels(16)
14005 .width(5)
14006 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014007 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014008 }
14009 }
14010
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014011 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmin) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014012 TEST_REQUIRES_ARM_NEON;
14013 for (size_t channels = 1; channels <= 80; channels += 15) {
14014 DWConvMicrokernelTester()
14015 .cr(16)
14016 .kr(25)
14017 .channels(channels)
14018 .width(3)
14019 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014020 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014021 }
14022 }
14023
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014024 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmax) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014025 TEST_REQUIRES_ARM_NEON;
14026 for (size_t channels = 1; channels <= 80; channels += 15) {
14027 DWConvMicrokernelTester()
14028 .cr(16)
14029 .kr(25)
14030 .channels(channels)
14031 .width(3)
14032 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014033 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014034 }
14035 }
14036
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014037 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, input_offset) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014038 TEST_REQUIRES_ARM_NEON;
14039 for (uint32_t channels = 32; channels < 256; channels += 48) {
14040 DWConvMicrokernelTester()
14041 .cr(16)
14042 .kr(25)
14043 .channels(channels)
14044 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080014045 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014046 }
14047 }
14048
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014049 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, zero) {
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014050 TEST_REQUIRES_ARM_NEON;
14051 for (uint32_t mz = 0; mz < 25; mz++) {
14052 for (uint32_t channels = 32; channels < 256; channels += 48) {
14053 DWConvMicrokernelTester()
14054 .cr(16)
14055 .kr(25)
14056 .channels(channels)
14057 .input_offset(304)
14058 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan4ba70b72021-07-19 11:20:16 -070014060 }
14061 }
14062 }
14063#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14064
14065
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128.
  // Every case configures DWConvMicrokernelTester with cr(16) and kr(25) and
  // starts with TEST_REQUIRES_ARM_NEON.

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(16)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts that are multiples of cr: 32, 80, 128, 176, 224.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 1..15, all smaller than cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17..31, strictly between cr and 2 * cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) with channel counts 1, 16, 31, 46, 61, 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, additionally sweeping step() over 2..25.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_step) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) with output_stride(83) across channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable is forwarded to channels(); previously channels(16) was
  // hard-coded, so every iteration repeated the same configuration. All swept
  // values stay below the output stride of 83.
  // NOTE: this file is generated (see the file header); mirror this change in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, input_offset) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as input_offset, additionally sweeping zero_index() over 0..24.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, zero) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14250
14251
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64.
  // Every case configures DWConvMicrokernelTester with cr(8) and kr(25) and
  // starts with TEST_REQUIRES_ARM_NEON_V8.

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_eq_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts that are multiples of cr: 16, 40, 64, 88, 112.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_8, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_8, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 1..7, all smaller than cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_lt_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 1; channels < 8; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 9..15, strictly between cr and 2 * cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_8, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_8, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) with channel counts 1, 8, 15, 22, 29, 36.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, additionally sweeping step() over 2..25.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_step) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) with output_stride(43) across channel counts 1, 8, 15, 22, 29, 36.
  // The loop variable is forwarded to channels(); previously channels(8) was
  // hard-coded, so every iteration repeated the same configuration. All swept
  // values stay below the output stride of 43.
  // NOTE: this file is generated (see the file header); mirror this change in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 16, 40, 64, 88, 112 with input_offset(176).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, input_offset) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as input_offset, additionally sweeping zero_index() over 0..24.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, zero) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 16; channels < 128; channels += 24) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(channels)
          .input_offset(176)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14436
14437
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64.
  // Every case configures DWConvMicrokernelTester with cr(16) and kr(25) and
  // starts with TEST_REQUIRES_ARM_NEON_V8.

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_eq_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(16)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts that are multiples of cr: 32, 80, 128, 176, 224.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 1..15, all smaller than cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_lt_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17..31, strictly between cr and 2 * cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) with channel counts 1, 16, 31, 46, 61, 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, additionally sweeping step() over 2..25.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_step) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) with output_stride(83) across channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable is forwarded to channels(); previously channels(16) was
  // hard-coded, so every iteration repeated the same configuration. All swept
  // values stay below the output stride of 83.
  // NOTE: this file is generated (see the file header); mirror this change in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, input_offset) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as input_offset, additionally sweeping zero_index() over 0..24.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, zero) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14622
14623
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128.
  // Every case configures DWConvMicrokernelTester with cr(16) and kr(25) and
  // starts with TEST_REQUIRES_ARM_NEON_V8.

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_eq_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(16)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts that are multiples of cr: 32, 80, 128, 176, 224.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_div_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 1..15, all smaller than cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_lt_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17..31, strictly between cr and 2 * cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same channel sweep as c_gt_16, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) with channel counts 1, 16, 31, 46, 61, 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, additionally sweeping step() over 2..25.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_step) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) with output_stride(83) across channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable is forwarded to channels(); previously channels(16) was
  // hard-coded, so every iteration repeated the same configuration. All swept
  // values stay below the output stride of 83.
  // NOTE: this file is generated (see the file header); mirror this change in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as multipixel, with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, input_offset) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Same as input_offset, additionally sweeping zero_index() over 0..24.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, zero) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14808
14809
14810#if XNN_ARCH_ARM || XNN_ARCH_ARM64
14811 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_eq_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014812 TEST_REQUIRES_ARM_NEON;
14813 DWConvMicrokernelTester()
14814 .cr(8)
14815 .kr(25)
14816 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014818 }
14819
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014820 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014821 TEST_REQUIRES_ARM_NEON;
14822 for (uint32_t channels = 16; channels < 128; channels += 24) {
14823 DWConvMicrokernelTester()
14824 .cr(8)
14825 .kr(25)
14826 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014827 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014828 }
14829 }
14830
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014831 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014832 TEST_REQUIRES_ARM_NEON;
14833 for (uint32_t channels = 16; channels < 128; channels += 24) {
14834 DWConvMicrokernelTester()
14835 .cr(8)
14836 .kr(25)
14837 .channels(channels)
14838 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014840 }
14841 }
14842
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014843 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014844 TEST_REQUIRES_ARM_NEON;
14845 for (uint32_t channels = 16; channels < 128; channels += 24) {
14846 DWConvMicrokernelTester()
14847 .cr(8)
14848 .kr(25)
14849 .channels(channels)
14850 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014851 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014852 }
14853 }
14854
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014855 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_lt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014856 TEST_REQUIRES_ARM_NEON;
14857 for (uint32_t channels = 1; channels < 8; channels++) {
14858 DWConvMicrokernelTester()
14859 .cr(8)
14860 .kr(25)
14861 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014862 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014863 }
14864 }
14865
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014866 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014867 TEST_REQUIRES_ARM_NEON;
14868 for (uint32_t channels = 9; channels < 16; channels++) {
14869 DWConvMicrokernelTester()
14870 .cr(8)
14871 .kr(25)
14872 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080014873 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014874 }
14875 }
14876
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014877 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014878 TEST_REQUIRES_ARM_NEON;
14879 for (uint32_t channels = 9; channels < 16; channels++) {
14880 DWConvMicrokernelTester()
14881 .cr(8)
14882 .kr(25)
14883 .channels(channels)
14884 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014886 }
14887 }
14888
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014889 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014890 TEST_REQUIRES_ARM_NEON;
14891 for (uint32_t channels = 9; channels < 16; channels++) {
14892 DWConvMicrokernelTester()
14893 .cr(8)
14894 .kr(25)
14895 .channels(channels)
14896 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014897 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014898 }
14899 }
14900
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014901 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014902 TEST_REQUIRES_ARM_NEON;
14903 for (size_t channels = 1; channels <= 40; channels += 7) {
14904 DWConvMicrokernelTester()
14905 .cr(8)
14906 .kr(25)
14907 .channels(channels)
14908 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014909 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014910 }
14911 }
14912
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014913 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014914 TEST_REQUIRES_ARM_NEON;
14915 for (size_t channels = 1; channels <= 40; channels += 7) {
14916 for (size_t step = 2; step <= 25; step++) {
14917 DWConvMicrokernelTester()
14918 .cr(8)
14919 .kr(25)
14920 .channels(channels)
14921 .width(3)
14922 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080014923 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014924 }
14925 }
14926 }
14927
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014928 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014929 TEST_REQUIRES_ARM_NEON;
14930 for (size_t channels = 1; channels <= 40; channels += 7) {
14931 DWConvMicrokernelTester()
14932 .cr(8)
14933 .kr(25)
14934 .channels(8)
14935 .width(5)
14936 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014937 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014938 }
14939 }
14940
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014941 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014942 TEST_REQUIRES_ARM_NEON;
14943 for (size_t channels = 1; channels <= 40; channels += 7) {
14944 DWConvMicrokernelTester()
14945 .cr(8)
14946 .kr(25)
14947 .channels(channels)
14948 .width(3)
14949 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014950 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014951 }
14952 }
14953
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014954 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014955 TEST_REQUIRES_ARM_NEON;
14956 for (size_t channels = 1; channels <= 40; channels += 7) {
14957 DWConvMicrokernelTester()
14958 .cr(8)
14959 .kr(25)
14960 .channels(channels)
14961 .width(3)
14962 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014963 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014964 }
14965 }
14966
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014967 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014968 TEST_REQUIRES_ARM_NEON;
14969 for (uint32_t channels = 16; channels < 128; channels += 24) {
14970 DWConvMicrokernelTester()
14971 .cr(8)
14972 .kr(25)
14973 .channels(channels)
14974 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080014975 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014976 }
14977 }
14978
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014979 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014980 TEST_REQUIRES_ARM_NEON;
14981 for (uint32_t mz = 0; mz < 25; mz++) {
14982 for (uint32_t channels = 16; channels < 128; channels += 24) {
14983 DWConvMicrokernelTester()
14984 .cr(8)
14985 .kr(25)
14986 .channels(channels)
14987 .input_offset(176)
14988 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014989 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014990 }
14991 }
14992 }
14993#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14994
14995
14996#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070014997 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_eq_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070014998 TEST_REQUIRES_ARM_NEON;
14999 DWConvMicrokernelTester()
15000 .cr(16)
15001 .kr(25)
15002 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015003 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015004 }
15005
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015006 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015007 TEST_REQUIRES_ARM_NEON;
15008 for (uint32_t channels = 32; channels < 256; channels += 48) {
15009 DWConvMicrokernelTester()
15010 .cr(16)
15011 .kr(25)
15012 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015013 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015014 }
15015 }
15016
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015017 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015018 TEST_REQUIRES_ARM_NEON;
15019 for (uint32_t channels = 32; channels < 256; channels += 48) {
15020 DWConvMicrokernelTester()
15021 .cr(16)
15022 .kr(25)
15023 .channels(channels)
15024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015025 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015026 }
15027 }
15028
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015029 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015030 TEST_REQUIRES_ARM_NEON;
15031 for (uint32_t channels = 32; channels < 256; channels += 48) {
15032 DWConvMicrokernelTester()
15033 .cr(16)
15034 .kr(25)
15035 .channels(channels)
15036 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015037 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015038 }
15039 }
15040
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015041 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_lt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015042 TEST_REQUIRES_ARM_NEON;
15043 for (uint32_t channels = 1; channels < 16; channels++) {
15044 DWConvMicrokernelTester()
15045 .cr(16)
15046 .kr(25)
15047 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015048 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015049 }
15050 }
15051
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015052 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015053 TEST_REQUIRES_ARM_NEON;
15054 for (uint32_t channels = 17; channels < 32; channels++) {
15055 DWConvMicrokernelTester()
15056 .cr(16)
15057 .kr(25)
15058 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015060 }
15061 }
15062
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015063 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015064 TEST_REQUIRES_ARM_NEON;
15065 for (uint32_t channels = 17; channels < 32; channels++) {
15066 DWConvMicrokernelTester()
15067 .cr(16)
15068 .kr(25)
15069 .channels(channels)
15070 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015071 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015072 }
15073 }
15074
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015075 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015076 TEST_REQUIRES_ARM_NEON;
15077 for (uint32_t channels = 17; channels < 32; channels++) {
15078 DWConvMicrokernelTester()
15079 .cr(16)
15080 .kr(25)
15081 .channels(channels)
15082 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015083 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015084 }
15085 }
15086
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015087 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015088 TEST_REQUIRES_ARM_NEON;
15089 for (size_t channels = 1; channels <= 80; channels += 15) {
15090 DWConvMicrokernelTester()
15091 .cr(16)
15092 .kr(25)
15093 .channels(channels)
15094 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015095 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015096 }
15097 }
15098
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015099 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015100 TEST_REQUIRES_ARM_NEON;
15101 for (size_t channels = 1; channels <= 80; channels += 15) {
15102 for (size_t step = 2; step <= 25; step++) {
15103 DWConvMicrokernelTester()
15104 .cr(16)
15105 .kr(25)
15106 .channels(channels)
15107 .width(3)
15108 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015109 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015110 }
15111 }
15112 }
15113
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015114 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015115 TEST_REQUIRES_ARM_NEON;
15116 for (size_t channels = 1; channels <= 80; channels += 15) {
15117 DWConvMicrokernelTester()
15118 .cr(16)
15119 .kr(25)
15120 .channels(16)
15121 .width(5)
15122 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015123 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015124 }
15125 }
15126
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015127 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015128 TEST_REQUIRES_ARM_NEON;
15129 for (size_t channels = 1; channels <= 80; channels += 15) {
15130 DWConvMicrokernelTester()
15131 .cr(16)
15132 .kr(25)
15133 .channels(channels)
15134 .width(3)
15135 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015136 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015137 }
15138 }
15139
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015140 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015141 TEST_REQUIRES_ARM_NEON;
15142 for (size_t channels = 1; channels <= 80; channels += 15) {
15143 DWConvMicrokernelTester()
15144 .cr(16)
15145 .kr(25)
15146 .channels(channels)
15147 .width(3)
15148 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015149 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015150 }
15151 }
15152
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015153 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015154 TEST_REQUIRES_ARM_NEON;
15155 for (uint32_t channels = 32; channels < 256; channels += 48) {
15156 DWConvMicrokernelTester()
15157 .cr(16)
15158 .kr(25)
15159 .channels(channels)
15160 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015161 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015162 }
15163 }
15164
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015165 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015166 TEST_REQUIRES_ARM_NEON;
15167 for (uint32_t mz = 0; mz < 25; mz++) {
15168 for (uint32_t channels = 32; channels < 256; channels += 48) {
15169 DWConvMicrokernelTester()
15170 .cr(16)
15171 .kr(25)
15172 .channels(channels)
15173 .input_offset(304)
15174 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015175 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015176 }
15177 }
15178 }
15179#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15180
15181
15182#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015183 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_eq_16) {
15184 TEST_REQUIRES_ARM_NEON;
15185 DWConvMicrokernelTester()
15186 .cr(16)
15187 .kr(25)
15188 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015189 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015190 }
15191
15192 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16) {
15193 TEST_REQUIRES_ARM_NEON;
15194 for (uint32_t channels = 32; channels < 256; channels += 48) {
15195 DWConvMicrokernelTester()
15196 .cr(16)
15197 .kr(25)
15198 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015199 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015200 }
15201 }
15202
15203 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmin) {
15204 TEST_REQUIRES_ARM_NEON;
15205 for (uint32_t channels = 32; channels < 256; channels += 48) {
15206 DWConvMicrokernelTester()
15207 .cr(16)
15208 .kr(25)
15209 .channels(channels)
15210 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015211 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015212 }
15213 }
15214
15215 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmax) {
15216 TEST_REQUIRES_ARM_NEON;
15217 for (uint32_t channels = 32; channels < 256; channels += 48) {
15218 DWConvMicrokernelTester()
15219 .cr(16)
15220 .kr(25)
15221 .channels(channels)
15222 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015223 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015224 }
15225 }
15226
15227 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_lt_16) {
15228 TEST_REQUIRES_ARM_NEON;
15229 for (uint32_t channels = 1; channels < 16; channels++) {
15230 DWConvMicrokernelTester()
15231 .cr(16)
15232 .kr(25)
15233 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015234 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015235 }
15236 }
15237
15238 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16) {
15239 TEST_REQUIRES_ARM_NEON;
15240 for (uint32_t channels = 17; channels < 32; channels++) {
15241 DWConvMicrokernelTester()
15242 .cr(16)
15243 .kr(25)
15244 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015245 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015246 }
15247 }
15248
15249 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmin) {
15250 TEST_REQUIRES_ARM_NEON;
15251 for (uint32_t channels = 17; channels < 32; channels++) {
15252 DWConvMicrokernelTester()
15253 .cr(16)
15254 .kr(25)
15255 .channels(channels)
15256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015257 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015258 }
15259 }
15260
15261 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmax) {
15262 TEST_REQUIRES_ARM_NEON;
15263 for (uint32_t channels = 17; channels < 32; channels++) {
15264 DWConvMicrokernelTester()
15265 .cr(16)
15266 .kr(25)
15267 .channels(channels)
15268 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015269 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015270 }
15271 }
15272
15273 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel) {
15274 TEST_REQUIRES_ARM_NEON;
15275 for (size_t channels = 1; channels <= 80; channels += 15) {
15276 DWConvMicrokernelTester()
15277 .cr(16)
15278 .kr(25)
15279 .channels(channels)
15280 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015281 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015282 }
15283 }
15284
15285 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_step) {
15286 TEST_REQUIRES_ARM_NEON;
15287 for (size_t channels = 1; channels <= 80; channels += 15) {
15288 for (size_t step = 2; step <= 25; step++) {
15289 DWConvMicrokernelTester()
15290 .cr(16)
15291 .kr(25)
15292 .channels(channels)
15293 .width(3)
15294 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015296 }
15297 }
15298 }
15299
15300 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_output_stride) {
15301 TEST_REQUIRES_ARM_NEON;
15302 for (size_t channels = 1; channels <= 80; channels += 15) {
15303 DWConvMicrokernelTester()
15304 .cr(16)
15305 .kr(25)
15306 .channels(16)
15307 .width(5)
15308 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015309 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015310 }
15311 }
15312
15313 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmin) {
15314 TEST_REQUIRES_ARM_NEON;
15315 for (size_t channels = 1; channels <= 80; channels += 15) {
15316 DWConvMicrokernelTester()
15317 .cr(16)
15318 .kr(25)
15319 .channels(channels)
15320 .width(3)
15321 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015322 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015323 }
15324 }
15325
15326 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmax) {
15327 TEST_REQUIRES_ARM_NEON;
15328 for (size_t channels = 1; channels <= 80; channels += 15) {
15329 DWConvMicrokernelTester()
15330 .cr(16)
15331 .kr(25)
15332 .channels(channels)
15333 .width(3)
15334 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015335 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015336 }
15337 }
15338
15339 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, input_offset) {
15340 TEST_REQUIRES_ARM_NEON;
15341 for (uint32_t channels = 32; channels < 256; channels += 48) {
15342 DWConvMicrokernelTester()
15343 .cr(16)
15344 .kr(25)
15345 .channels(channels)
15346 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015347 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015348 }
15349 }
15350
15351 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, zero) {
15352 TEST_REQUIRES_ARM_NEON;
15353 for (uint32_t mz = 0; mz < 25; mz++) {
15354 for (uint32_t channels = 32; channels < 256; channels += 48) {
15355 DWConvMicrokernelTester()
15356 .cr(16)
15357 .kr(25)
15358 .channels(channels)
15359 .input_offset(304)
15360 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015362 }
15363 }
15364 }
15365#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15366
15367
15368#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15369 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_eq_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015370 TEST_REQUIRES_ARM_NEON_V8;
15371 DWConvMicrokernelTester()
15372 .cr(8)
15373 .kr(25)
15374 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015375 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015376 }
15377
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015378 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015379 TEST_REQUIRES_ARM_NEON_V8;
15380 for (uint32_t channels = 16; channels < 128; channels += 24) {
15381 DWConvMicrokernelTester()
15382 .cr(8)
15383 .kr(25)
15384 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015385 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015386 }
15387 }
15388
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015389 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015390 TEST_REQUIRES_ARM_NEON_V8;
15391 for (uint32_t channels = 16; channels < 128; channels += 24) {
15392 DWConvMicrokernelTester()
15393 .cr(8)
15394 .kr(25)
15395 .channels(channels)
15396 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015397 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015398 }
15399 }
15400
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015401 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015402 TEST_REQUIRES_ARM_NEON_V8;
15403 for (uint32_t channels = 16; channels < 128; channels += 24) {
15404 DWConvMicrokernelTester()
15405 .cr(8)
15406 .kr(25)
15407 .channels(channels)
15408 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015409 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015410 }
15411 }
15412
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015413 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_lt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015414 TEST_REQUIRES_ARM_NEON_V8;
15415 for (uint32_t channels = 1; channels < 8; channels++) {
15416 DWConvMicrokernelTester()
15417 .cr(8)
15418 .kr(25)
15419 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015420 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015421 }
15422 }
15423
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015424 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015425 TEST_REQUIRES_ARM_NEON_V8;
15426 for (uint32_t channels = 9; channels < 16; channels++) {
15427 DWConvMicrokernelTester()
15428 .cr(8)
15429 .kr(25)
15430 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015431 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015432 }
15433 }
15434
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015435 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015436 TEST_REQUIRES_ARM_NEON_V8;
15437 for (uint32_t channels = 9; channels < 16; channels++) {
15438 DWConvMicrokernelTester()
15439 .cr(8)
15440 .kr(25)
15441 .channels(channels)
15442 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015443 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015444 }
15445 }
15446
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015447 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015448 TEST_REQUIRES_ARM_NEON_V8;
15449 for (uint32_t channels = 9; channels < 16; channels++) {
15450 DWConvMicrokernelTester()
15451 .cr(8)
15452 .kr(25)
15453 .channels(channels)
15454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015455 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015456 }
15457 }
15458
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015459 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015460 TEST_REQUIRES_ARM_NEON_V8;
15461 for (size_t channels = 1; channels <= 40; channels += 7) {
15462 DWConvMicrokernelTester()
15463 .cr(8)
15464 .kr(25)
15465 .channels(channels)
15466 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015467 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015468 }
15469 }
15470
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015471 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015472 TEST_REQUIRES_ARM_NEON_V8;
15473 for (size_t channels = 1; channels <= 40; channels += 7) {
15474 for (size_t step = 2; step <= 25; step++) {
15475 DWConvMicrokernelTester()
15476 .cr(8)
15477 .kr(25)
15478 .channels(channels)
15479 .width(3)
15480 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015481 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015482 }
15483 }
15484 }
15485
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015486 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015487 TEST_REQUIRES_ARM_NEON_V8;
15488 for (size_t channels = 1; channels <= 40; channels += 7) {
15489 DWConvMicrokernelTester()
15490 .cr(8)
15491 .kr(25)
15492 .channels(8)
15493 .width(5)
15494 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015495 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015496 }
15497 }
15498
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015499 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015500 TEST_REQUIRES_ARM_NEON_V8;
15501 for (size_t channels = 1; channels <= 40; channels += 7) {
15502 DWConvMicrokernelTester()
15503 .cr(8)
15504 .kr(25)
15505 .channels(channels)
15506 .width(3)
15507 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015508 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015509 }
15510 }
15511
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015512 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015513 TEST_REQUIRES_ARM_NEON_V8;
15514 for (size_t channels = 1; channels <= 40; channels += 7) {
15515 DWConvMicrokernelTester()
15516 .cr(8)
15517 .kr(25)
15518 .channels(channels)
15519 .width(3)
15520 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015521 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015522 }
15523 }
15524
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015525 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015526 TEST_REQUIRES_ARM_NEON_V8;
15527 for (uint32_t channels = 16; channels < 128; channels += 24) {
15528 DWConvMicrokernelTester()
15529 .cr(8)
15530 .kr(25)
15531 .channels(channels)
15532 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080015533 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015534 }
15535 }
15536
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015537 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015538 TEST_REQUIRES_ARM_NEON_V8;
15539 for (uint32_t mz = 0; mz < 25; mz++) {
15540 for (uint32_t channels = 16; channels < 128; channels += 24) {
15541 DWConvMicrokernelTester()
15542 .cr(8)
15543 .kr(25)
15544 .channels(channels)
15545 .input_offset(176)
15546 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015547 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015548 }
15549 }
15550 }
15551#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15552
15553
15554#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015555 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_eq_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015556 TEST_REQUIRES_ARM_NEON_V8;
15557 DWConvMicrokernelTester()
15558 .cr(16)
15559 .kr(25)
15560 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015561 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015562 }
15563
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015564 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015565 TEST_REQUIRES_ARM_NEON_V8;
15566 for (uint32_t channels = 32; channels < 256; channels += 48) {
15567 DWConvMicrokernelTester()
15568 .cr(16)
15569 .kr(25)
15570 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015571 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015572 }
15573 }
15574
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015575 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015576 TEST_REQUIRES_ARM_NEON_V8;
15577 for (uint32_t channels = 32; channels < 256; channels += 48) {
15578 DWConvMicrokernelTester()
15579 .cr(16)
15580 .kr(25)
15581 .channels(channels)
15582 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015583 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015584 }
15585 }
15586
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015587 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015588 TEST_REQUIRES_ARM_NEON_V8;
15589 for (uint32_t channels = 32; channels < 256; channels += 48) {
15590 DWConvMicrokernelTester()
15591 .cr(16)
15592 .kr(25)
15593 .channels(channels)
15594 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015595 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015596 }
15597 }
15598
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015599 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_lt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015600 TEST_REQUIRES_ARM_NEON_V8;
15601 for (uint32_t channels = 1; channels < 16; channels++) {
15602 DWConvMicrokernelTester()
15603 .cr(16)
15604 .kr(25)
15605 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015606 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015607 }
15608 }
15609
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015610 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015611 TEST_REQUIRES_ARM_NEON_V8;
15612 for (uint32_t channels = 17; channels < 32; channels++) {
15613 DWConvMicrokernelTester()
15614 .cr(16)
15615 .kr(25)
15616 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015617 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015618 }
15619 }
15620
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015621 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015622 TEST_REQUIRES_ARM_NEON_V8;
15623 for (uint32_t channels = 17; channels < 32; channels++) {
15624 DWConvMicrokernelTester()
15625 .cr(16)
15626 .kr(25)
15627 .channels(channels)
15628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015629 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015630 }
15631 }
15632
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015633 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015634 TEST_REQUIRES_ARM_NEON_V8;
15635 for (uint32_t channels = 17; channels < 32; channels++) {
15636 DWConvMicrokernelTester()
15637 .cr(16)
15638 .kr(25)
15639 .channels(channels)
15640 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015641 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015642 }
15643 }
15644
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015645 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015646 TEST_REQUIRES_ARM_NEON_V8;
15647 for (size_t channels = 1; channels <= 80; channels += 15) {
15648 DWConvMicrokernelTester()
15649 .cr(16)
15650 .kr(25)
15651 .channels(channels)
15652 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015653 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015654 }
15655 }
15656
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015657 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_step) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015658 TEST_REQUIRES_ARM_NEON_V8;
15659 for (size_t channels = 1; channels <= 80; channels += 15) {
15660 for (size_t step = 2; step <= 25; step++) {
15661 DWConvMicrokernelTester()
15662 .cr(16)
15663 .kr(25)
15664 .channels(channels)
15665 .width(3)
15666 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015667 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015668 }
15669 }
15670 }
15671
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015672 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015673 TEST_REQUIRES_ARM_NEON_V8;
15674 for (size_t channels = 1; channels <= 80; channels += 15) {
15675 DWConvMicrokernelTester()
15676 .cr(16)
15677 .kr(25)
15678 .channels(16)
15679 .width(5)
15680 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015681 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015682 }
15683 }
15684
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015685 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015686 TEST_REQUIRES_ARM_NEON_V8;
15687 for (size_t channels = 1; channels <= 80; channels += 15) {
15688 DWConvMicrokernelTester()
15689 .cr(16)
15690 .kr(25)
15691 .channels(channels)
15692 .width(3)
15693 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015694 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015695 }
15696 }
15697
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015698 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015699 TEST_REQUIRES_ARM_NEON_V8;
15700 for (size_t channels = 1; channels <= 80; channels += 15) {
15701 DWConvMicrokernelTester()
15702 .cr(16)
15703 .kr(25)
15704 .channels(channels)
15705 .width(3)
15706 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015707 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015708 }
15709 }
15710
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015711 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, input_offset) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015712 TEST_REQUIRES_ARM_NEON_V8;
15713 for (uint32_t channels = 32; channels < 256; channels += 48) {
15714 DWConvMicrokernelTester()
15715 .cr(16)
15716 .kr(25)
15717 .channels(channels)
15718 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015719 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015720 }
15721 }
15722
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015723 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, zero) {
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015724 TEST_REQUIRES_ARM_NEON_V8;
15725 for (uint32_t mz = 0; mz < 25; mz++) {
15726 for (uint32_t channels = 32; channels < 256; channels += 48) {
15727 DWConvMicrokernelTester()
15728 .cr(16)
15729 .kr(25)
15730 .channels(channels)
15731 .input_offset(304)
15732 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015733 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015734 }
15735 }
15736 }
15737#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15738
15739
15740#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15741 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_eq_16) {
15742 TEST_REQUIRES_ARM_NEON_V8;
15743 DWConvMicrokernelTester()
15744 .cr(16)
15745 .kr(25)
15746 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015747 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015748 }
15749
15750 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16) {
15751 TEST_REQUIRES_ARM_NEON_V8;
15752 for (uint32_t channels = 32; channels < 256; channels += 48) {
15753 DWConvMicrokernelTester()
15754 .cr(16)
15755 .kr(25)
15756 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015757 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015758 }
15759 }
15760
15761 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
15762 TEST_REQUIRES_ARM_NEON_V8;
15763 for (uint32_t channels = 32; channels < 256; channels += 48) {
15764 DWConvMicrokernelTester()
15765 .cr(16)
15766 .kr(25)
15767 .channels(channels)
15768 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015770 }
15771 }
15772
15773 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
15774 TEST_REQUIRES_ARM_NEON_V8;
15775 for (uint32_t channels = 32; channels < 256; channels += 48) {
15776 DWConvMicrokernelTester()
15777 .cr(16)
15778 .kr(25)
15779 .channels(channels)
15780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015781 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015782 }
15783 }
15784
15785 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_lt_16) {
15786 TEST_REQUIRES_ARM_NEON_V8;
15787 for (uint32_t channels = 1; channels < 16; channels++) {
15788 DWConvMicrokernelTester()
15789 .cr(16)
15790 .kr(25)
15791 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015792 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015793 }
15794 }
15795
15796 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16) {
15797 TEST_REQUIRES_ARM_NEON_V8;
15798 for (uint32_t channels = 17; channels < 32; channels++) {
15799 DWConvMicrokernelTester()
15800 .cr(16)
15801 .kr(25)
15802 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015803 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015804 }
15805 }
15806
15807 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
15808 TEST_REQUIRES_ARM_NEON_V8;
15809 for (uint32_t channels = 17; channels < 32; channels++) {
15810 DWConvMicrokernelTester()
15811 .cr(16)
15812 .kr(25)
15813 .channels(channels)
15814 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015815 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015816 }
15817 }
15818
15819 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
15820 TEST_REQUIRES_ARM_NEON_V8;
15821 for (uint32_t channels = 17; channels < 32; channels++) {
15822 DWConvMicrokernelTester()
15823 .cr(16)
15824 .kr(25)
15825 .channels(channels)
15826 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015827 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015828 }
15829 }
15830
15831 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel) {
15832 TEST_REQUIRES_ARM_NEON_V8;
15833 for (size_t channels = 1; channels <= 80; channels += 15) {
15834 DWConvMicrokernelTester()
15835 .cr(16)
15836 .kr(25)
15837 .channels(channels)
15838 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015839 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015840 }
15841 }
15842
15843 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_step) {
15844 TEST_REQUIRES_ARM_NEON_V8;
15845 for (size_t channels = 1; channels <= 80; channels += 15) {
15846 for (size_t step = 2; step <= 25; step++) {
15847 DWConvMicrokernelTester()
15848 .cr(16)
15849 .kr(25)
15850 .channels(channels)
15851 .width(3)
15852 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080015853 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015854 }
15855 }
15856 }
15857
15858 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
15859 TEST_REQUIRES_ARM_NEON_V8;
15860 for (size_t channels = 1; channels <= 80; channels += 15) {
15861 DWConvMicrokernelTester()
15862 .cr(16)
15863 .kr(25)
15864 .channels(16)
15865 .width(5)
15866 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015867 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015868 }
15869 }
15870
15871 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmin) {
15872 TEST_REQUIRES_ARM_NEON_V8;
15873 for (size_t channels = 1; channels <= 80; channels += 15) {
15874 DWConvMicrokernelTester()
15875 .cr(16)
15876 .kr(25)
15877 .channels(channels)
15878 .width(3)
15879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015880 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015881 }
15882 }
15883
15884 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmax) {
15885 TEST_REQUIRES_ARM_NEON_V8;
15886 for (size_t channels = 1; channels <= 80; channels += 15) {
15887 DWConvMicrokernelTester()
15888 .cr(16)
15889 .kr(25)
15890 .channels(channels)
15891 .width(3)
15892 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015893 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015894 }
15895 }
15896
15897 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, input_offset) {
15898 TEST_REQUIRES_ARM_NEON_V8;
15899 for (uint32_t channels = 32; channels < 256; channels += 48) {
15900 DWConvMicrokernelTester()
15901 .cr(16)
15902 .kr(25)
15903 .channels(channels)
15904 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080015905 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan5f2939f2021-07-23 13:38:32 -070015906 }
15907 }
15908
15909 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, zero) {
15910 TEST_REQUIRES_ARM_NEON_V8;
15911 for (uint32_t mz = 0; mz < 25; mz++) {
15912 for (uint32_t channels = 32; channels < 256; channels += 48) {
15913 DWConvMicrokernelTester()
15914 .cr(16)
15915 .kr(25)
15916 .channels(channels)
15917 .input_offset(304)
15918 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015919 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancaccd8e2021-07-22 23:09:00 -070015920 }
15921 }
15922 }
15923#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15924
15925
15926#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan59af5812021-06-29 18:09:57 -070015927 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
15928 TEST_REQUIRES_ARM_NEON;
15929 DWConvMicrokernelTester()
15930 .cr(8)
15931 .kr(25)
15932 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015933 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015934 }
15935
15936 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
15937 TEST_REQUIRES_ARM_NEON;
15938 for (uint32_t channels = 16; channels < 128; channels += 24) {
15939 DWConvMicrokernelTester()
15940 .cr(8)
15941 .kr(25)
15942 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015943 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015944 }
15945 }
15946
15947 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
15948 TEST_REQUIRES_ARM_NEON;
15949 for (uint32_t channels = 16; channels < 128; channels += 24) {
15950 DWConvMicrokernelTester()
15951 .cr(8)
15952 .kr(25)
15953 .channels(channels)
15954 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015955 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015956 }
15957 }
15958
15959 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
15960 TEST_REQUIRES_ARM_NEON;
15961 for (uint32_t channels = 16; channels < 128; channels += 24) {
15962 DWConvMicrokernelTester()
15963 .cr(8)
15964 .kr(25)
15965 .channels(channels)
15966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015967 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015968 }
15969 }
15970
15971 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
15972 TEST_REQUIRES_ARM_NEON;
15973 for (uint32_t channels = 1; channels < 8; channels++) {
15974 DWConvMicrokernelTester()
15975 .cr(8)
15976 .kr(25)
15977 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015978 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015979 }
15980 }
15981
15982 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
15983 TEST_REQUIRES_ARM_NEON;
15984 for (uint32_t channels = 9; channels < 16; channels++) {
15985 DWConvMicrokernelTester()
15986 .cr(8)
15987 .kr(25)
15988 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080015989 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070015990 }
15991 }
15992
15993 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
15994 TEST_REQUIRES_ARM_NEON;
15995 for (uint32_t channels = 9; channels < 16; channels++) {
15996 DWConvMicrokernelTester()
15997 .cr(8)
15998 .kr(25)
15999 .channels(channels)
16000 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016001 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016002 }
16003 }
16004
16005 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
16006 TEST_REQUIRES_ARM_NEON;
16007 for (uint32_t channels = 9; channels < 16; channels++) {
16008 DWConvMicrokernelTester()
16009 .cr(8)
16010 .kr(25)
16011 .channels(channels)
16012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016013 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016014 }
16015 }
16016
16017 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
16018 TEST_REQUIRES_ARM_NEON;
16019 for (size_t channels = 1; channels <= 40; channels += 7) {
16020 DWConvMicrokernelTester()
16021 .cr(8)
16022 .kr(25)
16023 .channels(channels)
16024 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016025 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016026 }
16027 }
16028
16029 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
16030 TEST_REQUIRES_ARM_NEON;
16031 for (size_t channels = 1; channels <= 40; channels += 7) {
16032 for (size_t step = 2; step <= 25; step++) {
16033 DWConvMicrokernelTester()
16034 .cr(8)
16035 .kr(25)
16036 .channels(channels)
16037 .width(3)
16038 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016039 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016040 }
16041 }
16042 }
16043
16044 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
16045 TEST_REQUIRES_ARM_NEON;
16046 for (size_t channels = 1; channels <= 40; channels += 7) {
16047 DWConvMicrokernelTester()
16048 .cr(8)
16049 .kr(25)
16050 .channels(8)
16051 .width(5)
16052 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016053 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016054 }
16055 }
16056
16057 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
16058 TEST_REQUIRES_ARM_NEON;
16059 for (size_t channels = 1; channels <= 40; channels += 7) {
16060 DWConvMicrokernelTester()
16061 .cr(8)
16062 .kr(25)
16063 .channels(channels)
16064 .width(3)
16065 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016066 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016067 }
16068 }
16069
16070 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
16071 TEST_REQUIRES_ARM_NEON;
16072 for (size_t channels = 1; channels <= 40; channels += 7) {
16073 DWConvMicrokernelTester()
16074 .cr(8)
16075 .kr(25)
16076 .channels(channels)
16077 .width(3)
16078 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016079 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016080 }
16081 }
16082
16083 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
16084 TEST_REQUIRES_ARM_NEON;
16085 for (uint32_t channels = 16; channels < 128; channels += 24) {
16086 DWConvMicrokernelTester()
16087 .cr(8)
16088 .kr(25)
16089 .channels(channels)
16090 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080016091 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016092 }
16093 }
16094
16095 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
16096 TEST_REQUIRES_ARM_NEON;
16097 for (uint32_t mz = 0; mz < 25; mz++) {
16098 for (uint32_t channels = 16; channels < 128; channels += 24) {
16099 DWConvMicrokernelTester()
16100 .cr(8)
16101 .kr(25)
16102 .channels(channels)
16103 .input_offset(176)
16104 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016106 }
16107 }
16108 }
16109#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16110
16111
16112#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16113 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
16114 TEST_REQUIRES_ARM_NEON;
16115 DWConvMicrokernelTester()
16116 .cr(16)
16117 .kr(25)
16118 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016119 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016120 }
16121
16122 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
16123 TEST_REQUIRES_ARM_NEON;
16124 for (uint32_t channels = 32; channels < 256; channels += 48) {
16125 DWConvMicrokernelTester()
16126 .cr(16)
16127 .kr(25)
16128 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016129 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016130 }
16131 }
16132
16133 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
16134 TEST_REQUIRES_ARM_NEON;
16135 for (uint32_t channels = 32; channels < 256; channels += 48) {
16136 DWConvMicrokernelTester()
16137 .cr(16)
16138 .kr(25)
16139 .channels(channels)
16140 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016141 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016142 }
16143 }
16144
16145 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
16146 TEST_REQUIRES_ARM_NEON;
16147 for (uint32_t channels = 32; channels < 256; channels += 48) {
16148 DWConvMicrokernelTester()
16149 .cr(16)
16150 .kr(25)
16151 .channels(channels)
16152 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016153 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016154 }
16155 }
16156
16157 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
16158 TEST_REQUIRES_ARM_NEON;
16159 for (uint32_t channels = 1; channels < 16; channels++) {
16160 DWConvMicrokernelTester()
16161 .cr(16)
16162 .kr(25)
16163 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016164 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016165 }
16166 }
16167
16168 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
16169 TEST_REQUIRES_ARM_NEON;
16170 for (uint32_t channels = 17; channels < 32; channels++) {
16171 DWConvMicrokernelTester()
16172 .cr(16)
16173 .kr(25)
16174 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016175 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016176 }
16177 }
16178
16179 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
16180 TEST_REQUIRES_ARM_NEON;
16181 for (uint32_t channels = 17; channels < 32; channels++) {
16182 DWConvMicrokernelTester()
16183 .cr(16)
16184 .kr(25)
16185 .channels(channels)
16186 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016187 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016188 }
16189 }
16190
16191 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
16192 TEST_REQUIRES_ARM_NEON;
16193 for (uint32_t channels = 17; channels < 32; channels++) {
16194 DWConvMicrokernelTester()
16195 .cr(16)
16196 .kr(25)
16197 .channels(channels)
16198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016199 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016200 }
16201 }
16202
16203 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
16204 TEST_REQUIRES_ARM_NEON;
16205 for (size_t channels = 1; channels <= 80; channels += 15) {
16206 DWConvMicrokernelTester()
16207 .cr(16)
16208 .kr(25)
16209 .channels(channels)
16210 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016211 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016212 }
16213 }
16214
16215 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
16216 TEST_REQUIRES_ARM_NEON;
16217 for (size_t channels = 1; channels <= 80; channels += 15) {
16218 for (size_t step = 2; step <= 25; step++) {
16219 DWConvMicrokernelTester()
16220 .cr(16)
16221 .kr(25)
16222 .channels(channels)
16223 .width(3)
16224 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016226 }
16227 }
16228 }
16229
16230 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
16231 TEST_REQUIRES_ARM_NEON;
16232 for (size_t channels = 1; channels <= 80; channels += 15) {
16233 DWConvMicrokernelTester()
16234 .cr(16)
16235 .kr(25)
16236 .channels(16)
16237 .width(5)
16238 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016239 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016240 }
16241 }
16242
16243 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
16244 TEST_REQUIRES_ARM_NEON;
16245 for (size_t channels = 1; channels <= 80; channels += 15) {
16246 DWConvMicrokernelTester()
16247 .cr(16)
16248 .kr(25)
16249 .channels(channels)
16250 .width(3)
16251 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016252 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016253 }
16254 }
16255
16256 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
16257 TEST_REQUIRES_ARM_NEON;
16258 for (size_t channels = 1; channels <= 80; channels += 15) {
16259 DWConvMicrokernelTester()
16260 .cr(16)
16261 .kr(25)
16262 .channels(channels)
16263 .width(3)
16264 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016265 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016266 }
16267 }
16268
16269 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
16270 TEST_REQUIRES_ARM_NEON;
16271 for (uint32_t channels = 32; channels < 256; channels += 48) {
16272 DWConvMicrokernelTester()
16273 .cr(16)
16274 .kr(25)
16275 .channels(channels)
16276 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080016277 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016278 }
16279 }
16280
16281 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
16282 TEST_REQUIRES_ARM_NEON;
16283 for (uint32_t mz = 0; mz < 25; mz++) {
16284 for (uint32_t channels = 32; channels < 256; channels += 48) {
16285 DWConvMicrokernelTester()
16286 .cr(16)
16287 .kr(25)
16288 .channels(channels)
16289 .input_offset(304)
16290 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016291 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016292 }
16293 }
16294 }
16295#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16296
16297
16298#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16299 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
16300 TEST_REQUIRES_ARM_NEON;
16301 DWConvMicrokernelTester()
16302 .cr(24)
16303 .kr(25)
16304 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080016305 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016306 }
16307
16308 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
16309 TEST_REQUIRES_ARM_NEON;
16310 for (uint32_t channels = 48; channels < 384; channels += 72) {
16311 DWConvMicrokernelTester()
16312 .cr(24)
16313 .kr(25)
16314 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016315 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016316 }
16317 }
16318
16319 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
16320 TEST_REQUIRES_ARM_NEON;
16321 for (uint32_t channels = 48; channels < 384; channels += 72) {
16322 DWConvMicrokernelTester()
16323 .cr(24)
16324 .kr(25)
16325 .channels(channels)
16326 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016327 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016328 }
16329 }
16330
16331 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
16332 TEST_REQUIRES_ARM_NEON;
16333 for (uint32_t channels = 48; channels < 384; channels += 72) {
16334 DWConvMicrokernelTester()
16335 .cr(24)
16336 .kr(25)
16337 .channels(channels)
16338 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016339 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016340 }
16341 }
16342
16343 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
16344 TEST_REQUIRES_ARM_NEON;
16345 for (uint32_t channels = 1; channels < 24; channels++) {
16346 DWConvMicrokernelTester()
16347 .cr(24)
16348 .kr(25)
16349 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016350 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016351 }
16352 }
16353
16354 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
16355 TEST_REQUIRES_ARM_NEON;
16356 for (uint32_t channels = 25; channels < 48; channels++) {
16357 DWConvMicrokernelTester()
16358 .cr(24)
16359 .kr(25)
16360 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016362 }
16363 }
16364
16365 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
16366 TEST_REQUIRES_ARM_NEON;
16367 for (uint32_t channels = 25; channels < 48; channels++) {
16368 DWConvMicrokernelTester()
16369 .cr(24)
16370 .kr(25)
16371 .channels(channels)
16372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016373 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016374 }
16375 }
16376
16377 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
16378 TEST_REQUIRES_ARM_NEON;
16379 for (uint32_t channels = 25; channels < 48; channels++) {
16380 DWConvMicrokernelTester()
16381 .cr(24)
16382 .kr(25)
16383 .channels(channels)
16384 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016385 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016386 }
16387 }
16388
16389 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
16390 TEST_REQUIRES_ARM_NEON;
16391 for (size_t channels = 1; channels <= 120; channels += 23) {
16392 DWConvMicrokernelTester()
16393 .cr(24)
16394 .kr(25)
16395 .channels(channels)
16396 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016397 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016398 }
16399 }
16400
16401 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
16402 TEST_REQUIRES_ARM_NEON;
16403 for (size_t channels = 1; channels <= 120; channels += 23) {
16404 for (size_t step = 2; step <= 25; step++) {
16405 DWConvMicrokernelTester()
16406 .cr(24)
16407 .kr(25)
16408 .channels(channels)
16409 .width(3)
16410 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016411 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016412 }
16413 }
16414 }
16415
16416 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
16417 TEST_REQUIRES_ARM_NEON;
16418 for (size_t channels = 1; channels <= 120; channels += 23) {
16419 DWConvMicrokernelTester()
16420 .cr(24)
16421 .kr(25)
16422 .channels(24)
16423 .width(5)
16424 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080016425 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016426 }
16427 }
16428
16429 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
16430 TEST_REQUIRES_ARM_NEON;
16431 for (size_t channels = 1; channels <= 120; channels += 23) {
16432 DWConvMicrokernelTester()
16433 .cr(24)
16434 .kr(25)
16435 .channels(channels)
16436 .width(3)
16437 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016438 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016439 }
16440 }
16441
16442 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
16443 TEST_REQUIRES_ARM_NEON;
16444 for (size_t channels = 1; channels <= 120; channels += 23) {
16445 DWConvMicrokernelTester()
16446 .cr(24)
16447 .kr(25)
16448 .channels(channels)
16449 .width(3)
16450 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016451 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016452 }
16453 }
16454
16455 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
16456 TEST_REQUIRES_ARM_NEON;
16457 for (uint32_t channels = 48; channels < 384; channels += 72) {
16458 DWConvMicrokernelTester()
16459 .cr(24)
16460 .kr(25)
16461 .channels(channels)
16462 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080016463 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016464 }
16465 }
16466
16467 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
16468 TEST_REQUIRES_ARM_NEON;
16469 for (uint32_t mz = 0; mz < 25; mz++) {
16470 for (uint32_t channels = 48; channels < 384; channels += 72) {
16471 DWConvMicrokernelTester()
16472 .cr(24)
16473 .kr(25)
16474 .channels(channels)
16475 .input_offset(464)
16476 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016478 }
16479 }
16480 }
16481#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16482
16483
16484#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16485 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
16486 TEST_REQUIRES_ARM_NEON;
16487 DWConvMicrokernelTester()
16488 .cr(32)
16489 .kr(25)
16490 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080016491 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016492 }
16493
16494 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
16495 TEST_REQUIRES_ARM_NEON;
16496 for (uint32_t channels = 64; channels < 512; channels += 96) {
16497 DWConvMicrokernelTester()
16498 .cr(32)
16499 .kr(25)
16500 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016502 }
16503 }
16504
16505 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
16506 TEST_REQUIRES_ARM_NEON;
16507 for (uint32_t channels = 64; channels < 512; channels += 96) {
16508 DWConvMicrokernelTester()
16509 .cr(32)
16510 .kr(25)
16511 .channels(channels)
16512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016513 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016514 }
16515 }
16516
16517 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
16518 TEST_REQUIRES_ARM_NEON;
16519 for (uint32_t channels = 64; channels < 512; channels += 96) {
16520 DWConvMicrokernelTester()
16521 .cr(32)
16522 .kr(25)
16523 .channels(channels)
16524 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016525 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016526 }
16527 }
16528
16529 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
16530 TEST_REQUIRES_ARM_NEON;
16531 for (uint32_t channels = 1; channels < 32; channels++) {
16532 DWConvMicrokernelTester()
16533 .cr(32)
16534 .kr(25)
16535 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016536 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016537 }
16538 }
16539
16540 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
16541 TEST_REQUIRES_ARM_NEON;
16542 for (uint32_t channels = 33; channels < 64; channels++) {
16543 DWConvMicrokernelTester()
16544 .cr(32)
16545 .kr(25)
16546 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016547 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016548 }
16549 }
16550
16551 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
16552 TEST_REQUIRES_ARM_NEON;
16553 for (uint32_t channels = 33; channels < 64; channels++) {
16554 DWConvMicrokernelTester()
16555 .cr(32)
16556 .kr(25)
16557 .channels(channels)
16558 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016559 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016560 }
16561 }
16562
16563 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
16564 TEST_REQUIRES_ARM_NEON;
16565 for (uint32_t channels = 33; channels < 64; channels++) {
16566 DWConvMicrokernelTester()
16567 .cr(32)
16568 .kr(25)
16569 .channels(channels)
16570 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016571 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016572 }
16573 }
16574
16575 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
16576 TEST_REQUIRES_ARM_NEON;
16577 for (size_t channels = 1; channels <= 160; channels += 31) {
16578 DWConvMicrokernelTester()
16579 .cr(32)
16580 .kr(25)
16581 .channels(channels)
16582 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016583 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016584 }
16585 }
16586
16587 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
16588 TEST_REQUIRES_ARM_NEON;
16589 for (size_t channels = 1; channels <= 160; channels += 31) {
16590 for (size_t step = 2; step <= 25; step++) {
16591 DWConvMicrokernelTester()
16592 .cr(32)
16593 .kr(25)
16594 .channels(channels)
16595 .width(3)
16596 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016597 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016598 }
16599 }
16600 }
16601
16602 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
16603 TEST_REQUIRES_ARM_NEON;
16604 for (size_t channels = 1; channels <= 160; channels += 31) {
16605 DWConvMicrokernelTester()
16606 .cr(32)
16607 .kr(25)
16608 .channels(32)
16609 .width(5)
16610 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080016611 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016612 }
16613 }
16614
16615 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
16616 TEST_REQUIRES_ARM_NEON;
16617 for (size_t channels = 1; channels <= 160; channels += 31) {
16618 DWConvMicrokernelTester()
16619 .cr(32)
16620 .kr(25)
16621 .channels(channels)
16622 .width(3)
16623 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016624 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016625 }
16626 }
16627
16628 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
16629 TEST_REQUIRES_ARM_NEON;
16630 for (size_t channels = 1; channels <= 160; channels += 31) {
16631 DWConvMicrokernelTester()
16632 .cr(32)
16633 .kr(25)
16634 .channels(channels)
16635 .width(3)
16636 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016637 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016638 }
16639 }
16640
16641 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
16642 TEST_REQUIRES_ARM_NEON;
16643 for (uint32_t channels = 64; channels < 512; channels += 96) {
16644 DWConvMicrokernelTester()
16645 .cr(32)
16646 .kr(25)
16647 .channels(channels)
16648 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080016649 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016650 }
16651 }
16652
16653 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
16654 TEST_REQUIRES_ARM_NEON;
16655 for (uint32_t mz = 0; mz < 25; mz++) {
16656 for (uint32_t channels = 64; channels < 512; channels += 96) {
16657 DWConvMicrokernelTester()
16658 .cr(32)
16659 .kr(25)
16660 .channels(channels)
16661 .input_offset(592)
16662 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_minmax_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016664 }
16665 }
16666 }
16667#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16668
16669
16670#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16671 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
16672 TEST_REQUIRES_ARM_NEON_V8;
16673 DWConvMicrokernelTester()
16674 .cr(8)
16675 .kr(25)
16676 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016677 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016678 }
16679
16680 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
16681 TEST_REQUIRES_ARM_NEON_V8;
16682 for (uint32_t channels = 16; channels < 128; channels += 24) {
16683 DWConvMicrokernelTester()
16684 .cr(8)
16685 .kr(25)
16686 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016687 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016688 }
16689 }
16690
16691 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
16692 TEST_REQUIRES_ARM_NEON_V8;
16693 for (uint32_t channels = 16; channels < 128; channels += 24) {
16694 DWConvMicrokernelTester()
16695 .cr(8)
16696 .kr(25)
16697 .channels(channels)
16698 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016699 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016700 }
16701 }
16702
16703 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
16704 TEST_REQUIRES_ARM_NEON_V8;
16705 for (uint32_t channels = 16; channels < 128; channels += 24) {
16706 DWConvMicrokernelTester()
16707 .cr(8)
16708 .kr(25)
16709 .channels(channels)
16710 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016711 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016712 }
16713 }
16714
16715 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
16716 TEST_REQUIRES_ARM_NEON_V8;
16717 for (uint32_t channels = 1; channels < 8; channels++) {
16718 DWConvMicrokernelTester()
16719 .cr(8)
16720 .kr(25)
16721 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016722 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016723 }
16724 }
16725
16726 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
16727 TEST_REQUIRES_ARM_NEON_V8;
16728 for (uint32_t channels = 9; channels < 16; channels++) {
16729 DWConvMicrokernelTester()
16730 .cr(8)
16731 .kr(25)
16732 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016733 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016734 }
16735 }
16736
16737 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
16738 TEST_REQUIRES_ARM_NEON_V8;
16739 for (uint32_t channels = 9; channels < 16; channels++) {
16740 DWConvMicrokernelTester()
16741 .cr(8)
16742 .kr(25)
16743 .channels(channels)
16744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016745 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016746 }
16747 }
16748
16749 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
16750 TEST_REQUIRES_ARM_NEON_V8;
16751 for (uint32_t channels = 9; channels < 16; channels++) {
16752 DWConvMicrokernelTester()
16753 .cr(8)
16754 .kr(25)
16755 .channels(channels)
16756 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016757 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016758 }
16759 }
16760
16761 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
16762 TEST_REQUIRES_ARM_NEON_V8;
16763 for (size_t channels = 1; channels <= 40; channels += 7) {
16764 DWConvMicrokernelTester()
16765 .cr(8)
16766 .kr(25)
16767 .channels(channels)
16768 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016770 }
16771 }
16772
16773 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
16774 TEST_REQUIRES_ARM_NEON_V8;
16775 for (size_t channels = 1; channels <= 40; channels += 7) {
16776 for (size_t step = 2; step <= 25; step++) {
16777 DWConvMicrokernelTester()
16778 .cr(8)
16779 .kr(25)
16780 .channels(channels)
16781 .width(3)
16782 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016783 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016784 }
16785 }
16786 }
16787
16788 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
16789 TEST_REQUIRES_ARM_NEON_V8;
16790 for (size_t channels = 1; channels <= 40; channels += 7) {
16791 DWConvMicrokernelTester()
16792 .cr(8)
16793 .kr(25)
16794 .channels(8)
16795 .width(5)
16796 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016797 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016798 }
16799 }
16800
16801 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
16802 TEST_REQUIRES_ARM_NEON_V8;
16803 for (size_t channels = 1; channels <= 40; channels += 7) {
16804 DWConvMicrokernelTester()
16805 .cr(8)
16806 .kr(25)
16807 .channels(channels)
16808 .width(3)
16809 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016810 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016811 }
16812 }
16813
16814 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
16815 TEST_REQUIRES_ARM_NEON_V8;
16816 for (size_t channels = 1; channels <= 40; channels += 7) {
16817 DWConvMicrokernelTester()
16818 .cr(8)
16819 .kr(25)
16820 .channels(channels)
16821 .width(3)
16822 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016823 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016824 }
16825 }
16826
16827 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
16828 TEST_REQUIRES_ARM_NEON_V8;
16829 for (uint32_t channels = 16; channels < 128; channels += 24) {
16830 DWConvMicrokernelTester()
16831 .cr(8)
16832 .kr(25)
16833 .channels(channels)
16834 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080016835 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016836 }
16837 }
16838
16839 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
16840 TEST_REQUIRES_ARM_NEON_V8;
16841 for (uint32_t mz = 0; mz < 25; mz++) {
16842 for (uint32_t channels = 16; channels < 128; channels += 24) {
16843 DWConvMicrokernelTester()
16844 .cr(8)
16845 .kr(25)
16846 .channels(channels)
16847 .input_offset(176)
16848 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016849 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016850 }
16851 }
16852 }
16853#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16854
16855
16856#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16857 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
16858 TEST_REQUIRES_ARM_NEON_V8;
16859 DWConvMicrokernelTester()
16860 .cr(16)
16861 .kr(25)
16862 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016863 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016864 }
16865
16866 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
16867 TEST_REQUIRES_ARM_NEON_V8;
16868 for (uint32_t channels = 32; channels < 256; channels += 48) {
16869 DWConvMicrokernelTester()
16870 .cr(16)
16871 .kr(25)
16872 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016873 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016874 }
16875 }
16876
16877 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
16878 TEST_REQUIRES_ARM_NEON_V8;
16879 for (uint32_t channels = 32; channels < 256; channels += 48) {
16880 DWConvMicrokernelTester()
16881 .cr(16)
16882 .kr(25)
16883 .channels(channels)
16884 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016886 }
16887 }
16888
16889 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
16890 TEST_REQUIRES_ARM_NEON_V8;
16891 for (uint32_t channels = 32; channels < 256; channels += 48) {
16892 DWConvMicrokernelTester()
16893 .cr(16)
16894 .kr(25)
16895 .channels(channels)
16896 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016897 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016898 }
16899 }
16900
16901 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
16902 TEST_REQUIRES_ARM_NEON_V8;
16903 for (uint32_t channels = 1; channels < 16; channels++) {
16904 DWConvMicrokernelTester()
16905 .cr(16)
16906 .kr(25)
16907 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016908 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016909 }
16910 }
16911
16912 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
16913 TEST_REQUIRES_ARM_NEON_V8;
16914 for (uint32_t channels = 17; channels < 32; channels++) {
16915 DWConvMicrokernelTester()
16916 .cr(16)
16917 .kr(25)
16918 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080016919 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016920 }
16921 }
16922
16923 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
16924 TEST_REQUIRES_ARM_NEON_V8;
16925 for (uint32_t channels = 17; channels < 32; channels++) {
16926 DWConvMicrokernelTester()
16927 .cr(16)
16928 .kr(25)
16929 .channels(channels)
16930 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016931 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016932 }
16933 }
16934
16935 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
16936 TEST_REQUIRES_ARM_NEON_V8;
16937 for (uint32_t channels = 17; channels < 32; channels++) {
16938 DWConvMicrokernelTester()
16939 .cr(16)
16940 .kr(25)
16941 .channels(channels)
16942 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016943 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016944 }
16945 }
16946
16947 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
16948 TEST_REQUIRES_ARM_NEON_V8;
16949 for (size_t channels = 1; channels <= 80; channels += 15) {
16950 DWConvMicrokernelTester()
16951 .cr(16)
16952 .kr(25)
16953 .channels(channels)
16954 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016955 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016956 }
16957 }
16958
16959 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
16960 TEST_REQUIRES_ARM_NEON_V8;
16961 for (size_t channels = 1; channels <= 80; channels += 15) {
16962 for (size_t step = 2; step <= 25; step++) {
16963 DWConvMicrokernelTester()
16964 .cr(16)
16965 .kr(25)
16966 .channels(channels)
16967 .width(3)
16968 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080016969 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016970 }
16971 }
16972 }
16973
16974 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
16975 TEST_REQUIRES_ARM_NEON_V8;
16976 for (size_t channels = 1; channels <= 80; channels += 15) {
16977 DWConvMicrokernelTester()
16978 .cr(16)
16979 .kr(25)
16980 .channels(16)
16981 .width(5)
16982 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016983 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016984 }
16985 }
16986
16987 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
16988 TEST_REQUIRES_ARM_NEON_V8;
16989 for (size_t channels = 1; channels <= 80; channels += 15) {
16990 DWConvMicrokernelTester()
16991 .cr(16)
16992 .kr(25)
16993 .channels(channels)
16994 .width(3)
16995 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016996 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070016997 }
16998 }
16999
17000 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
17001 TEST_REQUIRES_ARM_NEON_V8;
17002 for (size_t channels = 1; channels <= 80; channels += 15) {
17003 DWConvMicrokernelTester()
17004 .cr(16)
17005 .kr(25)
17006 .channels(channels)
17007 .width(3)
17008 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017009 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017010 }
17011 }
17012
17013 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
17014 TEST_REQUIRES_ARM_NEON_V8;
17015 for (uint32_t channels = 32; channels < 256; channels += 48) {
17016 DWConvMicrokernelTester()
17017 .cr(16)
17018 .kr(25)
17019 .channels(channels)
17020 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080017021 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017022 }
17023 }
17024
17025 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
17026 TEST_REQUIRES_ARM_NEON_V8;
17027 for (uint32_t mz = 0; mz < 25; mz++) {
17028 for (uint32_t channels = 32; channels < 256; channels += 48) {
17029 DWConvMicrokernelTester()
17030 .cr(16)
17031 .kr(25)
17032 .channels(channels)
17033 .input_offset(304)
17034 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017035 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017036 }
17037 }
17038 }
17039#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17040
17041
17042#if XNN_ARCH_ARM || XNN_ARCH_ARM64
17043 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
17044 TEST_REQUIRES_ARM_NEON_V8;
17045 DWConvMicrokernelTester()
17046 .cr(24)
17047 .kr(25)
17048 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080017049 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017050 }
17051
17052 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
17053 TEST_REQUIRES_ARM_NEON_V8;
17054 for (uint32_t channels = 48; channels < 384; channels += 72) {
17055 DWConvMicrokernelTester()
17056 .cr(24)
17057 .kr(25)
17058 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017060 }
17061 }
17062
17063 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
17064 TEST_REQUIRES_ARM_NEON_V8;
17065 for (uint32_t channels = 48; channels < 384; channels += 72) {
17066 DWConvMicrokernelTester()
17067 .cr(24)
17068 .kr(25)
17069 .channels(channels)
17070 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017071 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017072 }
17073 }
17074
17075 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
17076 TEST_REQUIRES_ARM_NEON_V8;
17077 for (uint32_t channels = 48; channels < 384; channels += 72) {
17078 DWConvMicrokernelTester()
17079 .cr(24)
17080 .kr(25)
17081 .channels(channels)
17082 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017083 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017084 }
17085 }
17086
17087 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
17088 TEST_REQUIRES_ARM_NEON_V8;
17089 for (uint32_t channels = 1; channels < 24; channels++) {
17090 DWConvMicrokernelTester()
17091 .cr(24)
17092 .kr(25)
17093 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017094 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017095 }
17096 }
17097
17098 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
17099 TEST_REQUIRES_ARM_NEON_V8;
17100 for (uint32_t channels = 25; channels < 48; channels++) {
17101 DWConvMicrokernelTester()
17102 .cr(24)
17103 .kr(25)
17104 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017105 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017106 }
17107 }
17108
17109 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
17110 TEST_REQUIRES_ARM_NEON_V8;
17111 for (uint32_t channels = 25; channels < 48; channels++) {
17112 DWConvMicrokernelTester()
17113 .cr(24)
17114 .kr(25)
17115 .channels(channels)
17116 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017117 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017118 }
17119 }
17120
17121 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
17122 TEST_REQUIRES_ARM_NEON_V8;
17123 for (uint32_t channels = 25; channels < 48; channels++) {
17124 DWConvMicrokernelTester()
17125 .cr(24)
17126 .kr(25)
17127 .channels(channels)
17128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017129 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017130 }
17131 }
17132
17133 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
17134 TEST_REQUIRES_ARM_NEON_V8;
17135 for (size_t channels = 1; channels <= 120; channels += 23) {
17136 DWConvMicrokernelTester()
17137 .cr(24)
17138 .kr(25)
17139 .channels(channels)
17140 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017141 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017142 }
17143 }
17144
17145 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
17146 TEST_REQUIRES_ARM_NEON_V8;
17147 for (size_t channels = 1; channels <= 120; channels += 23) {
17148 for (size_t step = 2; step <= 25; step++) {
17149 DWConvMicrokernelTester()
17150 .cr(24)
17151 .kr(25)
17152 .channels(channels)
17153 .width(3)
17154 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017155 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017156 }
17157 }
17158 }
17159
17160 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
17161 TEST_REQUIRES_ARM_NEON_V8;
17162 for (size_t channels = 1; channels <= 120; channels += 23) {
17163 DWConvMicrokernelTester()
17164 .cr(24)
17165 .kr(25)
17166 .channels(24)
17167 .width(5)
17168 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080017169 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017170 }
17171 }
17172
17173 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
17174 TEST_REQUIRES_ARM_NEON_V8;
17175 for (size_t channels = 1; channels <= 120; channels += 23) {
17176 DWConvMicrokernelTester()
17177 .cr(24)
17178 .kr(25)
17179 .channels(channels)
17180 .width(3)
17181 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017182 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017183 }
17184 }
17185
17186 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
17187 TEST_REQUIRES_ARM_NEON_V8;
17188 for (size_t channels = 1; channels <= 120; channels += 23) {
17189 DWConvMicrokernelTester()
17190 .cr(24)
17191 .kr(25)
17192 .channels(channels)
17193 .width(3)
17194 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017195 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017196 }
17197 }
17198
17199 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
17200 TEST_REQUIRES_ARM_NEON_V8;
17201 for (uint32_t channels = 48; channels < 384; channels += 72) {
17202 DWConvMicrokernelTester()
17203 .cr(24)
17204 .kr(25)
17205 .channels(channels)
17206 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080017207 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017208 }
17209 }
17210
17211 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
17212 TEST_REQUIRES_ARM_NEON_V8;
17213 for (uint32_t mz = 0; mz < 25; mz++) {
17214 for (uint32_t channels = 48; channels < 384; channels += 72) {
17215 DWConvMicrokernelTester()
17216 .cr(24)
17217 .kr(25)
17218 .channels(channels)
17219 .input_offset(464)
17220 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017221 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017222 }
17223 }
17224 }
17225#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17226
17227
17228#if XNN_ARCH_ARM || XNN_ARCH_ARM64
17229 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
17230 TEST_REQUIRES_ARM_NEON_V8;
17231 DWConvMicrokernelTester()
17232 .cr(32)
17233 .kr(25)
17234 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080017235 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017236 }
17237
17238 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
17239 TEST_REQUIRES_ARM_NEON_V8;
17240 for (uint32_t channels = 64; channels < 512; channels += 96) {
17241 DWConvMicrokernelTester()
17242 .cr(32)
17243 .kr(25)
17244 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017245 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017246 }
17247 }
17248
17249 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
17250 TEST_REQUIRES_ARM_NEON_V8;
17251 for (uint32_t channels = 64; channels < 512; channels += 96) {
17252 DWConvMicrokernelTester()
17253 .cr(32)
17254 .kr(25)
17255 .channels(channels)
17256 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017257 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017258 }
17259 }
17260
17261 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
17262 TEST_REQUIRES_ARM_NEON_V8;
17263 for (uint32_t channels = 64; channels < 512; channels += 96) {
17264 DWConvMicrokernelTester()
17265 .cr(32)
17266 .kr(25)
17267 .channels(channels)
17268 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017269 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017270 }
17271 }
17272
17273 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
17274 TEST_REQUIRES_ARM_NEON_V8;
17275 for (uint32_t channels = 1; channels < 32; channels++) {
17276 DWConvMicrokernelTester()
17277 .cr(32)
17278 .kr(25)
17279 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017280 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017281 }
17282 }
17283
17284 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
17285 TEST_REQUIRES_ARM_NEON_V8;
17286 for (uint32_t channels = 33; channels < 64; channels++) {
17287 DWConvMicrokernelTester()
17288 .cr(32)
17289 .kr(25)
17290 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017291 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017292 }
17293 }
17294
17295 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
17296 TEST_REQUIRES_ARM_NEON_V8;
17297 for (uint32_t channels = 33; channels < 64; channels++) {
17298 DWConvMicrokernelTester()
17299 .cr(32)
17300 .kr(25)
17301 .channels(channels)
17302 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017303 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017304 }
17305 }
17306
17307 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
17308 TEST_REQUIRES_ARM_NEON_V8;
17309 for (uint32_t channels = 33; channels < 64; channels++) {
17310 DWConvMicrokernelTester()
17311 .cr(32)
17312 .kr(25)
17313 .channels(channels)
17314 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017315 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017316 }
17317 }
17318
17319 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
17320 TEST_REQUIRES_ARM_NEON_V8;
17321 for (size_t channels = 1; channels <= 160; channels += 31) {
17322 DWConvMicrokernelTester()
17323 .cr(32)
17324 .kr(25)
17325 .channels(channels)
17326 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017327 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017328 }
17329 }
17330
17331 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
17332 TEST_REQUIRES_ARM_NEON_V8;
17333 for (size_t channels = 1; channels <= 160; channels += 31) {
17334 for (size_t step = 2; step <= 25; step++) {
17335 DWConvMicrokernelTester()
17336 .cr(32)
17337 .kr(25)
17338 .channels(channels)
17339 .width(3)
17340 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017341 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017342 }
17343 }
17344 }
17345
17346 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
17347 TEST_REQUIRES_ARM_NEON_V8;
17348 for (size_t channels = 1; channels <= 160; channels += 31) {
17349 DWConvMicrokernelTester()
17350 .cr(32)
17351 .kr(25)
17352 .channels(32)
17353 .width(5)
17354 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017355 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017356 }
17357 }
17358
17359 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
17360 TEST_REQUIRES_ARM_NEON_V8;
17361 for (size_t channels = 1; channels <= 160; channels += 31) {
17362 DWConvMicrokernelTester()
17363 .cr(32)
17364 .kr(25)
17365 .channels(channels)
17366 .width(3)
17367 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017368 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017369 }
17370 }
17371
17372 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
17373 TEST_REQUIRES_ARM_NEON_V8;
17374 for (size_t channels = 1; channels <= 160; channels += 31) {
17375 DWConvMicrokernelTester()
17376 .cr(32)
17377 .kr(25)
17378 .channels(channels)
17379 .width(3)
17380 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017381 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017382 }
17383 }
17384
17385 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
17386 TEST_REQUIRES_ARM_NEON_V8;
17387 for (uint32_t channels = 64; channels < 512; channels += 96) {
17388 DWConvMicrokernelTester()
17389 .cr(32)
17390 .kr(25)
17391 .channels(channels)
17392 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080017393 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017394 }
17395 }
17396
17397 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
17398 TEST_REQUIRES_ARM_NEON_V8;
17399 for (uint32_t mz = 0; mz < 25; mz++) {
17400 for (uint32_t channels = 64; channels < 512; channels += 96) {
17401 DWConvMicrokernelTester()
17402 .cr(32)
17403 .kr(25)
17404 .channels(channels)
17405 .input_offset(592)
17406 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017407 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_minmax_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan59af5812021-06-29 18:09:57 -070017408 }
17409 }
17410 }
17411#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17412
17413
Marat Dukhan98042f22021-06-15 00:43:13 -070017414#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17415 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
17416 TEST_REQUIRES_X86_SSE2;
17417 DWConvMicrokernelTester()
17418 .cr(8)
17419 .kr(25)
17420 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017421 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017422 }
17423
17424 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
17425 TEST_REQUIRES_X86_SSE2;
17426 for (uint32_t channels = 16; channels < 128; channels += 24) {
17427 DWConvMicrokernelTester()
17428 .cr(8)
17429 .kr(25)
17430 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017431 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017432 }
17433 }
17434
17435 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
17436 TEST_REQUIRES_X86_SSE2;
17437 for (uint32_t channels = 16; channels < 128; channels += 24) {
17438 DWConvMicrokernelTester()
17439 .cr(8)
17440 .kr(25)
17441 .channels(channels)
17442 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017443 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017444 }
17445 }
17446
17447 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
17448 TEST_REQUIRES_X86_SSE2;
17449 for (uint32_t channels = 16; channels < 128; channels += 24) {
17450 DWConvMicrokernelTester()
17451 .cr(8)
17452 .kr(25)
17453 .channels(channels)
17454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017455 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017456 }
17457 }
17458
17459 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
17460 TEST_REQUIRES_X86_SSE2;
17461 for (uint32_t channels = 1; channels < 8; channels++) {
17462 DWConvMicrokernelTester()
17463 .cr(8)
17464 .kr(25)
17465 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017466 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017467 }
17468 }
17469
17470 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
17471 TEST_REQUIRES_X86_SSE2;
17472 for (uint32_t channels = 9; channels < 16; channels++) {
17473 DWConvMicrokernelTester()
17474 .cr(8)
17475 .kr(25)
17476 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017478 }
17479 }
17480
17481 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
17482 TEST_REQUIRES_X86_SSE2;
17483 for (uint32_t channels = 9; channels < 16; channels++) {
17484 DWConvMicrokernelTester()
17485 .cr(8)
17486 .kr(25)
17487 .channels(channels)
17488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017489 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017490 }
17491 }
17492
17493 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
17494 TEST_REQUIRES_X86_SSE2;
17495 for (uint32_t channels = 9; channels < 16; channels++) {
17496 DWConvMicrokernelTester()
17497 .cr(8)
17498 .kr(25)
17499 .channels(channels)
17500 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017502 }
17503 }
17504
17505 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
17506 TEST_REQUIRES_X86_SSE2;
17507 for (size_t channels = 1; channels <= 40; channels += 7) {
17508 DWConvMicrokernelTester()
17509 .cr(8)
17510 .kr(25)
17511 .channels(channels)
17512 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017513 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017514 }
17515 }
17516
17517 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
17518 TEST_REQUIRES_X86_SSE2;
17519 for (size_t channels = 1; channels <= 40; channels += 7) {
17520 for (size_t step = 2; step <= 25; step++) {
17521 DWConvMicrokernelTester()
17522 .cr(8)
17523 .kr(25)
17524 .channels(channels)
17525 .width(3)
17526 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017527 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017528 }
17529 }
17530 }
17531
17532 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
17533 TEST_REQUIRES_X86_SSE2;
17534 for (size_t channels = 1; channels <= 40; channels += 7) {
17535 DWConvMicrokernelTester()
17536 .cr(8)
17537 .kr(25)
17538 .channels(8)
17539 .width(5)
17540 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017541 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017542 }
17543 }
17544
17545 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
17546 TEST_REQUIRES_X86_SSE2;
17547 for (size_t channels = 1; channels <= 40; channels += 7) {
17548 DWConvMicrokernelTester()
17549 .cr(8)
17550 .kr(25)
17551 .channels(channels)
17552 .width(3)
17553 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017554 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017555 }
17556 }
17557
17558 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
17559 TEST_REQUIRES_X86_SSE2;
17560 for (size_t channels = 1; channels <= 40; channels += 7) {
17561 DWConvMicrokernelTester()
17562 .cr(8)
17563 .kr(25)
17564 .channels(channels)
17565 .width(3)
17566 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017567 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017568 }
17569 }
17570
17571 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
17572 TEST_REQUIRES_X86_SSE2;
17573 for (uint32_t channels = 16; channels < 128; channels += 24) {
17574 DWConvMicrokernelTester()
17575 .cr(8)
17576 .kr(25)
17577 .channels(channels)
17578 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080017579 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017580 }
17581 }
17582
17583 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
17584 TEST_REQUIRES_X86_SSE2;
17585 for (uint32_t mz = 0; mz < 25; mz++) {
17586 for (uint32_t channels = 16; channels < 128; channels += 24) {
17587 DWConvMicrokernelTester()
17588 .cr(8)
17589 .kr(25)
17590 .channels(channels)
17591 .input_offset(176)
17592 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017594 }
17595 }
17596 }
17597#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17598
17599
17600#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17601 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
17602 TEST_REQUIRES_X86_SSE2;
17603 DWConvMicrokernelTester()
17604 .cr(16)
17605 .kr(25)
17606 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080017607 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017608 }
17609
17610 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
17611 TEST_REQUIRES_X86_SSE2;
17612 for (uint32_t channels = 32; channels < 256; channels += 48) {
17613 DWConvMicrokernelTester()
17614 .cr(16)
17615 .kr(25)
17616 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017617 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017618 }
17619 }
17620
17621 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
17622 TEST_REQUIRES_X86_SSE2;
17623 for (uint32_t channels = 32; channels < 256; channels += 48) {
17624 DWConvMicrokernelTester()
17625 .cr(16)
17626 .kr(25)
17627 .channels(channels)
17628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017630 }
17631 }
17632
17633 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
17634 TEST_REQUIRES_X86_SSE2;
17635 for (uint32_t channels = 32; channels < 256; channels += 48) {
17636 DWConvMicrokernelTester()
17637 .cr(16)
17638 .kr(25)
17639 .channels(channels)
17640 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017641 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017642 }
17643 }
17644
17645 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
17646 TEST_REQUIRES_X86_SSE2;
17647 for (uint32_t channels = 1; channels < 16; channels++) {
17648 DWConvMicrokernelTester()
17649 .cr(16)
17650 .kr(25)
17651 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017652 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017653 }
17654 }
17655
17656 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
17657 TEST_REQUIRES_X86_SSE2;
17658 for (uint32_t channels = 17; channels < 32; channels++) {
17659 DWConvMicrokernelTester()
17660 .cr(16)
17661 .kr(25)
17662 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017664 }
17665 }
17666
17667 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
17668 TEST_REQUIRES_X86_SSE2;
17669 for (uint32_t channels = 17; channels < 32; channels++) {
17670 DWConvMicrokernelTester()
17671 .cr(16)
17672 .kr(25)
17673 .channels(channels)
17674 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017675 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017676 }
17677 }
17678
17679 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
17680 TEST_REQUIRES_X86_SSE2;
17681 for (uint32_t channels = 17; channels < 32; channels++) {
17682 DWConvMicrokernelTester()
17683 .cr(16)
17684 .kr(25)
17685 .channels(channels)
17686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017687 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017688 }
17689 }
17690
17691 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
17692 TEST_REQUIRES_X86_SSE2;
17693 for (size_t channels = 1; channels <= 80; channels += 15) {
17694 DWConvMicrokernelTester()
17695 .cr(16)
17696 .kr(25)
17697 .channels(channels)
17698 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017699 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017700 }
17701 }
17702
17703 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
17704 TEST_REQUIRES_X86_SSE2;
17705 for (size_t channels = 1; channels <= 80; channels += 15) {
17706 for (size_t step = 2; step <= 25; step++) {
17707 DWConvMicrokernelTester()
17708 .cr(16)
17709 .kr(25)
17710 .channels(channels)
17711 .width(3)
17712 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017713 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017714 }
17715 }
17716 }
17717
17718 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
17719 TEST_REQUIRES_X86_SSE2;
17720 for (size_t channels = 1; channels <= 80; channels += 15) {
17721 DWConvMicrokernelTester()
17722 .cr(16)
17723 .kr(25)
17724 .channels(16)
17725 .width(5)
17726 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017727 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017728 }
17729 }
17730
17731 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
17732 TEST_REQUIRES_X86_SSE2;
17733 for (size_t channels = 1; channels <= 80; channels += 15) {
17734 DWConvMicrokernelTester()
17735 .cr(16)
17736 .kr(25)
17737 .channels(channels)
17738 .width(3)
17739 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017740 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017741 }
17742 }
17743
17744 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
17745 TEST_REQUIRES_X86_SSE2;
17746 for (size_t channels = 1; channels <= 80; channels += 15) {
17747 DWConvMicrokernelTester()
17748 .cr(16)
17749 .kr(25)
17750 .channels(channels)
17751 .width(3)
17752 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017753 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017754 }
17755 }
17756
17757 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
17758 TEST_REQUIRES_X86_SSE2;
17759 for (uint32_t channels = 32; channels < 256; channels += 48) {
17760 DWConvMicrokernelTester()
17761 .cr(16)
17762 .kr(25)
17763 .channels(channels)
17764 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080017765 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017766 }
17767 }
17768
17769 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
17770 TEST_REQUIRES_X86_SSE2;
17771 for (uint32_t mz = 0; mz < 25; mz++) {
17772 for (uint32_t channels = 32; channels < 256; channels += 48) {
17773 DWConvMicrokernelTester()
17774 .cr(16)
17775 .kr(25)
17776 .channels(channels)
17777 .input_offset(304)
17778 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017779 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017780 }
17781 }
17782 }
17783#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17784
17785
17786#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17787 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_eq_24) {
17788 TEST_REQUIRES_X86_SSE2;
17789 DWConvMicrokernelTester()
17790 .cr(24)
17791 .kr(25)
17792 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080017793 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017794 }
17795
17796 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24) {
17797 TEST_REQUIRES_X86_SSE2;
17798 for (uint32_t channels = 48; channels < 384; channels += 72) {
17799 DWConvMicrokernelTester()
17800 .cr(24)
17801 .kr(25)
17802 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017803 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017804 }
17805 }
17806
17807 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
17808 TEST_REQUIRES_X86_SSE2;
17809 for (uint32_t channels = 48; channels < 384; channels += 72) {
17810 DWConvMicrokernelTester()
17811 .cr(24)
17812 .kr(25)
17813 .channels(channels)
17814 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017815 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017816 }
17817 }
17818
17819 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
17820 TEST_REQUIRES_X86_SSE2;
17821 for (uint32_t channels = 48; channels < 384; channels += 72) {
17822 DWConvMicrokernelTester()
17823 .cr(24)
17824 .kr(25)
17825 .channels(channels)
17826 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017827 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017828 }
17829 }
17830
17831 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_lt_24) {
17832 TEST_REQUIRES_X86_SSE2;
17833 for (uint32_t channels = 1; channels < 24; channels++) {
17834 DWConvMicrokernelTester()
17835 .cr(24)
17836 .kr(25)
17837 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017838 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017839 }
17840 }
17841
17842 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24) {
17843 TEST_REQUIRES_X86_SSE2;
17844 for (uint32_t channels = 25; channels < 48; channels++) {
17845 DWConvMicrokernelTester()
17846 .cr(24)
17847 .kr(25)
17848 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017849 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017850 }
17851 }
17852
17853 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
17854 TEST_REQUIRES_X86_SSE2;
17855 for (uint32_t channels = 25; channels < 48; channels++) {
17856 DWConvMicrokernelTester()
17857 .cr(24)
17858 .kr(25)
17859 .channels(channels)
17860 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017861 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017862 }
17863 }
17864
17865 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
17866 TEST_REQUIRES_X86_SSE2;
17867 for (uint32_t channels = 25; channels < 48; channels++) {
17868 DWConvMicrokernelTester()
17869 .cr(24)
17870 .kr(25)
17871 .channels(channels)
17872 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017873 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017874 }
17875 }
17876
17877 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel) {
17878 TEST_REQUIRES_X86_SSE2;
17879 for (size_t channels = 1; channels <= 120; channels += 23) {
17880 DWConvMicrokernelTester()
17881 .cr(24)
17882 .kr(25)
17883 .channels(channels)
17884 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017886 }
17887 }
17888
17889 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_step) {
17890 TEST_REQUIRES_X86_SSE2;
17891 for (size_t channels = 1; channels <= 120; channels += 23) {
17892 for (size_t step = 2; step <= 25; step++) {
17893 DWConvMicrokernelTester()
17894 .cr(24)
17895 .kr(25)
17896 .channels(channels)
17897 .width(3)
17898 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080017899 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017900 }
17901 }
17902 }
17903
17904 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
17905 TEST_REQUIRES_X86_SSE2;
17906 for (size_t channels = 1; channels <= 120; channels += 23) {
17907 DWConvMicrokernelTester()
17908 .cr(24)
17909 .kr(25)
17910 .channels(24)
17911 .width(5)
17912 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080017913 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017914 }
17915 }
17916
17917 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
17918 TEST_REQUIRES_X86_SSE2;
17919 for (size_t channels = 1; channels <= 120; channels += 23) {
17920 DWConvMicrokernelTester()
17921 .cr(24)
17922 .kr(25)
17923 .channels(channels)
17924 .width(3)
17925 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017926 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017927 }
17928 }
17929
17930 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
17931 TEST_REQUIRES_X86_SSE2;
17932 for (size_t channels = 1; channels <= 120; channels += 23) {
17933 DWConvMicrokernelTester()
17934 .cr(24)
17935 .kr(25)
17936 .channels(channels)
17937 .width(3)
17938 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017939 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017940 }
17941 }
17942
17943 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, input_offset) {
17944 TEST_REQUIRES_X86_SSE2;
17945 for (uint32_t channels = 48; channels < 384; channels += 72) {
17946 DWConvMicrokernelTester()
17947 .cr(24)
17948 .kr(25)
17949 .channels(channels)
17950 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080017951 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017952 }
17953 }
17954
17955 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, zero) {
17956 TEST_REQUIRES_X86_SSE2;
17957 for (uint32_t mz = 0; mz < 25; mz++) {
17958 for (uint32_t channels = 48; channels < 384; channels += 72) {
17959 DWConvMicrokernelTester()
17960 .cr(24)
17961 .kr(25)
17962 .channels(channels)
17963 .input_offset(464)
17964 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017965 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070017966 }
17967 }
17968 }
17969#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17970
17971
17972#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070017973 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_eq_8) {
17974 TEST_REQUIRES_X86_SSE2;
17975 DWConvMicrokernelTester()
17976 .cr(8)
17977 .kr(25)
17978 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017979 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070017980 }
17981
17982 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8) {
17983 TEST_REQUIRES_X86_SSE2;
17984 for (uint32_t channels = 16; channels < 128; channels += 24) {
17985 DWConvMicrokernelTester()
17986 .cr(8)
17987 .kr(25)
17988 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080017989 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070017990 }
17991 }
17992
17993 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
17994 TEST_REQUIRES_X86_SSE2;
17995 for (uint32_t channels = 16; channels < 128; channels += 24) {
17996 DWConvMicrokernelTester()
17997 .cr(8)
17998 .kr(25)
17999 .channels(channels)
18000 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018001 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018002 }
18003 }
18004
18005 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
18006 TEST_REQUIRES_X86_SSE2;
18007 for (uint32_t channels = 16; channels < 128; channels += 24) {
18008 DWConvMicrokernelTester()
18009 .cr(8)
18010 .kr(25)
18011 .channels(channels)
18012 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018013 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018014 }
18015 }
18016
18017 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_lt_8) {
18018 TEST_REQUIRES_X86_SSE2;
18019 for (uint32_t channels = 1; channels < 8; channels++) {
18020 DWConvMicrokernelTester()
18021 .cr(8)
18022 .kr(25)
18023 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018024 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018025 }
18026 }
18027
18028 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8) {
18029 TEST_REQUIRES_X86_SSE2;
18030 for (uint32_t channels = 9; channels < 16; channels++) {
18031 DWConvMicrokernelTester()
18032 .cr(8)
18033 .kr(25)
18034 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018035 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018036 }
18037 }
18038
18039 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
18040 TEST_REQUIRES_X86_SSE2;
18041 for (uint32_t channels = 9; channels < 16; channels++) {
18042 DWConvMicrokernelTester()
18043 .cr(8)
18044 .kr(25)
18045 .channels(channels)
18046 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018047 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018048 }
18049 }
18050
18051 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
18052 TEST_REQUIRES_X86_SSE2;
18053 for (uint32_t channels = 9; channels < 16; channels++) {
18054 DWConvMicrokernelTester()
18055 .cr(8)
18056 .kr(25)
18057 .channels(channels)
18058 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018060 }
18061 }
18062
18063 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel) {
18064 TEST_REQUIRES_X86_SSE2;
18065 for (size_t channels = 1; channels <= 40; channels += 7) {
18066 DWConvMicrokernelTester()
18067 .cr(8)
18068 .kr(25)
18069 .channels(channels)
18070 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018071 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018072 }
18073 }
18074
18075 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_step) {
18076 TEST_REQUIRES_X86_SSE2;
18077 for (size_t channels = 1; channels <= 40; channels += 7) {
18078 for (size_t step = 2; step <= 25; step++) {
18079 DWConvMicrokernelTester()
18080 .cr(8)
18081 .kr(25)
18082 .channels(channels)
18083 .width(3)
18084 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018085 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018086 }
18087 }
18088 }
18089
18090 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
18091 TEST_REQUIRES_X86_SSE2;
18092 for (size_t channels = 1; channels <= 40; channels += 7) {
18093 DWConvMicrokernelTester()
18094 .cr(8)
18095 .kr(25)
18096 .channels(8)
18097 .width(5)
18098 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018099 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018100 }
18101 }
18102
18103 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
18104 TEST_REQUIRES_X86_SSE2;
18105 for (size_t channels = 1; channels <= 40; channels += 7) {
18106 DWConvMicrokernelTester()
18107 .cr(8)
18108 .kr(25)
18109 .channels(channels)
18110 .width(3)
18111 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018112 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018113 }
18114 }
18115
18116 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
18117 TEST_REQUIRES_X86_SSE2;
18118 for (size_t channels = 1; channels <= 40; channels += 7) {
18119 DWConvMicrokernelTester()
18120 .cr(8)
18121 .kr(25)
18122 .channels(channels)
18123 .width(3)
18124 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018125 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018126 }
18127 }
18128
18129 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, input_offset) {
18130 TEST_REQUIRES_X86_SSE2;
18131 for (uint32_t channels = 16; channels < 128; channels += 24) {
18132 DWConvMicrokernelTester()
18133 .cr(8)
18134 .kr(25)
18135 .channels(channels)
18136 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080018137 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018138 }
18139 }
18140
18141 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, zero) {
18142 TEST_REQUIRES_X86_SSE2;
18143 for (uint32_t mz = 0; mz < 25; mz++) {
18144 for (uint32_t channels = 16; channels < 128; channels += 24) {
18145 DWConvMicrokernelTester()
18146 .cr(8)
18147 .kr(25)
18148 .channels(channels)
18149 .input_offset(176)
18150 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018152 }
18153 }
18154 }
18155#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18156
18157
18158#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18159 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_eq_16) {
18160 TEST_REQUIRES_X86_SSE2;
18161 DWConvMicrokernelTester()
18162 .cr(16)
18163 .kr(25)
18164 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080018165 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018166 }
18167
18168 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16) {
18169 TEST_REQUIRES_X86_SSE2;
18170 for (uint32_t channels = 32; channels < 256; channels += 48) {
18171 DWConvMicrokernelTester()
18172 .cr(16)
18173 .kr(25)
18174 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018175 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018176 }
18177 }
18178
18179 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
18180 TEST_REQUIRES_X86_SSE2;
18181 for (uint32_t channels = 32; channels < 256; channels += 48) {
18182 DWConvMicrokernelTester()
18183 .cr(16)
18184 .kr(25)
18185 .channels(channels)
18186 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018187 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018188 }
18189 }
18190
18191 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
18192 TEST_REQUIRES_X86_SSE2;
18193 for (uint32_t channels = 32; channels < 256; channels += 48) {
18194 DWConvMicrokernelTester()
18195 .cr(16)
18196 .kr(25)
18197 .channels(channels)
18198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018199 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018200 }
18201 }
18202
18203 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_lt_16) {
18204 TEST_REQUIRES_X86_SSE2;
18205 for (uint32_t channels = 1; channels < 16; channels++) {
18206 DWConvMicrokernelTester()
18207 .cr(16)
18208 .kr(25)
18209 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018210 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018211 }
18212 }
18213
18214 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16) {
18215 TEST_REQUIRES_X86_SSE2;
18216 for (uint32_t channels = 17; channels < 32; channels++) {
18217 DWConvMicrokernelTester()
18218 .cr(16)
18219 .kr(25)
18220 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018221 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018222 }
18223 }
18224
18225 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
18226 TEST_REQUIRES_X86_SSE2;
18227 for (uint32_t channels = 17; channels < 32; channels++) {
18228 DWConvMicrokernelTester()
18229 .cr(16)
18230 .kr(25)
18231 .channels(channels)
18232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018233 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018234 }
18235 }
18236
18237 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
18238 TEST_REQUIRES_X86_SSE2;
18239 for (uint32_t channels = 17; channels < 32; channels++) {
18240 DWConvMicrokernelTester()
18241 .cr(16)
18242 .kr(25)
18243 .channels(channels)
18244 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018245 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018246 }
18247 }
18248
18249 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel) {
18250 TEST_REQUIRES_X86_SSE2;
18251 for (size_t channels = 1; channels <= 80; channels += 15) {
18252 DWConvMicrokernelTester()
18253 .cr(16)
18254 .kr(25)
18255 .channels(channels)
18256 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018257 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018258 }
18259 }
18260
18261 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_step) {
18262 TEST_REQUIRES_X86_SSE2;
18263 for (size_t channels = 1; channels <= 80; channels += 15) {
18264 for (size_t step = 2; step <= 25; step++) {
18265 DWConvMicrokernelTester()
18266 .cr(16)
18267 .kr(25)
18268 .channels(channels)
18269 .width(3)
18270 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018271 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018272 }
18273 }
18274 }
18275
18276 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
18277 TEST_REQUIRES_X86_SSE2;
18278 for (size_t channels = 1; channels <= 80; channels += 15) {
18279 DWConvMicrokernelTester()
18280 .cr(16)
18281 .kr(25)
18282 .channels(16)
18283 .width(5)
18284 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018285 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018286 }
18287 }
18288
18289 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
18290 TEST_REQUIRES_X86_SSE2;
18291 for (size_t channels = 1; channels <= 80; channels += 15) {
18292 DWConvMicrokernelTester()
18293 .cr(16)
18294 .kr(25)
18295 .channels(channels)
18296 .width(3)
18297 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018298 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018299 }
18300 }
18301
18302 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
18303 TEST_REQUIRES_X86_SSE2;
18304 for (size_t channels = 1; channels <= 80; channels += 15) {
18305 DWConvMicrokernelTester()
18306 .cr(16)
18307 .kr(25)
18308 .channels(channels)
18309 .width(3)
18310 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018311 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018312 }
18313 }
18314
18315 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, input_offset) {
18316 TEST_REQUIRES_X86_SSE2;
18317 for (uint32_t channels = 32; channels < 256; channels += 48) {
18318 DWConvMicrokernelTester()
18319 .cr(16)
18320 .kr(25)
18321 .channels(channels)
18322 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080018323 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018324 }
18325 }
18326
18327 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, zero) {
18328 TEST_REQUIRES_X86_SSE2;
18329 for (uint32_t mz = 0; mz < 25; mz++) {
18330 for (uint32_t channels = 32; channels < 256; channels += 48) {
18331 DWConvMicrokernelTester()
18332 .cr(16)
18333 .kr(25)
18334 .channels(channels)
18335 .input_offset(304)
18336 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018337 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_minmax_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018338 }
18339 }
18340 }
18341#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18342
18343
18344#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -070018345 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
18346 TEST_REQUIRES_X86_SSE41;
18347 DWConvMicrokernelTester()
18348 .cr(8)
18349 .kr(25)
18350 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018351 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018352 }
18353
18354 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
18355 TEST_REQUIRES_X86_SSE41;
18356 for (uint32_t channels = 16; channels < 128; channels += 24) {
18357 DWConvMicrokernelTester()
18358 .cr(8)
18359 .kr(25)
18360 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018362 }
18363 }
18364
18365 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
18366 TEST_REQUIRES_X86_SSE41;
18367 for (uint32_t channels = 16; channels < 128; channels += 24) {
18368 DWConvMicrokernelTester()
18369 .cr(8)
18370 .kr(25)
18371 .channels(channels)
18372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018373 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018374 }
18375 }
18376
18377 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
18378 TEST_REQUIRES_X86_SSE41;
18379 for (uint32_t channels = 16; channels < 128; channels += 24) {
18380 DWConvMicrokernelTester()
18381 .cr(8)
18382 .kr(25)
18383 .channels(channels)
18384 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018386 }
18387 }
18388
18389 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
18390 TEST_REQUIRES_X86_SSE41;
18391 for (uint32_t channels = 1; channels < 8; channels++) {
18392 DWConvMicrokernelTester()
18393 .cr(8)
18394 .kr(25)
18395 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018396 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018397 }
18398 }
18399
18400 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
18401 TEST_REQUIRES_X86_SSE41;
18402 for (uint32_t channels = 9; channels < 16; channels++) {
18403 DWConvMicrokernelTester()
18404 .cr(8)
18405 .kr(25)
18406 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018407 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018408 }
18409 }
18410
18411 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
18412 TEST_REQUIRES_X86_SSE41;
18413 for (uint32_t channels = 9; channels < 16; channels++) {
18414 DWConvMicrokernelTester()
18415 .cr(8)
18416 .kr(25)
18417 .channels(channels)
18418 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018419 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018420 }
18421 }
18422
18423 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
18424 TEST_REQUIRES_X86_SSE41;
18425 for (uint32_t channels = 9; channels < 16; channels++) {
18426 DWConvMicrokernelTester()
18427 .cr(8)
18428 .kr(25)
18429 .channels(channels)
18430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018431 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018432 }
18433 }
18434
18435 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
18436 TEST_REQUIRES_X86_SSE41;
18437 for (size_t channels = 1; channels <= 40; channels += 7) {
18438 DWConvMicrokernelTester()
18439 .cr(8)
18440 .kr(25)
18441 .channels(channels)
18442 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018443 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018444 }
18445 }
18446
18447 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
18448 TEST_REQUIRES_X86_SSE41;
18449 for (size_t channels = 1; channels <= 40; channels += 7) {
18450 for (size_t step = 2; step <= 25; step++) {
18451 DWConvMicrokernelTester()
18452 .cr(8)
18453 .kr(25)
18454 .channels(channels)
18455 .width(3)
18456 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018457 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018458 }
18459 }
18460 }
18461
18462 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
18463 TEST_REQUIRES_X86_SSE41;
18464 for (size_t channels = 1; channels <= 40; channels += 7) {
18465 DWConvMicrokernelTester()
18466 .cr(8)
18467 .kr(25)
18468 .channels(8)
18469 .width(5)
18470 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018471 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018472 }
18473 }
18474
18475 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
18476 TEST_REQUIRES_X86_SSE41;
18477 for (size_t channels = 1; channels <= 40; channels += 7) {
18478 DWConvMicrokernelTester()
18479 .cr(8)
18480 .kr(25)
18481 .channels(channels)
18482 .width(3)
18483 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018484 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018485 }
18486 }
18487
18488 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
18489 TEST_REQUIRES_X86_SSE41;
18490 for (size_t channels = 1; channels <= 40; channels += 7) {
18491 DWConvMicrokernelTester()
18492 .cr(8)
18493 .kr(25)
18494 .channels(channels)
18495 .width(3)
18496 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018497 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018498 }
18499 }
18500
18501 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
18502 TEST_REQUIRES_X86_SSE41;
18503 for (uint32_t channels = 16; channels < 128; channels += 24) {
18504 DWConvMicrokernelTester()
18505 .cr(8)
18506 .kr(25)
18507 .channels(channels)
18508 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080018509 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018510 }
18511 }
18512
18513 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
18514 TEST_REQUIRES_X86_SSE41;
18515 for (uint32_t mz = 0; mz < 25; mz++) {
18516 for (uint32_t channels = 16; channels < 128; channels += 24) {
18517 DWConvMicrokernelTester()
18518 .cr(8)
18519 .kr(25)
18520 .channels(channels)
18521 .input_offset(176)
18522 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018524 }
18525 }
18526 }
18527#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18528
18529
18530#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18531 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
18532 TEST_REQUIRES_X86_SSE41;
18533 DWConvMicrokernelTester()
18534 .cr(16)
18535 .kr(25)
18536 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080018537 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018538 }
18539
18540 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
18541 TEST_REQUIRES_X86_SSE41;
18542 for (uint32_t channels = 32; channels < 256; channels += 48) {
18543 DWConvMicrokernelTester()
18544 .cr(16)
18545 .kr(25)
18546 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018547 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018548 }
18549 }
18550
18551 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
18552 TEST_REQUIRES_X86_SSE41;
18553 for (uint32_t channels = 32; channels < 256; channels += 48) {
18554 DWConvMicrokernelTester()
18555 .cr(16)
18556 .kr(25)
18557 .channels(channels)
18558 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018559 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018560 }
18561 }
18562
18563 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
18564 TEST_REQUIRES_X86_SSE41;
18565 for (uint32_t channels = 32; channels < 256; channels += 48) {
18566 DWConvMicrokernelTester()
18567 .cr(16)
18568 .kr(25)
18569 .channels(channels)
18570 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018571 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018572 }
18573 }
18574
18575 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
18576 TEST_REQUIRES_X86_SSE41;
18577 for (uint32_t channels = 1; channels < 16; channels++) {
18578 DWConvMicrokernelTester()
18579 .cr(16)
18580 .kr(25)
18581 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018582 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018583 }
18584 }
18585
18586 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
18587 TEST_REQUIRES_X86_SSE41;
18588 for (uint32_t channels = 17; channels < 32; channels++) {
18589 DWConvMicrokernelTester()
18590 .cr(16)
18591 .kr(25)
18592 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018594 }
18595 }
18596
18597 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
18598 TEST_REQUIRES_X86_SSE41;
18599 for (uint32_t channels = 17; channels < 32; channels++) {
18600 DWConvMicrokernelTester()
18601 .cr(16)
18602 .kr(25)
18603 .channels(channels)
18604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018605 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018606 }
18607 }
18608
18609 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
18610 TEST_REQUIRES_X86_SSE41;
18611 for (uint32_t channels = 17; channels < 32; channels++) {
18612 DWConvMicrokernelTester()
18613 .cr(16)
18614 .kr(25)
18615 .channels(channels)
18616 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018617 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018618 }
18619 }
18620
18621 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
18622 TEST_REQUIRES_X86_SSE41;
18623 for (size_t channels = 1; channels <= 80; channels += 15) {
18624 DWConvMicrokernelTester()
18625 .cr(16)
18626 .kr(25)
18627 .channels(channels)
18628 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018629 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018630 }
18631 }
18632
18633 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
18634 TEST_REQUIRES_X86_SSE41;
18635 for (size_t channels = 1; channels <= 80; channels += 15) {
18636 for (size_t step = 2; step <= 25; step++) {
18637 DWConvMicrokernelTester()
18638 .cr(16)
18639 .kr(25)
18640 .channels(channels)
18641 .width(3)
18642 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018643 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018644 }
18645 }
18646 }
18647
18648 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
18649 TEST_REQUIRES_X86_SSE41;
18650 for (size_t channels = 1; channels <= 80; channels += 15) {
18651 DWConvMicrokernelTester()
18652 .cr(16)
18653 .kr(25)
18654 .channels(16)
18655 .width(5)
18656 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018657 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018658 }
18659 }
18660
18661 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
18662 TEST_REQUIRES_X86_SSE41;
18663 for (size_t channels = 1; channels <= 80; channels += 15) {
18664 DWConvMicrokernelTester()
18665 .cr(16)
18666 .kr(25)
18667 .channels(channels)
18668 .width(3)
18669 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018670 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018671 }
18672 }
18673
18674 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
18675 TEST_REQUIRES_X86_SSE41;
18676 for (size_t channels = 1; channels <= 80; channels += 15) {
18677 DWConvMicrokernelTester()
18678 .cr(16)
18679 .kr(25)
18680 .channels(channels)
18681 .width(3)
18682 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018683 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018684 }
18685 }
18686
18687 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
18688 TEST_REQUIRES_X86_SSE41;
18689 for (uint32_t channels = 32; channels < 256; channels += 48) {
18690 DWConvMicrokernelTester()
18691 .cr(16)
18692 .kr(25)
18693 .channels(channels)
18694 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080018695 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018696 }
18697 }
18698
18699 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
18700 TEST_REQUIRES_X86_SSE41;
18701 for (uint32_t mz = 0; mz < 25; mz++) {
18702 for (uint32_t channels = 32; channels < 256; channels += 48) {
18703 DWConvMicrokernelTester()
18704 .cr(16)
18705 .kr(25)
18706 .channels(channels)
18707 .input_offset(304)
18708 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018709 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018710 }
18711 }
18712 }
18713#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18714
18715
18716#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18717 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_eq_24) {
18718 TEST_REQUIRES_X86_SSE41;
18719 DWConvMicrokernelTester()
18720 .cr(24)
18721 .kr(25)
18722 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080018723 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018724 }
18725
18726 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24) {
18727 TEST_REQUIRES_X86_SSE41;
18728 for (uint32_t channels = 48; channels < 384; channels += 72) {
18729 DWConvMicrokernelTester()
18730 .cr(24)
18731 .kr(25)
18732 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018733 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018734 }
18735 }
18736
18737 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
18738 TEST_REQUIRES_X86_SSE41;
18739 for (uint32_t channels = 48; channels < 384; channels += 72) {
18740 DWConvMicrokernelTester()
18741 .cr(24)
18742 .kr(25)
18743 .channels(channels)
18744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018745 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018746 }
18747 }
18748
18749 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
18750 TEST_REQUIRES_X86_SSE41;
18751 for (uint32_t channels = 48; channels < 384; channels += 72) {
18752 DWConvMicrokernelTester()
18753 .cr(24)
18754 .kr(25)
18755 .channels(channels)
18756 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018757 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018758 }
18759 }
18760
18761 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_lt_24) {
18762 TEST_REQUIRES_X86_SSE41;
18763 for (uint32_t channels = 1; channels < 24; channels++) {
18764 DWConvMicrokernelTester()
18765 .cr(24)
18766 .kr(25)
18767 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018768 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018769 }
18770 }
18771
18772 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24) {
18773 TEST_REQUIRES_X86_SSE41;
18774 for (uint32_t channels = 25; channels < 48; channels++) {
18775 DWConvMicrokernelTester()
18776 .cr(24)
18777 .kr(25)
18778 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018779 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018780 }
18781 }
18782
18783 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
18784 TEST_REQUIRES_X86_SSE41;
18785 for (uint32_t channels = 25; channels < 48; channels++) {
18786 DWConvMicrokernelTester()
18787 .cr(24)
18788 .kr(25)
18789 .channels(channels)
18790 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018791 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018792 }
18793 }
18794
18795 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
18796 TEST_REQUIRES_X86_SSE41;
18797 for (uint32_t channels = 25; channels < 48; channels++) {
18798 DWConvMicrokernelTester()
18799 .cr(24)
18800 .kr(25)
18801 .channels(channels)
18802 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018803 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018804 }
18805 }
18806
18807 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel) {
18808 TEST_REQUIRES_X86_SSE41;
18809 for (size_t channels = 1; channels <= 120; channels += 23) {
18810 DWConvMicrokernelTester()
18811 .cr(24)
18812 .kr(25)
18813 .channels(channels)
18814 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018815 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018816 }
18817 }
18818
18819 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_step) {
18820 TEST_REQUIRES_X86_SSE41;
18821 for (size_t channels = 1; channels <= 120; channels += 23) {
18822 for (size_t step = 2; step <= 25; step++) {
18823 DWConvMicrokernelTester()
18824 .cr(24)
18825 .kr(25)
18826 .channels(channels)
18827 .width(3)
18828 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080018829 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018830 }
18831 }
18832 }
18833
18834 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
18835 TEST_REQUIRES_X86_SSE41;
18836 for (size_t channels = 1; channels <= 120; channels += 23) {
18837 DWConvMicrokernelTester()
18838 .cr(24)
18839 .kr(25)
18840 .channels(24)
18841 .width(5)
18842 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080018843 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018844 }
18845 }
18846
18847 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
18848 TEST_REQUIRES_X86_SSE41;
18849 for (size_t channels = 1; channels <= 120; channels += 23) {
18850 DWConvMicrokernelTester()
18851 .cr(24)
18852 .kr(25)
18853 .channels(channels)
18854 .width(3)
18855 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018856 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018857 }
18858 }
18859
18860 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
18861 TEST_REQUIRES_X86_SSE41;
18862 for (size_t channels = 1; channels <= 120; channels += 23) {
18863 DWConvMicrokernelTester()
18864 .cr(24)
18865 .kr(25)
18866 .channels(channels)
18867 .width(3)
18868 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018869 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018870 }
18871 }
18872
18873 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, input_offset) {
18874 TEST_REQUIRES_X86_SSE41;
18875 for (uint32_t channels = 48; channels < 384; channels += 72) {
18876 DWConvMicrokernelTester()
18877 .cr(24)
18878 .kr(25)
18879 .channels(channels)
18880 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080018881 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018882 }
18883 }
18884
18885 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, zero) {
18886 TEST_REQUIRES_X86_SSE41;
18887 for (uint32_t mz = 0; mz < 25; mz++) {
18888 for (uint32_t channels = 48; channels < 384; channels += 72) {
18889 DWConvMicrokernelTester()
18890 .cr(24)
18891 .kr(25)
18892 .channels(channels)
18893 .input_offset(464)
18894 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018895 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070018896 }
18897 }
18898 }
18899#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18900
18901
18902#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070018903 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_eq_8) {
18904 TEST_REQUIRES_X86_SSE41;
18905 DWConvMicrokernelTester()
18906 .cr(8)
18907 .kr(25)
18908 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018909 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018910 }
18911
18912 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8) {
18913 TEST_REQUIRES_X86_SSE41;
18914 for (uint32_t channels = 16; channels < 128; channels += 24) {
18915 DWConvMicrokernelTester()
18916 .cr(8)
18917 .kr(25)
18918 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018919 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018920 }
18921 }
18922
18923 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
18924 TEST_REQUIRES_X86_SSE41;
18925 for (uint32_t channels = 16; channels < 128; channels += 24) {
18926 DWConvMicrokernelTester()
18927 .cr(8)
18928 .kr(25)
18929 .channels(channels)
18930 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018931 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018932 }
18933 }
18934
18935 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
18936 TEST_REQUIRES_X86_SSE41;
18937 for (uint32_t channels = 16; channels < 128; channels += 24) {
18938 DWConvMicrokernelTester()
18939 .cr(8)
18940 .kr(25)
18941 .channels(channels)
18942 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018944 }
18945 }
18946
18947 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_lt_8) {
18948 TEST_REQUIRES_X86_SSE41;
18949 for (uint32_t channels = 1; channels < 8; channels++) {
18950 DWConvMicrokernelTester()
18951 .cr(8)
18952 .kr(25)
18953 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018954 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018955 }
18956 }
18957
18958 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8) {
18959 TEST_REQUIRES_X86_SSE41;
18960 for (uint32_t channels = 9; channels < 16; channels++) {
18961 DWConvMicrokernelTester()
18962 .cr(8)
18963 .kr(25)
18964 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080018965 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018966 }
18967 }
18968
18969 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
18970 TEST_REQUIRES_X86_SSE41;
18971 for (uint32_t channels = 9; channels < 16; channels++) {
18972 DWConvMicrokernelTester()
18973 .cr(8)
18974 .kr(25)
18975 .channels(channels)
18976 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018977 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018978 }
18979 }
18980
18981 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
18982 TEST_REQUIRES_X86_SSE41;
18983 for (uint32_t channels = 9; channels < 16; channels++) {
18984 DWConvMicrokernelTester()
18985 .cr(8)
18986 .kr(25)
18987 .channels(channels)
18988 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018989 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070018990 }
18991 }
18992
18993 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel) {
18994 TEST_REQUIRES_X86_SSE41;
18995 for (size_t channels = 1; channels <= 40; channels += 7) {
18996 DWConvMicrokernelTester()
18997 .cr(8)
18998 .kr(25)
18999 .channels(channels)
19000 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019001 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019002 }
19003 }
19004
19005 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_step) {
19006 TEST_REQUIRES_X86_SSE41;
19007 for (size_t channels = 1; channels <= 40; channels += 7) {
19008 for (size_t step = 2; step <= 25; step++) {
19009 DWConvMicrokernelTester()
19010 .cr(8)
19011 .kr(25)
19012 .channels(channels)
19013 .width(3)
19014 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019015 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019016 }
19017 }
19018 }
19019
19020 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
19021 TEST_REQUIRES_X86_SSE41;
19022 for (size_t channels = 1; channels <= 40; channels += 7) {
19023 DWConvMicrokernelTester()
19024 .cr(8)
19025 .kr(25)
19026 .channels(8)
19027 .width(5)
19028 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019029 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019030 }
19031 }
19032
19033 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
19034 TEST_REQUIRES_X86_SSE41;
19035 for (size_t channels = 1; channels <= 40; channels += 7) {
19036 DWConvMicrokernelTester()
19037 .cr(8)
19038 .kr(25)
19039 .channels(channels)
19040 .width(3)
19041 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019042 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019043 }
19044 }
19045
19046 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
19047 TEST_REQUIRES_X86_SSE41;
19048 for (size_t channels = 1; channels <= 40; channels += 7) {
19049 DWConvMicrokernelTester()
19050 .cr(8)
19051 .kr(25)
19052 .channels(channels)
19053 .width(3)
19054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019055 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019056 }
19057 }
19058
19059 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, input_offset) {
19060 TEST_REQUIRES_X86_SSE41;
19061 for (uint32_t channels = 16; channels < 128; channels += 24) {
19062 DWConvMicrokernelTester()
19063 .cr(8)
19064 .kr(25)
19065 .channels(channels)
19066 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080019067 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019068 }
19069 }
19070
19071 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, zero) {
19072 TEST_REQUIRES_X86_SSE41;
19073 for (uint32_t mz = 0; mz < 25; mz++) {
19074 for (uint32_t channels = 16; channels < 128; channels += 24) {
19075 DWConvMicrokernelTester()
19076 .cr(8)
19077 .kr(25)
19078 .channels(channels)
19079 .input_offset(176)
19080 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019081 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019082 }
19083 }
19084 }
19085#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19086
19087
19088#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19089 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_eq_16) {
19090 TEST_REQUIRES_X86_SSE41;
19091 DWConvMicrokernelTester()
19092 .cr(16)
19093 .kr(25)
19094 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080019095 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019096 }
19097
19098 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16) {
19099 TEST_REQUIRES_X86_SSE41;
19100 for (uint32_t channels = 32; channels < 256; channels += 48) {
19101 DWConvMicrokernelTester()
19102 .cr(16)
19103 .kr(25)
19104 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019105 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019106 }
19107 }
19108
19109 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
19110 TEST_REQUIRES_X86_SSE41;
19111 for (uint32_t channels = 32; channels < 256; channels += 48) {
19112 DWConvMicrokernelTester()
19113 .cr(16)
19114 .kr(25)
19115 .channels(channels)
19116 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019117 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019118 }
19119 }
19120
19121 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
19122 TEST_REQUIRES_X86_SSE41;
19123 for (uint32_t channels = 32; channels < 256; channels += 48) {
19124 DWConvMicrokernelTester()
19125 .cr(16)
19126 .kr(25)
19127 .channels(channels)
19128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019129 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019130 }
19131 }
19132
19133 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_lt_16) {
19134 TEST_REQUIRES_X86_SSE41;
19135 for (uint32_t channels = 1; channels < 16; channels++) {
19136 DWConvMicrokernelTester()
19137 .cr(16)
19138 .kr(25)
19139 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019140 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019141 }
19142 }
19143
19144 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16) {
19145 TEST_REQUIRES_X86_SSE41;
19146 for (uint32_t channels = 17; channels < 32; channels++) {
19147 DWConvMicrokernelTester()
19148 .cr(16)
19149 .kr(25)
19150 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019152 }
19153 }
19154
19155 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
19156 TEST_REQUIRES_X86_SSE41;
19157 for (uint32_t channels = 17; channels < 32; channels++) {
19158 DWConvMicrokernelTester()
19159 .cr(16)
19160 .kr(25)
19161 .channels(channels)
19162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019163 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019164 }
19165 }
19166
19167 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
19168 TEST_REQUIRES_X86_SSE41;
19169 for (uint32_t channels = 17; channels < 32; channels++) {
19170 DWConvMicrokernelTester()
19171 .cr(16)
19172 .kr(25)
19173 .channels(channels)
19174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019175 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019176 }
19177 }
19178
19179 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel) {
19180 TEST_REQUIRES_X86_SSE41;
19181 for (size_t channels = 1; channels <= 80; channels += 15) {
19182 DWConvMicrokernelTester()
19183 .cr(16)
19184 .kr(25)
19185 .channels(channels)
19186 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019187 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019188 }
19189 }
19190
19191 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_step) {
19192 TEST_REQUIRES_X86_SSE41;
19193 for (size_t channels = 1; channels <= 80; channels += 15) {
19194 for (size_t step = 2; step <= 25; step++) {
19195 DWConvMicrokernelTester()
19196 .cr(16)
19197 .kr(25)
19198 .channels(channels)
19199 .width(3)
19200 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019201 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019202 }
19203 }
19204 }
19205
19206 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
19207 TEST_REQUIRES_X86_SSE41;
19208 for (size_t channels = 1; channels <= 80; channels += 15) {
19209 DWConvMicrokernelTester()
19210 .cr(16)
19211 .kr(25)
19212 .channels(16)
19213 .width(5)
19214 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019215 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019216 }
19217 }
19218
19219 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
19220 TEST_REQUIRES_X86_SSE41;
19221 for (size_t channels = 1; channels <= 80; channels += 15) {
19222 DWConvMicrokernelTester()
19223 .cr(16)
19224 .kr(25)
19225 .channels(channels)
19226 .width(3)
19227 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019228 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019229 }
19230 }
19231
19232 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
19233 TEST_REQUIRES_X86_SSE41;
19234 for (size_t channels = 1; channels <= 80; channels += 15) {
19235 DWConvMicrokernelTester()
19236 .cr(16)
19237 .kr(25)
19238 .channels(channels)
19239 .width(3)
19240 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019241 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019242 }
19243 }
19244
19245 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, input_offset) {
19246 TEST_REQUIRES_X86_SSE41;
19247 for (uint32_t channels = 32; channels < 256; channels += 48) {
19248 DWConvMicrokernelTester()
19249 .cr(16)
19250 .kr(25)
19251 .channels(channels)
19252 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080019253 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019254 }
19255 }
19256
19257 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, zero) {
19258 TEST_REQUIRES_X86_SSE41;
19259 for (uint32_t mz = 0; mz < 25; mz++) {
19260 for (uint32_t channels = 32; channels < 256; channels += 48) {
19261 DWConvMicrokernelTester()
19262 .cr(16)
19263 .kr(25)
19264 .channels(channels)
19265 .input_offset(304)
19266 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019267 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019268 }
19269 }
19270 }
19271#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19272
19273
19274#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -070019275 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
19276 TEST_REQUIRES_X86_AVX;
19277 DWConvMicrokernelTester()
19278 .cr(8)
19279 .kr(25)
19280 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019281 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019282 }
19283
19284 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
19285 TEST_REQUIRES_X86_AVX;
19286 for (uint32_t channels = 16; channels < 128; channels += 24) {
19287 DWConvMicrokernelTester()
19288 .cr(8)
19289 .kr(25)
19290 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019291 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019292 }
19293 }
19294
19295 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
19296 TEST_REQUIRES_X86_AVX;
19297 for (uint32_t channels = 16; channels < 128; channels += 24) {
19298 DWConvMicrokernelTester()
19299 .cr(8)
19300 .kr(25)
19301 .channels(channels)
19302 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019303 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019304 }
19305 }
19306
19307 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
19308 TEST_REQUIRES_X86_AVX;
19309 for (uint32_t channels = 16; channels < 128; channels += 24) {
19310 DWConvMicrokernelTester()
19311 .cr(8)
19312 .kr(25)
19313 .channels(channels)
19314 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019315 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019316 }
19317 }
19318
19319 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
19320 TEST_REQUIRES_X86_AVX;
19321 for (uint32_t channels = 1; channels < 8; channels++) {
19322 DWConvMicrokernelTester()
19323 .cr(8)
19324 .kr(25)
19325 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019326 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019327 }
19328 }
19329
19330 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
19331 TEST_REQUIRES_X86_AVX;
19332 for (uint32_t channels = 9; channels < 16; channels++) {
19333 DWConvMicrokernelTester()
19334 .cr(8)
19335 .kr(25)
19336 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019337 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019338 }
19339 }
19340
19341 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
19342 TEST_REQUIRES_X86_AVX;
19343 for (uint32_t channels = 9; channels < 16; channels++) {
19344 DWConvMicrokernelTester()
19345 .cr(8)
19346 .kr(25)
19347 .channels(channels)
19348 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019349 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019350 }
19351 }
19352
19353 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
19354 TEST_REQUIRES_X86_AVX;
19355 for (uint32_t channels = 9; channels < 16; channels++) {
19356 DWConvMicrokernelTester()
19357 .cr(8)
19358 .kr(25)
19359 .channels(channels)
19360 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019362 }
19363 }
19364
19365 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
19366 TEST_REQUIRES_X86_AVX;
19367 for (size_t channels = 1; channels <= 40; channels += 7) {
19368 DWConvMicrokernelTester()
19369 .cr(8)
19370 .kr(25)
19371 .channels(channels)
19372 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019373 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019374 }
19375 }
19376
19377 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
19378 TEST_REQUIRES_X86_AVX;
19379 for (size_t channels = 1; channels <= 40; channels += 7) {
19380 for (size_t step = 2; step <= 25; step++) {
19381 DWConvMicrokernelTester()
19382 .cr(8)
19383 .kr(25)
19384 .channels(channels)
19385 .width(3)
19386 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019387 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019388 }
19389 }
19390 }
19391
19392 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
19393 TEST_REQUIRES_X86_AVX;
19394 for (size_t channels = 1; channels <= 40; channels += 7) {
19395 DWConvMicrokernelTester()
19396 .cr(8)
19397 .kr(25)
19398 .channels(8)
19399 .width(5)
19400 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019401 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019402 }
19403 }
19404
19405 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
19406 TEST_REQUIRES_X86_AVX;
19407 for (size_t channels = 1; channels <= 40; channels += 7) {
19408 DWConvMicrokernelTester()
19409 .cr(8)
19410 .kr(25)
19411 .channels(channels)
19412 .width(3)
19413 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019414 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019415 }
19416 }
19417
19418 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
19419 TEST_REQUIRES_X86_AVX;
19420 for (size_t channels = 1; channels <= 40; channels += 7) {
19421 DWConvMicrokernelTester()
19422 .cr(8)
19423 .kr(25)
19424 .channels(channels)
19425 .width(3)
19426 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019427 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019428 }
19429 }
19430
19431 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
19432 TEST_REQUIRES_X86_AVX;
19433 for (uint32_t channels = 16; channels < 128; channels += 24) {
19434 DWConvMicrokernelTester()
19435 .cr(8)
19436 .kr(25)
19437 .channels(channels)
19438 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080019439 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019440 }
19441 }
19442
19443 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
19444 TEST_REQUIRES_X86_AVX;
19445 for (uint32_t mz = 0; mz < 25; mz++) {
19446 for (uint32_t channels = 16; channels < 128; channels += 24) {
19447 DWConvMicrokernelTester()
19448 .cr(8)
19449 .kr(25)
19450 .channels(channels)
19451 .input_offset(176)
19452 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019453 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019454 }
19455 }
19456 }
19457#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19458
19459
19460#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19461 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
19462 TEST_REQUIRES_X86_AVX;
19463 DWConvMicrokernelTester()
19464 .cr(16)
19465 .kr(25)
19466 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080019467 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019468 }
19469
19470 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
19471 TEST_REQUIRES_X86_AVX;
19472 for (uint32_t channels = 32; channels < 256; channels += 48) {
19473 DWConvMicrokernelTester()
19474 .cr(16)
19475 .kr(25)
19476 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019478 }
19479 }
19480
19481 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
19482 TEST_REQUIRES_X86_AVX;
19483 for (uint32_t channels = 32; channels < 256; channels += 48) {
19484 DWConvMicrokernelTester()
19485 .cr(16)
19486 .kr(25)
19487 .channels(channels)
19488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019489 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019490 }
19491 }
19492
19493 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
19494 TEST_REQUIRES_X86_AVX;
19495 for (uint32_t channels = 32; channels < 256; channels += 48) {
19496 DWConvMicrokernelTester()
19497 .cr(16)
19498 .kr(25)
19499 .channels(channels)
19500 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019502 }
19503 }
19504
19505 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
19506 TEST_REQUIRES_X86_AVX;
19507 for (uint32_t channels = 1; channels < 16; channels++) {
19508 DWConvMicrokernelTester()
19509 .cr(16)
19510 .kr(25)
19511 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019512 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019513 }
19514 }
19515
19516 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
19517 TEST_REQUIRES_X86_AVX;
19518 for (uint32_t channels = 17; channels < 32; channels++) {
19519 DWConvMicrokernelTester()
19520 .cr(16)
19521 .kr(25)
19522 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019524 }
19525 }
19526
19527 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
19528 TEST_REQUIRES_X86_AVX;
19529 for (uint32_t channels = 17; channels < 32; channels++) {
19530 DWConvMicrokernelTester()
19531 .cr(16)
19532 .kr(25)
19533 .channels(channels)
19534 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019535 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019536 }
19537 }
19538
19539 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
19540 TEST_REQUIRES_X86_AVX;
19541 for (uint32_t channels = 17; channels < 32; channels++) {
19542 DWConvMicrokernelTester()
19543 .cr(16)
19544 .kr(25)
19545 .channels(channels)
19546 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019547 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019548 }
19549 }
19550
19551 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
19552 TEST_REQUIRES_X86_AVX;
19553 for (size_t channels = 1; channels <= 80; channels += 15) {
19554 DWConvMicrokernelTester()
19555 .cr(16)
19556 .kr(25)
19557 .channels(channels)
19558 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019559 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019560 }
19561 }
19562
19563 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
19564 TEST_REQUIRES_X86_AVX;
19565 for (size_t channels = 1; channels <= 80; channels += 15) {
19566 for (size_t step = 2; step <= 25; step++) {
19567 DWConvMicrokernelTester()
19568 .cr(16)
19569 .kr(25)
19570 .channels(channels)
19571 .width(3)
19572 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019573 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019574 }
19575 }
19576 }
19577
19578 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
19579 TEST_REQUIRES_X86_AVX;
19580 for (size_t channels = 1; channels <= 80; channels += 15) {
19581 DWConvMicrokernelTester()
19582 .cr(16)
19583 .kr(25)
19584 .channels(16)
19585 .width(5)
19586 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019587 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019588 }
19589 }
19590
19591 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
19592 TEST_REQUIRES_X86_AVX;
19593 for (size_t channels = 1; channels <= 80; channels += 15) {
19594 DWConvMicrokernelTester()
19595 .cr(16)
19596 .kr(25)
19597 .channels(channels)
19598 .width(3)
19599 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019600 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019601 }
19602 }
19603
19604 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
19605 TEST_REQUIRES_X86_AVX;
19606 for (size_t channels = 1; channels <= 80; channels += 15) {
19607 DWConvMicrokernelTester()
19608 .cr(16)
19609 .kr(25)
19610 .channels(channels)
19611 .width(3)
19612 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019613 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019614 }
19615 }
19616
19617 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
19618 TEST_REQUIRES_X86_AVX;
19619 for (uint32_t channels = 32; channels < 256; channels += 48) {
19620 DWConvMicrokernelTester()
19621 .cr(16)
19622 .kr(25)
19623 .channels(channels)
19624 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080019625 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019626 }
19627 }
19628
19629 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
19630 TEST_REQUIRES_X86_AVX;
19631 for (uint32_t mz = 0; mz < 25; mz++) {
19632 for (uint32_t channels = 32; channels < 256; channels += 48) {
19633 DWConvMicrokernelTester()
19634 .cr(16)
19635 .kr(25)
19636 .channels(channels)
19637 .input_offset(304)
19638 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019639 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019640 }
19641 }
19642 }
19643#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19644
19645
19646#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19647 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_eq_24) {
19648 TEST_REQUIRES_X86_AVX;
19649 DWConvMicrokernelTester()
19650 .cr(24)
19651 .kr(25)
19652 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080019653 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019654 }
19655
19656 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24) {
19657 TEST_REQUIRES_X86_AVX;
19658 for (uint32_t channels = 48; channels < 384; channels += 72) {
19659 DWConvMicrokernelTester()
19660 .cr(24)
19661 .kr(25)
19662 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019664 }
19665 }
19666
19667 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
19668 TEST_REQUIRES_X86_AVX;
19669 for (uint32_t channels = 48; channels < 384; channels += 72) {
19670 DWConvMicrokernelTester()
19671 .cr(24)
19672 .kr(25)
19673 .channels(channels)
19674 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019675 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019676 }
19677 }
19678
19679 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
19680 TEST_REQUIRES_X86_AVX;
19681 for (uint32_t channels = 48; channels < 384; channels += 72) {
19682 DWConvMicrokernelTester()
19683 .cr(24)
19684 .kr(25)
19685 .channels(channels)
19686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019687 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019688 }
19689 }
19690
19691 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_lt_24) {
19692 TEST_REQUIRES_X86_AVX;
19693 for (uint32_t channels = 1; channels < 24; channels++) {
19694 DWConvMicrokernelTester()
19695 .cr(24)
19696 .kr(25)
19697 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019698 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019699 }
19700 }
19701
19702 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24) {
19703 TEST_REQUIRES_X86_AVX;
19704 for (uint32_t channels = 25; channels < 48; channels++) {
19705 DWConvMicrokernelTester()
19706 .cr(24)
19707 .kr(25)
19708 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019709 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019710 }
19711 }
19712
19713 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
19714 TEST_REQUIRES_X86_AVX;
19715 for (uint32_t channels = 25; channels < 48; channels++) {
19716 DWConvMicrokernelTester()
19717 .cr(24)
19718 .kr(25)
19719 .channels(channels)
19720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019721 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019722 }
19723 }
19724
19725 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
19726 TEST_REQUIRES_X86_AVX;
19727 for (uint32_t channels = 25; channels < 48; channels++) {
19728 DWConvMicrokernelTester()
19729 .cr(24)
19730 .kr(25)
19731 .channels(channels)
19732 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019733 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019734 }
19735 }
19736
19737 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel) {
19738 TEST_REQUIRES_X86_AVX;
19739 for (size_t channels = 1; channels <= 120; channels += 23) {
19740 DWConvMicrokernelTester()
19741 .cr(24)
19742 .kr(25)
19743 .channels(channels)
19744 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019745 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019746 }
19747 }
19748
19749 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_step) {
19750 TEST_REQUIRES_X86_AVX;
19751 for (size_t channels = 1; channels <= 120; channels += 23) {
19752 for (size_t step = 2; step <= 25; step++) {
19753 DWConvMicrokernelTester()
19754 .cr(24)
19755 .kr(25)
19756 .channels(channels)
19757 .width(3)
19758 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019759 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019760 }
19761 }
19762 }
19763
19764 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
19765 TEST_REQUIRES_X86_AVX;
19766 for (size_t channels = 1; channels <= 120; channels += 23) {
19767 DWConvMicrokernelTester()
19768 .cr(24)
19769 .kr(25)
19770 .channels(24)
19771 .width(5)
19772 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080019773 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019774 }
19775 }
19776
19777 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmin) {
19778 TEST_REQUIRES_X86_AVX;
19779 for (size_t channels = 1; channels <= 120; channels += 23) {
19780 DWConvMicrokernelTester()
19781 .cr(24)
19782 .kr(25)
19783 .channels(channels)
19784 .width(3)
19785 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019786 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019787 }
19788 }
19789
19790 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmax) {
19791 TEST_REQUIRES_X86_AVX;
19792 for (size_t channels = 1; channels <= 120; channels += 23) {
19793 DWConvMicrokernelTester()
19794 .cr(24)
19795 .kr(25)
19796 .channels(channels)
19797 .width(3)
19798 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019799 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019800 }
19801 }
19802
19803 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, input_offset) {
19804 TEST_REQUIRES_X86_AVX;
19805 for (uint32_t channels = 48; channels < 384; channels += 72) {
19806 DWConvMicrokernelTester()
19807 .cr(24)
19808 .kr(25)
19809 .channels(channels)
19810 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080019811 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019812 }
19813 }
19814
19815 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, zero) {
19816 TEST_REQUIRES_X86_AVX;
19817 for (uint32_t mz = 0; mz < 25; mz++) {
19818 for (uint32_t channels = 48; channels < 384; channels += 72) {
19819 DWConvMicrokernelTester()
19820 .cr(24)
19821 .kr(25)
19822 .channels(channels)
19823 .input_offset(464)
19824 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019825 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070019826 }
19827 }
19828 }
19829#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19830
19831
19832#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan09668562021-07-26 16:52:20 -070019833 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_eq_8) {
19834 TEST_REQUIRES_X86_AVX;
19835 DWConvMicrokernelTester()
19836 .cr(8)
19837 .kr(25)
19838 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019839 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019840 }
19841
19842 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8) {
19843 TEST_REQUIRES_X86_AVX;
19844 for (uint32_t channels = 16; channels < 128; channels += 24) {
19845 DWConvMicrokernelTester()
19846 .cr(8)
19847 .kr(25)
19848 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019849 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019850 }
19851 }
19852
19853 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmin) {
19854 TEST_REQUIRES_X86_AVX;
19855 for (uint32_t channels = 16; channels < 128; channels += 24) {
19856 DWConvMicrokernelTester()
19857 .cr(8)
19858 .kr(25)
19859 .channels(channels)
19860 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019861 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019862 }
19863 }
19864
19865 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmax) {
19866 TEST_REQUIRES_X86_AVX;
19867 for (uint32_t channels = 16; channels < 128; channels += 24) {
19868 DWConvMicrokernelTester()
19869 .cr(8)
19870 .kr(25)
19871 .channels(channels)
19872 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019873 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019874 }
19875 }
19876
19877 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_lt_8) {
19878 TEST_REQUIRES_X86_AVX;
19879 for (uint32_t channels = 1; channels < 8; channels++) {
19880 DWConvMicrokernelTester()
19881 .cr(8)
19882 .kr(25)
19883 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019884 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019885 }
19886 }
19887
19888 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8) {
19889 TEST_REQUIRES_X86_AVX;
19890 for (uint32_t channels = 9; channels < 16; channels++) {
19891 DWConvMicrokernelTester()
19892 .cr(8)
19893 .kr(25)
19894 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080019895 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019896 }
19897 }
19898
19899 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
19900 TEST_REQUIRES_X86_AVX;
19901 for (uint32_t channels = 9; channels < 16; channels++) {
19902 DWConvMicrokernelTester()
19903 .cr(8)
19904 .kr(25)
19905 .channels(channels)
19906 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019907 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019908 }
19909 }
19910
19911 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
19912 TEST_REQUIRES_X86_AVX;
19913 for (uint32_t channels = 9; channels < 16; channels++) {
19914 DWConvMicrokernelTester()
19915 .cr(8)
19916 .kr(25)
19917 .channels(channels)
19918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019919 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019920 }
19921 }
19922
19923 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel) {
19924 TEST_REQUIRES_X86_AVX;
19925 for (size_t channels = 1; channels <= 40; channels += 7) {
19926 DWConvMicrokernelTester()
19927 .cr(8)
19928 .kr(25)
19929 .channels(channels)
19930 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019931 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019932 }
19933 }
19934
19935 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_step) {
19936 TEST_REQUIRES_X86_AVX;
19937 for (size_t channels = 1; channels <= 40; channels += 7) {
19938 for (size_t step = 2; step <= 25; step++) {
19939 DWConvMicrokernelTester()
19940 .cr(8)
19941 .kr(25)
19942 .channels(channels)
19943 .width(3)
19944 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080019945 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019946 }
19947 }
19948 }
19949
19950 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
19951 TEST_REQUIRES_X86_AVX;
19952 for (size_t channels = 1; channels <= 40; channels += 7) {
19953 DWConvMicrokernelTester()
19954 .cr(8)
19955 .kr(25)
19956 .channels(8)
19957 .width(5)
19958 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019959 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019960 }
19961 }
19962
19963 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
19964 TEST_REQUIRES_X86_AVX;
19965 for (size_t channels = 1; channels <= 40; channels += 7) {
19966 DWConvMicrokernelTester()
19967 .cr(8)
19968 .kr(25)
19969 .channels(channels)
19970 .width(3)
19971 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019972 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019973 }
19974 }
19975
19976 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
19977 TEST_REQUIRES_X86_AVX;
19978 for (size_t channels = 1; channels <= 40; channels += 7) {
19979 DWConvMicrokernelTester()
19980 .cr(8)
19981 .kr(25)
19982 .channels(channels)
19983 .width(3)
19984 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019985 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019986 }
19987 }
19988
19989 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, input_offset) {
19990 TEST_REQUIRES_X86_AVX;
19991 for (uint32_t channels = 16; channels < 128; channels += 24) {
19992 DWConvMicrokernelTester()
19993 .cr(8)
19994 .kr(25)
19995 .channels(channels)
19996 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080019997 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070019998 }
19999 }
20000
20001 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, zero) {
20002 TEST_REQUIRES_X86_AVX;
20003 for (uint32_t mz = 0; mz < 25; mz++) {
20004 for (uint32_t channels = 16; channels < 128; channels += 24) {
20005 DWConvMicrokernelTester()
20006 .cr(8)
20007 .kr(25)
20008 .channels(channels)
20009 .input_offset(176)
20010 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020011 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020012 }
20013 }
20014 }
20015#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20016
20017
20018#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20019 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_eq_16) {
20020 TEST_REQUIRES_X86_AVX;
20021 DWConvMicrokernelTester()
20022 .cr(16)
20023 .kr(25)
20024 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020025 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020026 }
20027
20028 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16) {
20029 TEST_REQUIRES_X86_AVX;
20030 for (uint32_t channels = 32; channels < 256; channels += 48) {
20031 DWConvMicrokernelTester()
20032 .cr(16)
20033 .kr(25)
20034 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020035 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020036 }
20037 }
20038
20039 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmin) {
20040 TEST_REQUIRES_X86_AVX;
20041 for (uint32_t channels = 32; channels < 256; channels += 48) {
20042 DWConvMicrokernelTester()
20043 .cr(16)
20044 .kr(25)
20045 .channels(channels)
20046 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020047 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020048 }
20049 }
20050
20051 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmax) {
20052 TEST_REQUIRES_X86_AVX;
20053 for (uint32_t channels = 32; channels < 256; channels += 48) {
20054 DWConvMicrokernelTester()
20055 .cr(16)
20056 .kr(25)
20057 .channels(channels)
20058 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020059 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020060 }
20061 }
20062
20063 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_lt_16) {
20064 TEST_REQUIRES_X86_AVX;
20065 for (uint32_t channels = 1; channels < 16; channels++) {
20066 DWConvMicrokernelTester()
20067 .cr(16)
20068 .kr(25)
20069 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020070 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020071 }
20072 }
20073
20074 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16) {
20075 TEST_REQUIRES_X86_AVX;
20076 for (uint32_t channels = 17; channels < 32; channels++) {
20077 DWConvMicrokernelTester()
20078 .cr(16)
20079 .kr(25)
20080 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020081 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020082 }
20083 }
20084
20085 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
20086 TEST_REQUIRES_X86_AVX;
20087 for (uint32_t channels = 17; channels < 32; channels++) {
20088 DWConvMicrokernelTester()
20089 .cr(16)
20090 .kr(25)
20091 .channels(channels)
20092 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020093 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020094 }
20095 }
20096
20097 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
20098 TEST_REQUIRES_X86_AVX;
20099 for (uint32_t channels = 17; channels < 32; channels++) {
20100 DWConvMicrokernelTester()
20101 .cr(16)
20102 .kr(25)
20103 .channels(channels)
20104 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020105 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020106 }
20107 }
20108
20109 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel) {
20110 TEST_REQUIRES_X86_AVX;
20111 for (size_t channels = 1; channels <= 80; channels += 15) {
20112 DWConvMicrokernelTester()
20113 .cr(16)
20114 .kr(25)
20115 .channels(channels)
20116 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020117 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020118 }
20119 }
20120
20121 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_step) {
20122 TEST_REQUIRES_X86_AVX;
20123 for (size_t channels = 1; channels <= 80; channels += 15) {
20124 for (size_t step = 2; step <= 25; step++) {
20125 DWConvMicrokernelTester()
20126 .cr(16)
20127 .kr(25)
20128 .channels(channels)
20129 .width(3)
20130 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020131 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020132 }
20133 }
20134 }
20135
20136 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
20137 TEST_REQUIRES_X86_AVX;
20138 for (size_t channels = 1; channels <= 80; channels += 15) {
20139 DWConvMicrokernelTester()
20140 .cr(16)
20141 .kr(25)
20142 .channels(16)
20143 .width(5)
20144 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020145 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020146 }
20147 }
20148
20149 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
20150 TEST_REQUIRES_X86_AVX;
20151 for (size_t channels = 1; channels <= 80; channels += 15) {
20152 DWConvMicrokernelTester()
20153 .cr(16)
20154 .kr(25)
20155 .channels(channels)
20156 .width(3)
20157 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020158 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020159 }
20160 }
20161
20162 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
20163 TEST_REQUIRES_X86_AVX;
20164 for (size_t channels = 1; channels <= 80; channels += 15) {
20165 DWConvMicrokernelTester()
20166 .cr(16)
20167 .kr(25)
20168 .channels(channels)
20169 .width(3)
20170 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020171 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020172 }
20173 }
20174
20175 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, input_offset) {
20176 TEST_REQUIRES_X86_AVX;
20177 for (uint32_t channels = 32; channels < 256; channels += 48) {
20178 DWConvMicrokernelTester()
20179 .cr(16)
20180 .kr(25)
20181 .channels(channels)
20182 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080020183 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020184 }
20185 }
20186
20187 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, zero) {
20188 TEST_REQUIRES_X86_AVX;
20189 for (uint32_t mz = 0; mz < 25; mz++) {
20190 for (uint32_t channels = 32; channels < 256; channels += 48) {
20191 DWConvMicrokernelTester()
20192 .cr(16)
20193 .kr(25)
20194 .channels(channels)
20195 .input_offset(304)
20196 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020197 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020198 }
20199 }
20200 }
20201#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20202
20203
20204#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20205 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_eq_8) {
20206 TEST_REQUIRES_X86_XOP;
20207 DWConvMicrokernelTester()
20208 .cr(8)
20209 .kr(25)
20210 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020211 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020212 }
20213
20214 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8) {
20215 TEST_REQUIRES_X86_XOP;
20216 for (uint32_t channels = 16; channels < 128; channels += 24) {
20217 DWConvMicrokernelTester()
20218 .cr(8)
20219 .kr(25)
20220 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020221 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020222 }
20223 }
20224
20225 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmin) {
20226 TEST_REQUIRES_X86_XOP;
20227 for (uint32_t channels = 16; channels < 128; channels += 24) {
20228 DWConvMicrokernelTester()
20229 .cr(8)
20230 .kr(25)
20231 .channels(channels)
20232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020233 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020234 }
20235 }
20236
20237 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmax) {
20238 TEST_REQUIRES_X86_XOP;
20239 for (uint32_t channels = 16; channels < 128; channels += 24) {
20240 DWConvMicrokernelTester()
20241 .cr(8)
20242 .kr(25)
20243 .channels(channels)
20244 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020245 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020246 }
20247 }
20248
20249 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_lt_8) {
20250 TEST_REQUIRES_X86_XOP;
20251 for (uint32_t channels = 1; channels < 8; channels++) {
20252 DWConvMicrokernelTester()
20253 .cr(8)
20254 .kr(25)
20255 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020256 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020257 }
20258 }
20259
20260 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8) {
20261 TEST_REQUIRES_X86_XOP;
20262 for (uint32_t channels = 9; channels < 16; channels++) {
20263 DWConvMicrokernelTester()
20264 .cr(8)
20265 .kr(25)
20266 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020267 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020268 }
20269 }
20270
20271 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
20272 TEST_REQUIRES_X86_XOP;
20273 for (uint32_t channels = 9; channels < 16; channels++) {
20274 DWConvMicrokernelTester()
20275 .cr(8)
20276 .kr(25)
20277 .channels(channels)
20278 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020279 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020280 }
20281 }
20282
20283 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
20284 TEST_REQUIRES_X86_XOP;
20285 for (uint32_t channels = 9; channels < 16; channels++) {
20286 DWConvMicrokernelTester()
20287 .cr(8)
20288 .kr(25)
20289 .channels(channels)
20290 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020291 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020292 }
20293 }
20294
20295 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel) {
20296 TEST_REQUIRES_X86_XOP;
20297 for (size_t channels = 1; channels <= 40; channels += 7) {
20298 DWConvMicrokernelTester()
20299 .cr(8)
20300 .kr(25)
20301 .channels(channels)
20302 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020303 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020304 }
20305 }
20306
20307 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_step) {
20308 TEST_REQUIRES_X86_XOP;
20309 for (size_t channels = 1; channels <= 40; channels += 7) {
20310 for (size_t step = 2; step <= 25; step++) {
20311 DWConvMicrokernelTester()
20312 .cr(8)
20313 .kr(25)
20314 .channels(channels)
20315 .width(3)
20316 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020317 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020318 }
20319 }
20320 }
20321
20322 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
20323 TEST_REQUIRES_X86_XOP;
20324 for (size_t channels = 1; channels <= 40; channels += 7) {
20325 DWConvMicrokernelTester()
20326 .cr(8)
20327 .kr(25)
20328 .channels(8)
20329 .width(5)
20330 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020331 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020332 }
20333 }
20334
20335 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
20336 TEST_REQUIRES_X86_XOP;
20337 for (size_t channels = 1; channels <= 40; channels += 7) {
20338 DWConvMicrokernelTester()
20339 .cr(8)
20340 .kr(25)
20341 .channels(channels)
20342 .width(3)
20343 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020344 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020345 }
20346 }
20347
20348 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
20349 TEST_REQUIRES_X86_XOP;
20350 for (size_t channels = 1; channels <= 40; channels += 7) {
20351 DWConvMicrokernelTester()
20352 .cr(8)
20353 .kr(25)
20354 .channels(channels)
20355 .width(3)
20356 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020357 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020358 }
20359 }
20360
20361 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, input_offset) {
20362 TEST_REQUIRES_X86_XOP;
20363 for (uint32_t channels = 16; channels < 128; channels += 24) {
20364 DWConvMicrokernelTester()
20365 .cr(8)
20366 .kr(25)
20367 .channels(channels)
20368 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080020369 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020370 }
20371 }
20372
20373 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, zero) {
20374 TEST_REQUIRES_X86_XOP;
20375 for (uint32_t mz = 0; mz < 25; mz++) {
20376 for (uint32_t channels = 16; channels < 128; channels += 24) {
20377 DWConvMicrokernelTester()
20378 .cr(8)
20379 .kr(25)
20380 .channels(channels)
20381 .input_offset(176)
20382 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020383 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020384 }
20385 }
20386 }
20387#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20388
20389
20390#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20391 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_eq_16) {
20392 TEST_REQUIRES_X86_XOP;
20393 DWConvMicrokernelTester()
20394 .cr(16)
20395 .kr(25)
20396 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020397 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020398 }
20399
20400 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16) {
20401 TEST_REQUIRES_X86_XOP;
20402 for (uint32_t channels = 32; channels < 256; channels += 48) {
20403 DWConvMicrokernelTester()
20404 .cr(16)
20405 .kr(25)
20406 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020407 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020408 }
20409 }
20410
20411 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmin) {
20412 TEST_REQUIRES_X86_XOP;
20413 for (uint32_t channels = 32; channels < 256; channels += 48) {
20414 DWConvMicrokernelTester()
20415 .cr(16)
20416 .kr(25)
20417 .channels(channels)
20418 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020419 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020420 }
20421 }
20422
20423 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmax) {
20424 TEST_REQUIRES_X86_XOP;
20425 for (uint32_t channels = 32; channels < 256; channels += 48) {
20426 DWConvMicrokernelTester()
20427 .cr(16)
20428 .kr(25)
20429 .channels(channels)
20430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020431 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020432 }
20433 }
20434
20435 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_lt_16) {
20436 TEST_REQUIRES_X86_XOP;
20437 for (uint32_t channels = 1; channels < 16; channels++) {
20438 DWConvMicrokernelTester()
20439 .cr(16)
20440 .kr(25)
20441 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020442 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020443 }
20444 }
20445
20446 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16) {
20447 TEST_REQUIRES_X86_XOP;
20448 for (uint32_t channels = 17; channels < 32; channels++) {
20449 DWConvMicrokernelTester()
20450 .cr(16)
20451 .kr(25)
20452 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020453 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020454 }
20455 }
20456
20457 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
20458 TEST_REQUIRES_X86_XOP;
20459 for (uint32_t channels = 17; channels < 32; channels++) {
20460 DWConvMicrokernelTester()
20461 .cr(16)
20462 .kr(25)
20463 .channels(channels)
20464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020465 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020466 }
20467 }
20468
20469 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
20470 TEST_REQUIRES_X86_XOP;
20471 for (uint32_t channels = 17; channels < 32; channels++) {
20472 DWConvMicrokernelTester()
20473 .cr(16)
20474 .kr(25)
20475 .channels(channels)
20476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020478 }
20479 }
20480
20481 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel) {
20482 TEST_REQUIRES_X86_XOP;
20483 for (size_t channels = 1; channels <= 80; channels += 15) {
20484 DWConvMicrokernelTester()
20485 .cr(16)
20486 .kr(25)
20487 .channels(channels)
20488 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020489 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020490 }
20491 }
20492
20493 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_step) {
20494 TEST_REQUIRES_X86_XOP;
20495 for (size_t channels = 1; channels <= 80; channels += 15) {
20496 for (size_t step = 2; step <= 25; step++) {
20497 DWConvMicrokernelTester()
20498 .cr(16)
20499 .kr(25)
20500 .channels(channels)
20501 .width(3)
20502 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020503 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020504 }
20505 }
20506 }
20507
20508 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
20509 TEST_REQUIRES_X86_XOP;
20510 for (size_t channels = 1; channels <= 80; channels += 15) {
20511 DWConvMicrokernelTester()
20512 .cr(16)
20513 .kr(25)
20514 .channels(16)
20515 .width(5)
20516 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020517 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020518 }
20519 }
20520
20521 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
20522 TEST_REQUIRES_X86_XOP;
20523 for (size_t channels = 1; channels <= 80; channels += 15) {
20524 DWConvMicrokernelTester()
20525 .cr(16)
20526 .kr(25)
20527 .channels(channels)
20528 .width(3)
20529 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020530 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020531 }
20532 }
20533
20534 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
20535 TEST_REQUIRES_X86_XOP;
20536 for (size_t channels = 1; channels <= 80; channels += 15) {
20537 DWConvMicrokernelTester()
20538 .cr(16)
20539 .kr(25)
20540 .channels(channels)
20541 .width(3)
20542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020543 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020544 }
20545 }
20546
20547 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, input_offset) {
20548 TEST_REQUIRES_X86_XOP;
20549 for (uint32_t channels = 32; channels < 256; channels += 48) {
20550 DWConvMicrokernelTester()
20551 .cr(16)
20552 .kr(25)
20553 .channels(channels)
20554 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080020555 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020556 }
20557 }
20558
20559 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, zero) {
20560 TEST_REQUIRES_X86_XOP;
20561 for (uint32_t mz = 0; mz < 25; mz++) {
20562 for (uint32_t channels = 32; channels < 256; channels += 48) {
20563 DWConvMicrokernelTester()
20564 .cr(16)
20565 .kr(25)
20566 .channels(channels)
20567 .input_offset(304)
20568 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020569 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan09668562021-07-26 16:52:20 -070020570 }
20571 }
20572 }
20573#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20574
20575
20576#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -070020577 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_eq_16) {
Marat Dukhan82286892021-06-04 17:27:27 -070020578 TEST_REQUIRES_X86_AVX2;
20579 DWConvMicrokernelTester()
20580 .cr(16)
20581 .kr(25)
20582 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020583 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020584 }
20585
Marat Dukhan881ab022021-07-28 13:49:26 -070020586 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16) {
Marat Dukhan82286892021-06-04 17:27:27 -070020587 TEST_REQUIRES_X86_AVX2;
20588 for (uint32_t channels = 32; channels < 256; channels += 48) {
20589 DWConvMicrokernelTester()
20590 .cr(16)
20591 .kr(25)
20592 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020594 }
20595 }
20596
Marat Dukhan881ab022021-07-28 13:49:26 -070020597 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020598 TEST_REQUIRES_X86_AVX2;
20599 for (uint32_t channels = 32; channels < 256; channels += 48) {
20600 DWConvMicrokernelTester()
20601 .cr(16)
20602 .kr(25)
20603 .channels(channels)
20604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020605 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020606 }
20607 }
20608
Marat Dukhan881ab022021-07-28 13:49:26 -070020609 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020610 TEST_REQUIRES_X86_AVX2;
20611 for (uint32_t channels = 32; channels < 256; channels += 48) {
20612 DWConvMicrokernelTester()
20613 .cr(16)
20614 .kr(25)
20615 .channels(channels)
20616 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020617 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020618 }
20619 }
20620
Marat Dukhan881ab022021-07-28 13:49:26 -070020621 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_lt_16) {
Marat Dukhan82286892021-06-04 17:27:27 -070020622 TEST_REQUIRES_X86_AVX2;
20623 for (uint32_t channels = 1; channels < 16; channels++) {
20624 DWConvMicrokernelTester()
20625 .cr(16)
20626 .kr(25)
20627 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020628 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020629 }
20630 }
20631
Marat Dukhan881ab022021-07-28 13:49:26 -070020632 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16) {
Marat Dukhan82286892021-06-04 17:27:27 -070020633 TEST_REQUIRES_X86_AVX2;
20634 for (uint32_t channels = 17; channels < 32; channels++) {
20635 DWConvMicrokernelTester()
20636 .cr(16)
20637 .kr(25)
20638 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020639 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020640 }
20641 }
20642
Marat Dukhan881ab022021-07-28 13:49:26 -070020643 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020644 TEST_REQUIRES_X86_AVX2;
20645 for (uint32_t channels = 17; channels < 32; channels++) {
20646 DWConvMicrokernelTester()
20647 .cr(16)
20648 .kr(25)
20649 .channels(channels)
20650 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020651 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020652 }
20653 }
20654
Marat Dukhan881ab022021-07-28 13:49:26 -070020655 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020656 TEST_REQUIRES_X86_AVX2;
20657 for (uint32_t channels = 17; channels < 32; channels++) {
20658 DWConvMicrokernelTester()
20659 .cr(16)
20660 .kr(25)
20661 .channels(channels)
20662 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020664 }
20665 }
20666
Marat Dukhan881ab022021-07-28 13:49:26 -070020667 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan82286892021-06-04 17:27:27 -070020668 TEST_REQUIRES_X86_AVX2;
20669 for (size_t channels = 1; channels <= 80; channels += 15) {
20670 DWConvMicrokernelTester()
20671 .cr(16)
20672 .kr(25)
20673 .channels(channels)
20674 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020675 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020676 }
20677 }
20678
Marat Dukhan881ab022021-07-28 13:49:26 -070020679 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan82286892021-06-04 17:27:27 -070020680 TEST_REQUIRES_X86_AVX2;
20681 for (size_t channels = 1; channels <= 80; channels += 15) {
20682 for (size_t step = 2; step <= 25; step++) {
20683 DWConvMicrokernelTester()
20684 .cr(16)
20685 .kr(25)
20686 .channels(channels)
20687 .width(3)
20688 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020689 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020690 }
20691 }
20692 }
20693
Marat Dukhan881ab022021-07-28 13:49:26 -070020694 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan82286892021-06-04 17:27:27 -070020695 TEST_REQUIRES_X86_AVX2;
20696 for (size_t channels = 1; channels <= 80; channels += 15) {
20697 DWConvMicrokernelTester()
20698 .cr(16)
20699 .kr(25)
20700 .channels(16)
20701 .width(5)
20702 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020703 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020704 }
20705 }
20706
Marat Dukhan881ab022021-07-28 13:49:26 -070020707 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020708 TEST_REQUIRES_X86_AVX2;
20709 for (size_t channels = 1; channels <= 80; channels += 15) {
20710 DWConvMicrokernelTester()
20711 .cr(16)
20712 .kr(25)
20713 .channels(channels)
20714 .width(3)
20715 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020716 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020717 }
20718 }
20719
Marat Dukhan881ab022021-07-28 13:49:26 -070020720 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020721 TEST_REQUIRES_X86_AVX2;
20722 for (size_t channels = 1; channels <= 80; channels += 15) {
20723 DWConvMicrokernelTester()
20724 .cr(16)
20725 .kr(25)
20726 .channels(channels)
20727 .width(3)
20728 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020729 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020730 }
20731 }
20732
Marat Dukhan881ab022021-07-28 13:49:26 -070020733 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan82286892021-06-04 17:27:27 -070020734 TEST_REQUIRES_X86_AVX2;
20735 for (uint32_t channels = 32; channels < 256; channels += 48) {
20736 DWConvMicrokernelTester()
20737 .cr(16)
20738 .kr(25)
20739 .channels(channels)
20740 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080020741 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020742 }
20743 }
20744
Marat Dukhan881ab022021-07-28 13:49:26 -070020745 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan82286892021-06-04 17:27:27 -070020746 TEST_REQUIRES_X86_AVX2;
20747 for (uint32_t mz = 0; mz < 25; mz++) {
20748 for (uint32_t channels = 32; channels < 256; channels += 48) {
20749 DWConvMicrokernelTester()
20750 .cr(16)
20751 .kr(25)
20752 .channels(channels)
20753 .input_offset(304)
20754 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020755 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020756 }
20757 }
20758 }
20759#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20760
20761
20762#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan881ab022021-07-28 13:49:26 -070020763 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_eq_32) {
Marat Dukhan82286892021-06-04 17:27:27 -070020764 TEST_REQUIRES_X86_AVX2;
20765 DWConvMicrokernelTester()
20766 .cr(32)
20767 .kr(25)
20768 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080020769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020770 }
20771
Marat Dukhan881ab022021-07-28 13:49:26 -070020772 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32) {
Marat Dukhan82286892021-06-04 17:27:27 -070020773 TEST_REQUIRES_X86_AVX2;
20774 for (uint32_t channels = 64; channels < 512; channels += 96) {
20775 DWConvMicrokernelTester()
20776 .cr(32)
20777 .kr(25)
20778 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020779 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020780 }
20781 }
20782
Marat Dukhan881ab022021-07-28 13:49:26 -070020783 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020784 TEST_REQUIRES_X86_AVX2;
20785 for (uint32_t channels = 64; channels < 512; channels += 96) {
20786 DWConvMicrokernelTester()
20787 .cr(32)
20788 .kr(25)
20789 .channels(channels)
20790 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020791 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020792 }
20793 }
20794
Marat Dukhan881ab022021-07-28 13:49:26 -070020795 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020796 TEST_REQUIRES_X86_AVX2;
20797 for (uint32_t channels = 64; channels < 512; channels += 96) {
20798 DWConvMicrokernelTester()
20799 .cr(32)
20800 .kr(25)
20801 .channels(channels)
20802 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020803 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020804 }
20805 }
20806
Marat Dukhan881ab022021-07-28 13:49:26 -070020807 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_lt_32) {
Marat Dukhan82286892021-06-04 17:27:27 -070020808 TEST_REQUIRES_X86_AVX2;
20809 for (uint32_t channels = 1; channels < 32; channels++) {
20810 DWConvMicrokernelTester()
20811 .cr(32)
20812 .kr(25)
20813 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020814 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020815 }
20816 }
20817
Marat Dukhan881ab022021-07-28 13:49:26 -070020818 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32) {
Marat Dukhan82286892021-06-04 17:27:27 -070020819 TEST_REQUIRES_X86_AVX2;
20820 for (uint32_t channels = 33; channels < 64; channels++) {
20821 DWConvMicrokernelTester()
20822 .cr(32)
20823 .kr(25)
20824 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020825 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020826 }
20827 }
20828
Marat Dukhan881ab022021-07-28 13:49:26 -070020829 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020830 TEST_REQUIRES_X86_AVX2;
20831 for (uint32_t channels = 33; channels < 64; channels++) {
20832 DWConvMicrokernelTester()
20833 .cr(32)
20834 .kr(25)
20835 .channels(channels)
20836 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020838 }
20839 }
20840
Marat Dukhan881ab022021-07-28 13:49:26 -070020841 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020842 TEST_REQUIRES_X86_AVX2;
20843 for (uint32_t channels = 33; channels < 64; channels++) {
20844 DWConvMicrokernelTester()
20845 .cr(32)
20846 .kr(25)
20847 .channels(channels)
20848 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020849 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020850 }
20851 }
20852
Marat Dukhan881ab022021-07-28 13:49:26 -070020853 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel) {
Marat Dukhan82286892021-06-04 17:27:27 -070020854 TEST_REQUIRES_X86_AVX2;
20855 for (size_t channels = 1; channels <= 160; channels += 31) {
20856 DWConvMicrokernelTester()
20857 .cr(32)
20858 .kr(25)
20859 .channels(channels)
20860 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020861 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020862 }
20863 }
20864
Marat Dukhan881ab022021-07-28 13:49:26 -070020865 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
Marat Dukhan82286892021-06-04 17:27:27 -070020866 TEST_REQUIRES_X86_AVX2;
20867 for (size_t channels = 1; channels <= 160; channels += 31) {
20868 for (size_t step = 2; step <= 25; step++) {
20869 DWConvMicrokernelTester()
20870 .cr(32)
20871 .kr(25)
20872 .channels(channels)
20873 .width(3)
20874 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080020875 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020876 }
20877 }
20878 }
20879
Marat Dukhan881ab022021-07-28 13:49:26 -070020880 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
Marat Dukhan82286892021-06-04 17:27:27 -070020881 TEST_REQUIRES_X86_AVX2;
20882 for (size_t channels = 1; channels <= 160; channels += 31) {
20883 DWConvMicrokernelTester()
20884 .cr(32)
20885 .kr(25)
20886 .channels(32)
20887 .width(5)
20888 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080020889 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020890 }
20891 }
20892
Marat Dukhan881ab022021-07-28 13:49:26 -070020893 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
Marat Dukhan82286892021-06-04 17:27:27 -070020894 TEST_REQUIRES_X86_AVX2;
20895 for (size_t channels = 1; channels <= 160; channels += 31) {
20896 DWConvMicrokernelTester()
20897 .cr(32)
20898 .kr(25)
20899 .channels(channels)
20900 .width(3)
20901 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020902 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020903 }
20904 }
20905
Marat Dukhan881ab022021-07-28 13:49:26 -070020906 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
Marat Dukhan82286892021-06-04 17:27:27 -070020907 TEST_REQUIRES_X86_AVX2;
20908 for (size_t channels = 1; channels <= 160; channels += 31) {
20909 DWConvMicrokernelTester()
20910 .cr(32)
20911 .kr(25)
20912 .channels(channels)
20913 .width(3)
20914 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020915 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020916 }
20917 }
20918
Marat Dukhan881ab022021-07-28 13:49:26 -070020919 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, input_offset) {
Marat Dukhan82286892021-06-04 17:27:27 -070020920 TEST_REQUIRES_X86_AVX2;
20921 for (uint32_t channels = 64; channels < 512; channels += 96) {
20922 DWConvMicrokernelTester()
20923 .cr(32)
20924 .kr(25)
20925 .channels(channels)
20926 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080020927 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070020928 }
20929 }
20930
Marat Dukhan881ab022021-07-28 13:49:26 -070020931 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, zero) {
Marat Dukhan82286892021-06-04 17:27:27 -070020932 TEST_REQUIRES_X86_AVX2;
20933 for (uint32_t mz = 0; mz < 25; mz++) {
20934 for (uint32_t channels = 64; channels < 512; channels += 96) {
20935 DWConvMicrokernelTester()
20936 .cr(32)
20937 .kr(25)
20938 .channels(channels)
20939 .input_offset(592)
20940 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020941 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070020942 }
20943 }
20944 }
20945#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20946
20947
20948#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20949 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_eq_16) {
20950 TEST_REQUIRES_X86_AVX2;
20951 DWConvMicrokernelTester()
20952 .cr(16)
20953 .kr(25)
20954 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020955 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070020956 }
20957
20958 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16) {
20959 TEST_REQUIRES_X86_AVX2;
20960 for (uint32_t channels = 32; channels < 256; channels += 48) {
20961 DWConvMicrokernelTester()
20962 .cr(16)
20963 .kr(25)
20964 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080020965 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070020966 }
20967 }
20968
20969 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
20970 TEST_REQUIRES_X86_AVX2;
20971 for (uint32_t channels = 32; channels < 256; channels += 48) {
20972 DWConvMicrokernelTester()
20973 .cr(16)
20974 .kr(25)
20975 .channels(channels)
20976 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020977 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070020978 }
20979 }
20980
20981 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
20982 TEST_REQUIRES_X86_AVX2;
20983 for (uint32_t channels = 32; channels < 256; channels += 48) {
20984 DWConvMicrokernelTester()
20985 .cr(16)
20986 .kr(25)
20987 .channels(channels)
20988 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020989 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070020990 }
20991 }
20992
20993 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_lt_16) {
20994 TEST_REQUIRES_X86_AVX2;
20995 for (uint32_t channels = 1; channels < 16; channels++) {
20996 DWConvMicrokernelTester()
20997 .cr(16)
20998 .kr(25)
20999 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021000 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021001 }
21002 }
21003
21004 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16) {
21005 TEST_REQUIRES_X86_AVX2;
21006 for (uint32_t channels = 17; channels < 32; channels++) {
21007 DWConvMicrokernelTester()
21008 .cr(16)
21009 .kr(25)
21010 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021011 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021012 }
21013 }
21014
21015 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
21016 TEST_REQUIRES_X86_AVX2;
21017 for (uint32_t channels = 17; channels < 32; channels++) {
21018 DWConvMicrokernelTester()
21019 .cr(16)
21020 .kr(25)
21021 .channels(channels)
21022 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021023 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021024 }
21025 }
21026
21027 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
21028 TEST_REQUIRES_X86_AVX2;
21029 for (uint32_t channels = 17; channels < 32; channels++) {
21030 DWConvMicrokernelTester()
21031 .cr(16)
21032 .kr(25)
21033 .channels(channels)
21034 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021035 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021036 }
21037 }
21038
21039 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel) {
21040 TEST_REQUIRES_X86_AVX2;
21041 for (size_t channels = 1; channels <= 80; channels += 15) {
21042 DWConvMicrokernelTester()
21043 .cr(16)
21044 .kr(25)
21045 .channels(channels)
21046 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021047 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021048 }
21049 }
21050
21051 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
21052 TEST_REQUIRES_X86_AVX2;
21053 for (size_t channels = 1; channels <= 80; channels += 15) {
21054 for (size_t step = 2; step <= 25; step++) {
21055 DWConvMicrokernelTester()
21056 .cr(16)
21057 .kr(25)
21058 .channels(channels)
21059 .width(3)
21060 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021061 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021062 }
21063 }
21064 }
21065
21066 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
21067 TEST_REQUIRES_X86_AVX2;
21068 for (size_t channels = 1; channels <= 80; channels += 15) {
21069 DWConvMicrokernelTester()
21070 .cr(16)
21071 .kr(25)
21072 .channels(16)
21073 .width(5)
21074 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021075 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021076 }
21077 }
21078
21079 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
21080 TEST_REQUIRES_X86_AVX2;
21081 for (size_t channels = 1; channels <= 80; channels += 15) {
21082 DWConvMicrokernelTester()
21083 .cr(16)
21084 .kr(25)
21085 .channels(channels)
21086 .width(3)
21087 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021088 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021089 }
21090 }
21091
21092 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
21093 TEST_REQUIRES_X86_AVX2;
21094 for (size_t channels = 1; channels <= 80; channels += 15) {
21095 DWConvMicrokernelTester()
21096 .cr(16)
21097 .kr(25)
21098 .channels(channels)
21099 .width(3)
21100 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021101 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021102 }
21103 }
21104
21105 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, input_offset) {
21106 TEST_REQUIRES_X86_AVX2;
21107 for (uint32_t channels = 32; channels < 256; channels += 48) {
21108 DWConvMicrokernelTester()
21109 .cr(16)
21110 .kr(25)
21111 .channels(channels)
21112 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080021113 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021114 }
21115 }
21116
21117 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, zero) {
21118 TEST_REQUIRES_X86_AVX2;
21119 for (uint32_t mz = 0; mz < 25; mz++) {
21120 for (uint32_t channels = 32; channels < 256; channels += 48) {
21121 DWConvMicrokernelTester()
21122 .cr(16)
21123 .kr(25)
21124 .channels(channels)
21125 .input_offset(304)
21126 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021127 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021128 }
21129 }
21130 }
21131#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21132
21133
21134#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21135 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_eq_32) {
21136 TEST_REQUIRES_X86_AVX2;
21137 DWConvMicrokernelTester()
21138 .cr(32)
21139 .kr(25)
21140 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080021141 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021142 }
21143
21144 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32) {
21145 TEST_REQUIRES_X86_AVX2;
21146 for (uint32_t channels = 64; channels < 512; channels += 96) {
21147 DWConvMicrokernelTester()
21148 .cr(32)
21149 .kr(25)
21150 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021152 }
21153 }
21154
21155 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
21156 TEST_REQUIRES_X86_AVX2;
21157 for (uint32_t channels = 64; channels < 512; channels += 96) {
21158 DWConvMicrokernelTester()
21159 .cr(32)
21160 .kr(25)
21161 .channels(channels)
21162 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021163 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021164 }
21165 }
21166
21167 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
21168 TEST_REQUIRES_X86_AVX2;
21169 for (uint32_t channels = 64; channels < 512; channels += 96) {
21170 DWConvMicrokernelTester()
21171 .cr(32)
21172 .kr(25)
21173 .channels(channels)
21174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021175 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021176 }
21177 }
21178
21179 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_lt_32) {
21180 TEST_REQUIRES_X86_AVX2;
21181 for (uint32_t channels = 1; channels < 32; channels++) {
21182 DWConvMicrokernelTester()
21183 .cr(32)
21184 .kr(25)
21185 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021186 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021187 }
21188 }
21189
21190 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32) {
21191 TEST_REQUIRES_X86_AVX2;
21192 for (uint32_t channels = 33; channels < 64; channels++) {
21193 DWConvMicrokernelTester()
21194 .cr(32)
21195 .kr(25)
21196 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021197 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021198 }
21199 }
21200
21201 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
21202 TEST_REQUIRES_X86_AVX2;
21203 for (uint32_t channels = 33; channels < 64; channels++) {
21204 DWConvMicrokernelTester()
21205 .cr(32)
21206 .kr(25)
21207 .channels(channels)
21208 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021209 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021210 }
21211 }
21212
21213 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
21214 TEST_REQUIRES_X86_AVX2;
21215 for (uint32_t channels = 33; channels < 64; channels++) {
21216 DWConvMicrokernelTester()
21217 .cr(32)
21218 .kr(25)
21219 .channels(channels)
21220 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021221 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021222 }
21223 }
21224
21225 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel) {
21226 TEST_REQUIRES_X86_AVX2;
21227 for (size_t channels = 1; channels <= 160; channels += 31) {
21228 DWConvMicrokernelTester()
21229 .cr(32)
21230 .kr(25)
21231 .channels(channels)
21232 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021233 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021234 }
21235 }
21236
21237 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
21238 TEST_REQUIRES_X86_AVX2;
21239 for (size_t channels = 1; channels <= 160; channels += 31) {
21240 for (size_t step = 2; step <= 25; step++) {
21241 DWConvMicrokernelTester()
21242 .cr(32)
21243 .kr(25)
21244 .channels(channels)
21245 .width(3)
21246 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021247 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021248 }
21249 }
21250 }
21251
21252 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
21253 TEST_REQUIRES_X86_AVX2;
21254 for (size_t channels = 1; channels <= 160; channels += 31) {
21255 DWConvMicrokernelTester()
21256 .cr(32)
21257 .kr(25)
21258 .channels(32)
21259 .width(5)
21260 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080021261 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021262 }
21263 }
21264
21265 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
21266 TEST_REQUIRES_X86_AVX2;
21267 for (size_t channels = 1; channels <= 160; channels += 31) {
21268 DWConvMicrokernelTester()
21269 .cr(32)
21270 .kr(25)
21271 .channels(channels)
21272 .width(3)
21273 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021274 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021275 }
21276 }
21277
21278 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
21279 TEST_REQUIRES_X86_AVX2;
21280 for (size_t channels = 1; channels <= 160; channels += 31) {
21281 DWConvMicrokernelTester()
21282 .cr(32)
21283 .kr(25)
21284 .channels(channels)
21285 .width(3)
21286 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021287 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021288 }
21289 }
21290
21291 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, input_offset) {
21292 TEST_REQUIRES_X86_AVX2;
21293 for (uint32_t channels = 64; channels < 512; channels += 96) {
21294 DWConvMicrokernelTester()
21295 .cr(32)
21296 .kr(25)
21297 .channels(channels)
21298 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080021299 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan881ab022021-07-28 13:49:26 -070021300 }
21301 }
21302
21303 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, zero) {
21304 TEST_REQUIRES_X86_AVX2;
21305 for (uint32_t mz = 0; mz < 25; mz++) {
21306 for (uint32_t channels = 64; channels < 512; channels += 96) {
21307 DWConvMicrokernelTester()
21308 .cr(32)
21309 .kr(25)
21310 .channels(channels)
21311 .input_offset(592)
21312 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021313 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070021314 }
21315 }
21316 }
21317#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21318
21319
21320#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021321 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
21322 TEST_REQUIRES_X86_AVX2;
21323 DWConvMicrokernelTester()
21324 .cr(16)
21325 .kr(25)
21326 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080021327 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021328 }
21329
21330 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
21331 TEST_REQUIRES_X86_AVX2;
21332 for (uint32_t channels = 32; channels < 256; channels += 48) {
21333 DWConvMicrokernelTester()
21334 .cr(16)
21335 .kr(25)
21336 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021337 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021338 }
21339 }
21340
21341 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
21342 TEST_REQUIRES_X86_AVX2;
21343 for (uint32_t channels = 32; channels < 256; channels += 48) {
21344 DWConvMicrokernelTester()
21345 .cr(16)
21346 .kr(25)
21347 .channels(channels)
21348 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021349 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021350 }
21351 }
21352
21353 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
21354 TEST_REQUIRES_X86_AVX2;
21355 for (uint32_t channels = 32; channels < 256; channels += 48) {
21356 DWConvMicrokernelTester()
21357 .cr(16)
21358 .kr(25)
21359 .channels(channels)
21360 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021361 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021362 }
21363 }
21364
21365 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
21366 TEST_REQUIRES_X86_AVX2;
21367 for (uint32_t channels = 1; channels < 16; channels++) {
21368 DWConvMicrokernelTester()
21369 .cr(16)
21370 .kr(25)
21371 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021372 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021373 }
21374 }
21375
21376 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
21377 TEST_REQUIRES_X86_AVX2;
21378 for (uint32_t channels = 17; channels < 32; channels++) {
21379 DWConvMicrokernelTester()
21380 .cr(16)
21381 .kr(25)
21382 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021383 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021384 }
21385 }
21386
21387 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
21388 TEST_REQUIRES_X86_AVX2;
21389 for (uint32_t channels = 17; channels < 32; channels++) {
21390 DWConvMicrokernelTester()
21391 .cr(16)
21392 .kr(25)
21393 .channels(channels)
21394 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021395 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021396 }
21397 }
21398
21399 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
21400 TEST_REQUIRES_X86_AVX2;
21401 for (uint32_t channels = 17; channels < 32; channels++) {
21402 DWConvMicrokernelTester()
21403 .cr(16)
21404 .kr(25)
21405 .channels(channels)
21406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021407 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021408 }
21409 }
21410
21411 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
21412 TEST_REQUIRES_X86_AVX2;
21413 for (size_t channels = 1; channels <= 80; channels += 15) {
21414 DWConvMicrokernelTester()
21415 .cr(16)
21416 .kr(25)
21417 .channels(channels)
21418 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021419 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021420 }
21421 }
21422
21423 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
21424 TEST_REQUIRES_X86_AVX2;
21425 for (size_t channels = 1; channels <= 80; channels += 15) {
21426 for (size_t step = 2; step <= 25; step++) {
21427 DWConvMicrokernelTester()
21428 .cr(16)
21429 .kr(25)
21430 .channels(channels)
21431 .width(3)
21432 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021433 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021434 }
21435 }
21436 }
21437
21438 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
21439 TEST_REQUIRES_X86_AVX2;
21440 for (size_t channels = 1; channels <= 80; channels += 15) {
21441 DWConvMicrokernelTester()
21442 .cr(16)
21443 .kr(25)
21444 .channels(16)
21445 .width(5)
21446 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021447 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021448 }
21449 }
21450
21451 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
21452 TEST_REQUIRES_X86_AVX2;
21453 for (size_t channels = 1; channels <= 80; channels += 15) {
21454 DWConvMicrokernelTester()
21455 .cr(16)
21456 .kr(25)
21457 .channels(channels)
21458 .width(3)
21459 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021460 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021461 }
21462 }
21463
21464 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
21465 TEST_REQUIRES_X86_AVX2;
21466 for (size_t channels = 1; channels <= 80; channels += 15) {
21467 DWConvMicrokernelTester()
21468 .cr(16)
21469 .kr(25)
21470 .channels(channels)
21471 .width(3)
21472 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021473 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021474 }
21475 }
21476
21477 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
21478 TEST_REQUIRES_X86_AVX2;
21479 for (uint32_t channels = 32; channels < 256; channels += 48) {
21480 DWConvMicrokernelTester()
21481 .cr(16)
21482 .kr(25)
21483 .channels(channels)
21484 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080021485 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021486 }
21487 }
21488
21489 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
21490 TEST_REQUIRES_X86_AVX2;
21491 for (uint32_t mz = 0; mz < 25; mz++) {
21492 for (uint32_t channels = 32; channels < 256; channels += 48) {
21493 DWConvMicrokernelTester()
21494 .cr(16)
21495 .kr(25)
21496 .channels(channels)
21497 .input_offset(304)
21498 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021499 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021500 }
21501 }
21502 }
21503#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21504
21505
21506#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21507 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
21508 TEST_REQUIRES_X86_AVX2;
21509 DWConvMicrokernelTester()
21510 .cr(32)
21511 .kr(25)
21512 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080021513 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021514 }
21515
21516 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
21517 TEST_REQUIRES_X86_AVX2;
21518 for (uint32_t channels = 64; channels < 512; channels += 96) {
21519 DWConvMicrokernelTester()
21520 .cr(32)
21521 .kr(25)
21522 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021524 }
21525 }
21526
21527 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
21528 TEST_REQUIRES_X86_AVX2;
21529 for (uint32_t channels = 64; channels < 512; channels += 96) {
21530 DWConvMicrokernelTester()
21531 .cr(32)
21532 .kr(25)
21533 .channels(channels)
21534 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021535 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021536 }
21537 }
21538
21539 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
21540 TEST_REQUIRES_X86_AVX2;
21541 for (uint32_t channels = 64; channels < 512; channels += 96) {
21542 DWConvMicrokernelTester()
21543 .cr(32)
21544 .kr(25)
21545 .channels(channels)
21546 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021547 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021548 }
21549 }
21550
21551 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
21552 TEST_REQUIRES_X86_AVX2;
21553 for (uint32_t channels = 1; channels < 32; channels++) {
21554 DWConvMicrokernelTester()
21555 .cr(32)
21556 .kr(25)
21557 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021558 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021559 }
21560 }
21561
21562 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
21563 TEST_REQUIRES_X86_AVX2;
21564 for (uint32_t channels = 33; channels < 64; channels++) {
21565 DWConvMicrokernelTester()
21566 .cr(32)
21567 .kr(25)
21568 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021569 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021570 }
21571 }
21572
21573 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
21574 TEST_REQUIRES_X86_AVX2;
21575 for (uint32_t channels = 33; channels < 64; channels++) {
21576 DWConvMicrokernelTester()
21577 .cr(32)
21578 .kr(25)
21579 .channels(channels)
21580 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021581 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021582 }
21583 }
21584
21585 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
21586 TEST_REQUIRES_X86_AVX2;
21587 for (uint32_t channels = 33; channels < 64; channels++) {
21588 DWConvMicrokernelTester()
21589 .cr(32)
21590 .kr(25)
21591 .channels(channels)
21592 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021594 }
21595 }
21596
21597 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
21598 TEST_REQUIRES_X86_AVX2;
21599 for (size_t channels = 1; channels <= 160; channels += 31) {
21600 DWConvMicrokernelTester()
21601 .cr(32)
21602 .kr(25)
21603 .channels(channels)
21604 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021605 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021606 }
21607 }
21608
21609 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
21610 TEST_REQUIRES_X86_AVX2;
21611 for (size_t channels = 1; channels <= 160; channels += 31) {
21612 for (size_t step = 2; step <= 25; step++) {
21613 DWConvMicrokernelTester()
21614 .cr(32)
21615 .kr(25)
21616 .channels(channels)
21617 .width(3)
21618 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021619 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021620 }
21621 }
21622 }
21623
21624 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
21625 TEST_REQUIRES_X86_AVX2;
21626 for (size_t channels = 1; channels <= 160; channels += 31) {
21627 DWConvMicrokernelTester()
21628 .cr(32)
21629 .kr(25)
21630 .channels(32)
21631 .width(5)
21632 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080021633 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021634 }
21635 }
21636
21637 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
21638 TEST_REQUIRES_X86_AVX2;
21639 for (size_t channels = 1; channels <= 160; channels += 31) {
21640 DWConvMicrokernelTester()
21641 .cr(32)
21642 .kr(25)
21643 .channels(channels)
21644 .width(3)
21645 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021646 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021647 }
21648 }
21649
21650 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
21651 TEST_REQUIRES_X86_AVX2;
21652 for (size_t channels = 1; channels <= 160; channels += 31) {
21653 DWConvMicrokernelTester()
21654 .cr(32)
21655 .kr(25)
21656 .channels(channels)
21657 .width(3)
21658 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021659 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021660 }
21661 }
21662
21663 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
21664 TEST_REQUIRES_X86_AVX2;
21665 for (uint32_t channels = 64; channels < 512; channels += 96) {
21666 DWConvMicrokernelTester()
21667 .cr(32)
21668 .kr(25)
21669 .channels(channels)
21670 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080021671 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021672 }
21673 }
21674
21675 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
21676 TEST_REQUIRES_X86_AVX2;
21677 for (uint32_t mz = 0; mz < 25; mz++) {
21678 for (uint32_t channels = 64; channels < 512; channels += 96) {
21679 DWConvMicrokernelTester()
21680 .cr(32)
21681 .kr(25)
21682 .channels(channels)
21683 .input_offset(592)
21684 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021685 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan60bb7ec2021-07-28 18:51:28 -070021686 }
21687 }
21688 }
21689#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21690
21691
21692#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -070021693 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
21694 TEST_REQUIRES_X86_SSE41;
21695 DWConvMicrokernelTester()
21696 .cr(8)
21697 .kr(25)
21698 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021699 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021700 }
21701
21702 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
21703 TEST_REQUIRES_X86_SSE41;
21704 for (uint32_t channels = 16; channels < 128; channels += 24) {
21705 DWConvMicrokernelTester()
21706 .cr(8)
21707 .kr(25)
21708 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021709 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021710 }
21711 }
21712
21713 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
21714 TEST_REQUIRES_X86_SSE41;
21715 for (uint32_t channels = 16; channels < 128; channels += 24) {
21716 DWConvMicrokernelTester()
21717 .cr(8)
21718 .kr(25)
21719 .channels(channels)
21720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021721 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021722 }
21723 }
21724
21725 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
21726 TEST_REQUIRES_X86_SSE41;
21727 for (uint32_t channels = 16; channels < 128; channels += 24) {
21728 DWConvMicrokernelTester()
21729 .cr(8)
21730 .kr(25)
21731 .channels(channels)
21732 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021733 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021734 }
21735 }
21736
21737 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
21738 TEST_REQUIRES_X86_SSE41;
21739 for (uint32_t channels = 1; channels < 8; channels++) {
21740 DWConvMicrokernelTester()
21741 .cr(8)
21742 .kr(25)
21743 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021744 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021745 }
21746 }
21747
21748 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
21749 TEST_REQUIRES_X86_SSE41;
21750 for (uint32_t channels = 9; channels < 16; channels++) {
21751 DWConvMicrokernelTester()
21752 .cr(8)
21753 .kr(25)
21754 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021755 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021756 }
21757 }
21758
21759 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
21760 TEST_REQUIRES_X86_SSE41;
21761 for (uint32_t channels = 9; channels < 16; channels++) {
21762 DWConvMicrokernelTester()
21763 .cr(8)
21764 .kr(25)
21765 .channels(channels)
21766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021767 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021768 }
21769 }
21770
21771 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
21772 TEST_REQUIRES_X86_SSE41;
21773 for (uint32_t channels = 9; channels < 16; channels++) {
21774 DWConvMicrokernelTester()
21775 .cr(8)
21776 .kr(25)
21777 .channels(channels)
21778 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021779 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021780 }
21781 }
21782
21783 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
21784 TEST_REQUIRES_X86_SSE41;
21785 for (size_t channels = 1; channels <= 40; channels += 7) {
21786 DWConvMicrokernelTester()
21787 .cr(8)
21788 .kr(25)
21789 .channels(channels)
21790 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021791 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021792 }
21793 }
21794
21795 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
21796 TEST_REQUIRES_X86_SSE41;
21797 for (size_t channels = 1; channels <= 40; channels += 7) {
21798 for (size_t step = 2; step <= 25; step++) {
21799 DWConvMicrokernelTester()
21800 .cr(8)
21801 .kr(25)
21802 .channels(channels)
21803 .width(3)
21804 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021805 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021806 }
21807 }
21808 }
21809
21810 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
21811 TEST_REQUIRES_X86_SSE41;
21812 for (size_t channels = 1; channels <= 40; channels += 7) {
21813 DWConvMicrokernelTester()
21814 .cr(8)
21815 .kr(25)
21816 .channels(8)
21817 .width(5)
21818 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021819 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021820 }
21821 }
21822
21823 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
21824 TEST_REQUIRES_X86_SSE41;
21825 for (size_t channels = 1; channels <= 40; channels += 7) {
21826 DWConvMicrokernelTester()
21827 .cr(8)
21828 .kr(25)
21829 .channels(channels)
21830 .width(3)
21831 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021832 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021833 }
21834 }
21835
21836 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
21837 TEST_REQUIRES_X86_SSE41;
21838 for (size_t channels = 1; channels <= 40; channels += 7) {
21839 DWConvMicrokernelTester()
21840 .cr(8)
21841 .kr(25)
21842 .channels(channels)
21843 .width(3)
21844 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021845 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021846 }
21847 }
21848
21849 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
21850 TEST_REQUIRES_X86_SSE41;
21851 for (uint32_t channels = 16; channels < 128; channels += 24) {
21852 DWConvMicrokernelTester()
21853 .cr(8)
21854 .kr(25)
21855 .channels(channels)
21856 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080021857 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021858 }
21859 }
21860
21861 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
21862 TEST_REQUIRES_X86_SSE41;
21863 for (uint32_t mz = 0; mz < 25; mz++) {
21864 for (uint32_t channels = 16; channels < 128; channels += 24) {
21865 DWConvMicrokernelTester()
21866 .cr(8)
21867 .kr(25)
21868 .channels(channels)
21869 .input_offset(176)
21870 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021871 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021872 }
21873 }
21874 }
21875#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21876
21877
21878#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21879 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
21880 TEST_REQUIRES_X86_SSE41;
21881 DWConvMicrokernelTester()
21882 .cr(16)
21883 .kr(25)
21884 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080021885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021886 }
21887
21888 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
21889 TEST_REQUIRES_X86_SSE41;
21890 for (uint32_t channels = 32; channels < 256; channels += 48) {
21891 DWConvMicrokernelTester()
21892 .cr(16)
21893 .kr(25)
21894 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021895 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021896 }
21897 }
21898
21899 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
21900 TEST_REQUIRES_X86_SSE41;
21901 for (uint32_t channels = 32; channels < 256; channels += 48) {
21902 DWConvMicrokernelTester()
21903 .cr(16)
21904 .kr(25)
21905 .channels(channels)
21906 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021907 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021908 }
21909 }
21910
21911 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
21912 TEST_REQUIRES_X86_SSE41;
21913 for (uint32_t channels = 32; channels < 256; channels += 48) {
21914 DWConvMicrokernelTester()
21915 .cr(16)
21916 .kr(25)
21917 .channels(channels)
21918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021919 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021920 }
21921 }
21922
21923 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
21924 TEST_REQUIRES_X86_SSE41;
21925 for (uint32_t channels = 1; channels < 16; channels++) {
21926 DWConvMicrokernelTester()
21927 .cr(16)
21928 .kr(25)
21929 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021930 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021931 }
21932 }
21933
21934 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
21935 TEST_REQUIRES_X86_SSE41;
21936 for (uint32_t channels = 17; channels < 32; channels++) {
21937 DWConvMicrokernelTester()
21938 .cr(16)
21939 .kr(25)
21940 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080021941 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021942 }
21943 }
21944
21945 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
21946 TEST_REQUIRES_X86_SSE41;
21947 for (uint32_t channels = 17; channels < 32; channels++) {
21948 DWConvMicrokernelTester()
21949 .cr(16)
21950 .kr(25)
21951 .channels(channels)
21952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021953 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021954 }
21955 }
21956
21957 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
21958 TEST_REQUIRES_X86_SSE41;
21959 for (uint32_t channels = 17; channels < 32; channels++) {
21960 DWConvMicrokernelTester()
21961 .cr(16)
21962 .kr(25)
21963 .channels(channels)
21964 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021965 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021966 }
21967 }
21968
21969 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
21970 TEST_REQUIRES_X86_SSE41;
21971 for (size_t channels = 1; channels <= 80; channels += 15) {
21972 DWConvMicrokernelTester()
21973 .cr(16)
21974 .kr(25)
21975 .channels(channels)
21976 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021977 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021978 }
21979 }
21980
21981 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
21982 TEST_REQUIRES_X86_SSE41;
21983 for (size_t channels = 1; channels <= 80; channels += 15) {
21984 for (size_t step = 2; step <= 25; step++) {
21985 DWConvMicrokernelTester()
21986 .cr(16)
21987 .kr(25)
21988 .channels(channels)
21989 .width(3)
21990 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080021991 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070021992 }
21993 }
21994 }
21995
21996 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
21997 TEST_REQUIRES_X86_SSE41;
21998 for (size_t channels = 1; channels <= 80; channels += 15) {
21999 DWConvMicrokernelTester()
22000 .cr(16)
22001 .kr(25)
22002 .channels(16)
22003 .width(5)
22004 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022005 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022006 }
22007 }
22008
22009 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
22010 TEST_REQUIRES_X86_SSE41;
22011 for (size_t channels = 1; channels <= 80; channels += 15) {
22012 DWConvMicrokernelTester()
22013 .cr(16)
22014 .kr(25)
22015 .channels(channels)
22016 .width(3)
22017 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022018 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022019 }
22020 }
22021
22022 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
22023 TEST_REQUIRES_X86_SSE41;
22024 for (size_t channels = 1; channels <= 80; channels += 15) {
22025 DWConvMicrokernelTester()
22026 .cr(16)
22027 .kr(25)
22028 .channels(channels)
22029 .width(3)
22030 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022031 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022032 }
22033 }
22034
22035 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
22036 TEST_REQUIRES_X86_SSE41;
22037 for (uint32_t channels = 32; channels < 256; channels += 48) {
22038 DWConvMicrokernelTester()
22039 .cr(16)
22040 .kr(25)
22041 .channels(channels)
22042 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080022043 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022044 }
22045 }
22046
22047 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
22048 TEST_REQUIRES_X86_SSE41;
22049 for (uint32_t mz = 0; mz < 25; mz++) {
22050 for (uint32_t channels = 32; channels < 256; channels += 48) {
22051 DWConvMicrokernelTester()
22052 .cr(16)
22053 .kr(25)
22054 .channels(channels)
22055 .input_offset(304)
22056 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022057 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022058 }
22059 }
22060 }
22061#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22062
22063
22064#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22065 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_eq_24) {
22066 TEST_REQUIRES_X86_SSE41;
22067 DWConvMicrokernelTester()
22068 .cr(24)
22069 .kr(25)
22070 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080022071 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022072 }
22073
22074 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24) {
22075 TEST_REQUIRES_X86_SSE41;
22076 for (uint32_t channels = 48; channels < 384; channels += 72) {
22077 DWConvMicrokernelTester()
22078 .cr(24)
22079 .kr(25)
22080 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022081 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022082 }
22083 }
22084
22085 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
22086 TEST_REQUIRES_X86_SSE41;
22087 for (uint32_t channels = 48; channels < 384; channels += 72) {
22088 DWConvMicrokernelTester()
22089 .cr(24)
22090 .kr(25)
22091 .channels(channels)
22092 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022093 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022094 }
22095 }
22096
22097 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
22098 TEST_REQUIRES_X86_SSE41;
22099 for (uint32_t channels = 48; channels < 384; channels += 72) {
22100 DWConvMicrokernelTester()
22101 .cr(24)
22102 .kr(25)
22103 .channels(channels)
22104 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022105 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022106 }
22107 }
22108
22109 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_lt_24) {
22110 TEST_REQUIRES_X86_SSE41;
22111 for (uint32_t channels = 1; channels < 24; channels++) {
22112 DWConvMicrokernelTester()
22113 .cr(24)
22114 .kr(25)
22115 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022116 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022117 }
22118 }
22119
22120 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24) {
22121 TEST_REQUIRES_X86_SSE41;
22122 for (uint32_t channels = 25; channels < 48; channels++) {
22123 DWConvMicrokernelTester()
22124 .cr(24)
22125 .kr(25)
22126 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022127 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022128 }
22129 }
22130
22131 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
22132 TEST_REQUIRES_X86_SSE41;
22133 for (uint32_t channels = 25; channels < 48; channels++) {
22134 DWConvMicrokernelTester()
22135 .cr(24)
22136 .kr(25)
22137 .channels(channels)
22138 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022139 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022140 }
22141 }
22142
22143 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
22144 TEST_REQUIRES_X86_SSE41;
22145 for (uint32_t channels = 25; channels < 48; channels++) {
22146 DWConvMicrokernelTester()
22147 .cr(24)
22148 .kr(25)
22149 .channels(channels)
22150 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022152 }
22153 }
22154
22155 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel) {
22156 TEST_REQUIRES_X86_SSE41;
22157 for (size_t channels = 1; channels <= 120; channels += 23) {
22158 DWConvMicrokernelTester()
22159 .cr(24)
22160 .kr(25)
22161 .channels(channels)
22162 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022163 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022164 }
22165 }
22166
22167 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_step) {
22168 TEST_REQUIRES_X86_SSE41;
22169 for (size_t channels = 1; channels <= 120; channels += 23) {
22170 for (size_t step = 2; step <= 25; step++) {
22171 DWConvMicrokernelTester()
22172 .cr(24)
22173 .kr(25)
22174 .channels(channels)
22175 .width(3)
22176 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022177 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022178 }
22179 }
22180 }
22181
22182 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
22183 TEST_REQUIRES_X86_SSE41;
22184 for (size_t channels = 1; channels <= 120; channels += 23) {
22185 DWConvMicrokernelTester()
22186 .cr(24)
22187 .kr(25)
22188 .channels(24)
22189 .width(5)
22190 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080022191 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022192 }
22193 }
22194
22195 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
22196 TEST_REQUIRES_X86_SSE41;
22197 for (size_t channels = 1; channels <= 120; channels += 23) {
22198 DWConvMicrokernelTester()
22199 .cr(24)
22200 .kr(25)
22201 .channels(channels)
22202 .width(3)
22203 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022204 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022205 }
22206 }
22207
22208 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
22209 TEST_REQUIRES_X86_SSE41;
22210 for (size_t channels = 1; channels <= 120; channels += 23) {
22211 DWConvMicrokernelTester()
22212 .cr(24)
22213 .kr(25)
22214 .channels(channels)
22215 .width(3)
22216 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022217 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022218 }
22219 }
22220
22221 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, input_offset) {
22222 TEST_REQUIRES_X86_SSE41;
22223 for (uint32_t channels = 48; channels < 384; channels += 72) {
22224 DWConvMicrokernelTester()
22225 .cr(24)
22226 .kr(25)
22227 .channels(channels)
22228 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080022229 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022230 }
22231 }
22232
22233 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, zero) {
22234 TEST_REQUIRES_X86_SSE41;
22235 for (uint32_t mz = 0; mz < 25; mz++) {
22236 for (uint32_t channels = 48; channels < 384; channels += 72) {
22237 DWConvMicrokernelTester()
22238 .cr(24)
22239 .kr(25)
22240 .channels(channels)
22241 .input_offset(464)
22242 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022243 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022244 }
22245 }
22246 }
22247#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22248
22249
22250#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22251 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
22252 TEST_REQUIRES_X86_AVX;
22253 DWConvMicrokernelTester()
22254 .cr(8)
22255 .kr(25)
22256 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022257 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022258 }
22259
22260 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
22261 TEST_REQUIRES_X86_AVX;
22262 for (uint32_t channels = 16; channels < 128; channels += 24) {
22263 DWConvMicrokernelTester()
22264 .cr(8)
22265 .kr(25)
22266 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022267 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022268 }
22269 }
22270
22271 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
22272 TEST_REQUIRES_X86_AVX;
22273 for (uint32_t channels = 16; channels < 128; channels += 24) {
22274 DWConvMicrokernelTester()
22275 .cr(8)
22276 .kr(25)
22277 .channels(channels)
22278 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022279 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022280 }
22281 }
22282
22283 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
22284 TEST_REQUIRES_X86_AVX;
22285 for (uint32_t channels = 16; channels < 128; channels += 24) {
22286 DWConvMicrokernelTester()
22287 .cr(8)
22288 .kr(25)
22289 .channels(channels)
22290 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022291 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022292 }
22293 }
22294
22295 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
22296 TEST_REQUIRES_X86_AVX;
22297 for (uint32_t channels = 1; channels < 8; channels++) {
22298 DWConvMicrokernelTester()
22299 .cr(8)
22300 .kr(25)
22301 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022302 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022303 }
22304 }
22305
22306 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
22307 TEST_REQUIRES_X86_AVX;
22308 for (uint32_t channels = 9; channels < 16; channels++) {
22309 DWConvMicrokernelTester()
22310 .cr(8)
22311 .kr(25)
22312 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022313 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022314 }
22315 }
22316
22317 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
22318 TEST_REQUIRES_X86_AVX;
22319 for (uint32_t channels = 9; channels < 16; channels++) {
22320 DWConvMicrokernelTester()
22321 .cr(8)
22322 .kr(25)
22323 .channels(channels)
22324 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022325 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022326 }
22327 }
22328
22329 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
22330 TEST_REQUIRES_X86_AVX;
22331 for (uint32_t channels = 9; channels < 16; channels++) {
22332 DWConvMicrokernelTester()
22333 .cr(8)
22334 .kr(25)
22335 .channels(channels)
22336 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022337 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022338 }
22339 }
22340
22341 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
22342 TEST_REQUIRES_X86_AVX;
22343 for (size_t channels = 1; channels <= 40; channels += 7) {
22344 DWConvMicrokernelTester()
22345 .cr(8)
22346 .kr(25)
22347 .channels(channels)
22348 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022349 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022350 }
22351 }
22352
22353 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
22354 TEST_REQUIRES_X86_AVX;
22355 for (size_t channels = 1; channels <= 40; channels += 7) {
22356 for (size_t step = 2; step <= 25; step++) {
22357 DWConvMicrokernelTester()
22358 .cr(8)
22359 .kr(25)
22360 .channels(channels)
22361 .width(3)
22362 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022363 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022364 }
22365 }
22366 }
22367
22368 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
22369 TEST_REQUIRES_X86_AVX;
22370 for (size_t channels = 1; channels <= 40; channels += 7) {
22371 DWConvMicrokernelTester()
22372 .cr(8)
22373 .kr(25)
22374 .channels(8)
22375 .width(5)
22376 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022377 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022378 }
22379 }
22380
22381 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
22382 TEST_REQUIRES_X86_AVX;
22383 for (size_t channels = 1; channels <= 40; channels += 7) {
22384 DWConvMicrokernelTester()
22385 .cr(8)
22386 .kr(25)
22387 .channels(channels)
22388 .width(3)
22389 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022390 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022391 }
22392 }
22393
22394 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
22395 TEST_REQUIRES_X86_AVX;
22396 for (size_t channels = 1; channels <= 40; channels += 7) {
22397 DWConvMicrokernelTester()
22398 .cr(8)
22399 .kr(25)
22400 .channels(channels)
22401 .width(3)
22402 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022403 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022404 }
22405 }
22406
22407 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
22408 TEST_REQUIRES_X86_AVX;
22409 for (uint32_t channels = 16; channels < 128; channels += 24) {
22410 DWConvMicrokernelTester()
22411 .cr(8)
22412 .kr(25)
22413 .channels(channels)
22414 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080022415 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022416 }
22417 }
22418
22419 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
22420 TEST_REQUIRES_X86_AVX;
22421 for (uint32_t mz = 0; mz < 25; mz++) {
22422 for (uint32_t channels = 16; channels < 128; channels += 24) {
22423 DWConvMicrokernelTester()
22424 .cr(8)
22425 .kr(25)
22426 .channels(channels)
22427 .input_offset(176)
22428 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022429 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022430 }
22431 }
22432 }
22433#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22434
22435
22436#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22437 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
22438 TEST_REQUIRES_X86_AVX;
22439 DWConvMicrokernelTester()
22440 .cr(16)
22441 .kr(25)
22442 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080022443 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022444 }
22445
22446 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
22447 TEST_REQUIRES_X86_AVX;
22448 for (uint32_t channels = 32; channels < 256; channels += 48) {
22449 DWConvMicrokernelTester()
22450 .cr(16)
22451 .kr(25)
22452 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022453 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022454 }
22455 }
22456
22457 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
22458 TEST_REQUIRES_X86_AVX;
22459 for (uint32_t channels = 32; channels < 256; channels += 48) {
22460 DWConvMicrokernelTester()
22461 .cr(16)
22462 .kr(25)
22463 .channels(channels)
22464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022465 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022466 }
22467 }
22468
22469 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
22470 TEST_REQUIRES_X86_AVX;
22471 for (uint32_t channels = 32; channels < 256; channels += 48) {
22472 DWConvMicrokernelTester()
22473 .cr(16)
22474 .kr(25)
22475 .channels(channels)
22476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022478 }
22479 }
22480
22481 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
22482 TEST_REQUIRES_X86_AVX;
22483 for (uint32_t channels = 1; channels < 16; channels++) {
22484 DWConvMicrokernelTester()
22485 .cr(16)
22486 .kr(25)
22487 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022489 }
22490 }
22491
22492 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
22493 TEST_REQUIRES_X86_AVX;
22494 for (uint32_t channels = 17; channels < 32; channels++) {
22495 DWConvMicrokernelTester()
22496 .cr(16)
22497 .kr(25)
22498 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022499 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022500 }
22501 }
22502
22503 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
22504 TEST_REQUIRES_X86_AVX;
22505 for (uint32_t channels = 17; channels < 32; channels++) {
22506 DWConvMicrokernelTester()
22507 .cr(16)
22508 .kr(25)
22509 .channels(channels)
22510 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022511 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022512 }
22513 }
22514
22515 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
22516 TEST_REQUIRES_X86_AVX;
22517 for (uint32_t channels = 17; channels < 32; channels++) {
22518 DWConvMicrokernelTester()
22519 .cr(16)
22520 .kr(25)
22521 .channels(channels)
22522 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022524 }
22525 }
22526
22527 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
22528 TEST_REQUIRES_X86_AVX;
22529 for (size_t channels = 1; channels <= 80; channels += 15) {
22530 DWConvMicrokernelTester()
22531 .cr(16)
22532 .kr(25)
22533 .channels(channels)
22534 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022535 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022536 }
22537 }
22538
22539 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
22540 TEST_REQUIRES_X86_AVX;
22541 for (size_t channels = 1; channels <= 80; channels += 15) {
22542 for (size_t step = 2; step <= 25; step++) {
22543 DWConvMicrokernelTester()
22544 .cr(16)
22545 .kr(25)
22546 .channels(channels)
22547 .width(3)
22548 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022549 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022550 }
22551 }
22552 }
22553
22554 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
22555 TEST_REQUIRES_X86_AVX;
22556 for (size_t channels = 1; channels <= 80; channels += 15) {
22557 DWConvMicrokernelTester()
22558 .cr(16)
22559 .kr(25)
22560 .channels(16)
22561 .width(5)
22562 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022563 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022564 }
22565 }
22566
22567 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
22568 TEST_REQUIRES_X86_AVX;
22569 for (size_t channels = 1; channels <= 80; channels += 15) {
22570 DWConvMicrokernelTester()
22571 .cr(16)
22572 .kr(25)
22573 .channels(channels)
22574 .width(3)
22575 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022576 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022577 }
22578 }
22579
22580 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
22581 TEST_REQUIRES_X86_AVX;
22582 for (size_t channels = 1; channels <= 80; channels += 15) {
22583 DWConvMicrokernelTester()
22584 .cr(16)
22585 .kr(25)
22586 .channels(channels)
22587 .width(3)
22588 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022589 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022590 }
22591 }
22592
22593 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
22594 TEST_REQUIRES_X86_AVX;
22595 for (uint32_t channels = 32; channels < 256; channels += 48) {
22596 DWConvMicrokernelTester()
22597 .cr(16)
22598 .kr(25)
22599 .channels(channels)
22600 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080022601 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022602 }
22603 }
22604
22605 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
22606 TEST_REQUIRES_X86_AVX;
22607 for (uint32_t mz = 0; mz < 25; mz++) {
22608 for (uint32_t channels = 32; channels < 256; channels += 48) {
22609 DWConvMicrokernelTester()
22610 .cr(16)
22611 .kr(25)
22612 .channels(channels)
22613 .input_offset(304)
22614 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022615 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022616 }
22617 }
22618 }
22619#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22620
22621
22622#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22623 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_eq_24) {
22624 TEST_REQUIRES_X86_AVX;
22625 DWConvMicrokernelTester()
22626 .cr(24)
22627 .kr(25)
22628 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080022629 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022630 }
22631
22632 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24) {
22633 TEST_REQUIRES_X86_AVX;
22634 for (uint32_t channels = 48; channels < 384; channels += 72) {
22635 DWConvMicrokernelTester()
22636 .cr(24)
22637 .kr(25)
22638 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022639 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022640 }
22641 }
22642
22643 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
22644 TEST_REQUIRES_X86_AVX;
22645 for (uint32_t channels = 48; channels < 384; channels += 72) {
22646 DWConvMicrokernelTester()
22647 .cr(24)
22648 .kr(25)
22649 .channels(channels)
22650 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022651 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022652 }
22653 }
22654
22655 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
22656 TEST_REQUIRES_X86_AVX;
22657 for (uint32_t channels = 48; channels < 384; channels += 72) {
22658 DWConvMicrokernelTester()
22659 .cr(24)
22660 .kr(25)
22661 .channels(channels)
22662 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022664 }
22665 }
22666
22667 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_lt_24) {
22668 TEST_REQUIRES_X86_AVX;
22669 for (uint32_t channels = 1; channels < 24; channels++) {
22670 DWConvMicrokernelTester()
22671 .cr(24)
22672 .kr(25)
22673 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022674 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022675 }
22676 }
22677
22678 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24) {
22679 TEST_REQUIRES_X86_AVX;
22680 for (uint32_t channels = 25; channels < 48; channels++) {
22681 DWConvMicrokernelTester()
22682 .cr(24)
22683 .kr(25)
22684 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022685 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022686 }
22687 }
22688
22689 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
22690 TEST_REQUIRES_X86_AVX;
22691 for (uint32_t channels = 25; channels < 48; channels++) {
22692 DWConvMicrokernelTester()
22693 .cr(24)
22694 .kr(25)
22695 .channels(channels)
22696 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022697 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022698 }
22699 }
22700
22701 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
22702 TEST_REQUIRES_X86_AVX;
22703 for (uint32_t channels = 25; channels < 48; channels++) {
22704 DWConvMicrokernelTester()
22705 .cr(24)
22706 .kr(25)
22707 .channels(channels)
22708 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022709 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022710 }
22711 }
22712
22713 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel) {
22714 TEST_REQUIRES_X86_AVX;
22715 for (size_t channels = 1; channels <= 120; channels += 23) {
22716 DWConvMicrokernelTester()
22717 .cr(24)
22718 .kr(25)
22719 .channels(channels)
22720 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022721 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022722 }
22723 }
22724
22725 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_step) {
22726 TEST_REQUIRES_X86_AVX;
22727 for (size_t channels = 1; channels <= 120; channels += 23) {
22728 for (size_t step = 2; step <= 25; step++) {
22729 DWConvMicrokernelTester()
22730 .cr(24)
22731 .kr(25)
22732 .channels(channels)
22733 .width(3)
22734 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022735 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022736 }
22737 }
22738 }
22739
22740 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
22741 TEST_REQUIRES_X86_AVX;
22742 for (size_t channels = 1; channels <= 120; channels += 23) {
22743 DWConvMicrokernelTester()
22744 .cr(24)
22745 .kr(25)
22746 .channels(24)
22747 .width(5)
22748 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080022749 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022750 }
22751 }
22752
22753 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmin) {
22754 TEST_REQUIRES_X86_AVX;
22755 for (size_t channels = 1; channels <= 120; channels += 23) {
22756 DWConvMicrokernelTester()
22757 .cr(24)
22758 .kr(25)
22759 .channels(channels)
22760 .width(3)
22761 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022762 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022763 }
22764 }
22765
22766 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmax) {
22767 TEST_REQUIRES_X86_AVX;
22768 for (size_t channels = 1; channels <= 120; channels += 23) {
22769 DWConvMicrokernelTester()
22770 .cr(24)
22771 .kr(25)
22772 .channels(channels)
22773 .width(3)
22774 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022775 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022776 }
22777 }
22778
22779 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, input_offset) {
22780 TEST_REQUIRES_X86_AVX;
22781 for (uint32_t channels = 48; channels < 384; channels += 72) {
22782 DWConvMicrokernelTester()
22783 .cr(24)
22784 .kr(25)
22785 .channels(channels)
22786 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080022787 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022788 }
22789 }
22790
22791 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, zero) {
22792 TEST_REQUIRES_X86_AVX;
22793 for (uint32_t mz = 0; mz < 25; mz++) {
22794 for (uint32_t channels = 48; channels < 384; channels += 72) {
22795 DWConvMicrokernelTester()
22796 .cr(24)
22797 .kr(25)
22798 .channels(channels)
22799 .input_offset(464)
22800 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022801 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022802 }
22803 }
22804 }
22805#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22806
22807
22808#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22809 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
22810 TEST_REQUIRES_X86_XOP;
22811 DWConvMicrokernelTester()
22812 .cr(8)
22813 .kr(25)
22814 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022815 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022816 }
22817
22818 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
22819 TEST_REQUIRES_X86_XOP;
22820 for (uint32_t channels = 16; channels < 128; channels += 24) {
22821 DWConvMicrokernelTester()
22822 .cr(8)
22823 .kr(25)
22824 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022825 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022826 }
22827 }
22828
22829 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
22830 TEST_REQUIRES_X86_XOP;
22831 for (uint32_t channels = 16; channels < 128; channels += 24) {
22832 DWConvMicrokernelTester()
22833 .cr(8)
22834 .kr(25)
22835 .channels(channels)
22836 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022838 }
22839 }
22840
22841 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
22842 TEST_REQUIRES_X86_XOP;
22843 for (uint32_t channels = 16; channels < 128; channels += 24) {
22844 DWConvMicrokernelTester()
22845 .cr(8)
22846 .kr(25)
22847 .channels(channels)
22848 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022849 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022850 }
22851 }
22852
22853 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
22854 TEST_REQUIRES_X86_XOP;
22855 for (uint32_t channels = 1; channels < 8; channels++) {
22856 DWConvMicrokernelTester()
22857 .cr(8)
22858 .kr(25)
22859 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022860 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022861 }
22862 }
22863
22864 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
22865 TEST_REQUIRES_X86_XOP;
22866 for (uint32_t channels = 9; channels < 16; channels++) {
22867 DWConvMicrokernelTester()
22868 .cr(8)
22869 .kr(25)
22870 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080022871 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022872 }
22873 }
22874
22875 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
22876 TEST_REQUIRES_X86_XOP;
22877 for (uint32_t channels = 9; channels < 16; channels++) {
22878 DWConvMicrokernelTester()
22879 .cr(8)
22880 .kr(25)
22881 .channels(channels)
22882 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022883 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022884 }
22885 }
22886
22887 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
22888 TEST_REQUIRES_X86_XOP;
22889 for (uint32_t channels = 9; channels < 16; channels++) {
22890 DWConvMicrokernelTester()
22891 .cr(8)
22892 .kr(25)
22893 .channels(channels)
22894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022895 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022896 }
22897 }
22898
22899 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
22900 TEST_REQUIRES_X86_XOP;
22901 for (size_t channels = 1; channels <= 40; channels += 7) {
22902 DWConvMicrokernelTester()
22903 .cr(8)
22904 .kr(25)
22905 .channels(channels)
22906 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022907 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022908 }
22909 }
22910
22911 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
22912 TEST_REQUIRES_X86_XOP;
22913 for (size_t channels = 1; channels <= 40; channels += 7) {
22914 for (size_t step = 2; step <= 25; step++) {
22915 DWConvMicrokernelTester()
22916 .cr(8)
22917 .kr(25)
22918 .channels(channels)
22919 .width(3)
22920 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080022921 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022922 }
22923 }
22924 }
22925
22926 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
22927 TEST_REQUIRES_X86_XOP;
22928 for (size_t channels = 1; channels <= 40; channels += 7) {
22929 DWConvMicrokernelTester()
22930 .cr(8)
22931 .kr(25)
22932 .channels(8)
22933 .width(5)
22934 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022935 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022936 }
22937 }
22938
22939 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
22940 TEST_REQUIRES_X86_XOP;
22941 for (size_t channels = 1; channels <= 40; channels += 7) {
22942 DWConvMicrokernelTester()
22943 .cr(8)
22944 .kr(25)
22945 .channels(channels)
22946 .width(3)
22947 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022948 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022949 }
22950 }
22951
22952 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
22953 TEST_REQUIRES_X86_XOP;
22954 for (size_t channels = 1; channels <= 40; channels += 7) {
22955 DWConvMicrokernelTester()
22956 .cr(8)
22957 .kr(25)
22958 .channels(channels)
22959 .width(3)
22960 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022961 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022962 }
22963 }
22964
22965 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
22966 TEST_REQUIRES_X86_XOP;
22967 for (uint32_t channels = 16; channels < 128; channels += 24) {
22968 DWConvMicrokernelTester()
22969 .cr(8)
22970 .kr(25)
22971 .channels(channels)
22972 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080022973 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022974 }
22975 }
22976
22977 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
22978 TEST_REQUIRES_X86_XOP;
22979 for (uint32_t mz = 0; mz < 25; mz++) {
22980 for (uint32_t channels = 16; channels < 128; channels += 24) {
22981 DWConvMicrokernelTester()
22982 .cr(8)
22983 .kr(25)
22984 .channels(channels)
22985 .input_offset(176)
22986 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022987 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070022988 }
22989 }
22990 }
22991#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22992
22993
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test cases for xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32. Every
  // case begins with TEST_REQUIRES_X86_XOP and configures the tester with
  // cr(16) and kr(25).

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
    TEST_REQUIRES_X86_XOP;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(16)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts 32, 80, 128, 176 and 224 (multiples of cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (32 .. 224) with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (32 .. 224) with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Every channel count below cr (1 through 15).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 1; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17 through 31 (strictly between cr and 2 * cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17 through 31 with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 17 through 31 with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 17; channels < 32; channels++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) on channel counts 1, 16, 31, 46, 61 and 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) for every combination of channel count (1 .. 76, stride 15) and
  // step (2 through 25 inclusive).
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) and output_stride(83) on channel counts 1, 16, 31, 46, 61 and 76.
  // Each iteration passes the loop variable to channels(); hard-coding
  // channels(16) would leave the loop variable unused and repeat one
  // configuration six times. The largest count iterated (76) is below the
  // stride of 83.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py, so
  // the same change belongs in the generator template.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmin(128) on channel counts 1, 16, 31, 46, 61 and 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmax(128) on channel counts 1, 16, 31, 46, 61 and 76.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 80; channels += 15) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(304) on channel counts 32, 80, 128, 176 and 224.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(304) for every zero_index in [0, 25) combined with channel
  // counts 32, 80, 128, 176 and 224.
  TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 32; channels < 256; channels += 48) {
        DWConvMicrokernelTester()
          .cr(16)
          .kr(25)
          .channels(channels)
          .input_offset(304)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23178
23179
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test cases for xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32. Every
  // case begins with TEST_REQUIRES_X86_XOP and configures the tester with
  // cr(24) and kr(25).

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_eq_24) {
    TEST_REQUIRES_X86_XOP;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(24)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts 48, 120, 192, 264 and 336 (multiples of cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (48 .. 336) with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (48 .. 336) with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Every channel count below cr (1 through 23).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_lt_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 1; channels < 24; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 25 through 47 (strictly between cr and 2 * cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 25 through 47 with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 25 through 47 with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 25; channels < 48; channels++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) on channel counts 1, 24, 47, 70, 93 and 116.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) for every combination of channel count (1 .. 116, stride 23) and
  // step (2 through 25 inclusive).
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) and output_stride(127) on channel counts 1, 24, 47, 70, 93 and 116.
  // Each iteration passes the loop variable to channels(); hard-coding
  // channels(24) would leave the loop variable unused and repeat one
  // configuration six times. The largest count iterated (116) is below the
  // stride of 127.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py, so
  // the same change belongs in the generator template.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(127)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmin(128) on channel counts 1, 24, 47, 70, 93 and 116.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmax(128) on channel counts 1, 24, 47, 70, 93 and 116.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_XOP;
    for (size_t channels = 1; channels <= 120; channels += 23) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(464) on channel counts 48, 120, 192, 264 and 336.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, input_offset) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t channels = 48; channels < 384; channels += 72) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .input_offset(464)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(464) for every zero_index in [0, 25) combined with channel
  // counts 48, 120, 192, 264 and 336.
  TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, zero) {
    TEST_REQUIRES_X86_XOP;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 48; channels < 384; channels += 72) {
        DWConvMicrokernelTester()
          .cr(24)
          .kr(25)
          .channels(channels)
          .input_offset(464)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_minmax_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23364
23365
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test cases for xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32. Every
  // case begins with TEST_REQUIRES_X86_AVX2 and configures the tester with
  // cr(8) and kr(25).

  // Channel count equal to cr.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
    TEST_REQUIRES_X86_AVX2;
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
  }

  // Channel counts 16, 40, 64, 88 and 112 (multiples of cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (16 .. 112) with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Multiples of cr (16 .. 112) with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Every channel count below cr (1 through 7).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 1; channels < 8; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 9 through 15 (strictly between cr and 2 * cr).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 9 through 15 with qmin(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // Channel counts 9 through 15 with qmax(128).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 9; channels < 16; channels++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) on channel counts 1, 8, 15, 22, 29 and 36.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) for every combination of channel count (1 .. 36, stride 7) and
  // step (2 through 25 inclusive).
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      for (size_t step = 2; step <= 25; step++) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(channels)
          .width(3)
          .step(step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // width(5) and output_stride(43) on channel counts 1, 8, 15, 22, 29 and 36.
  // Each iteration passes the loop variable to channels(); hard-coding
  // channels(8) would leave the loop variable unused and repeat one
  // configuration six times. The largest count iterated (36) is below the
  // stride of 43.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py, so
  // the same change belongs in the generator template.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmin(128) on channel counts 1, 8, 15, 22, 29 and 36.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // width(3) and qmax(128) on channel counts 1, 8, 15, 22, 29 and 36.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t channels = 1; channels <= 40; channels += 7) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(176) on channel counts 16, 40, 64, 88 and 112.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t channels = 16; channels < 128; channels += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
    }
  }

  // input_offset(176) for every zero_index in [0, 25) combined with channel
  // counts 16, 40, 64, 88 and 112.
  TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
    TEST_REQUIRES_X86_AVX2;
    for (uint32_t mz = 0; mz < 25; mz++) {
      for (uint32_t channels = 16; channels < 128; channels += 24) {
        DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(channels)
          .input_offset(176)
          .zero_index(mz)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23550
23551
23552#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23553 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
23554 TEST_REQUIRES_X86_AVX2;
23555 DWConvMicrokernelTester()
23556 .cr(16)
23557 .kr(25)
23558 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080023559 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023560 }
23561
23562 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
23563 TEST_REQUIRES_X86_AVX2;
23564 for (uint32_t channels = 32; channels < 256; channels += 48) {
23565 DWConvMicrokernelTester()
23566 .cr(16)
23567 .kr(25)
23568 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023569 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023570 }
23571 }
23572
23573 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
23574 TEST_REQUIRES_X86_AVX2;
23575 for (uint32_t channels = 32; channels < 256; channels += 48) {
23576 DWConvMicrokernelTester()
23577 .cr(16)
23578 .kr(25)
23579 .channels(channels)
23580 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023581 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023582 }
23583 }
23584
23585 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
23586 TEST_REQUIRES_X86_AVX2;
23587 for (uint32_t channels = 32; channels < 256; channels += 48) {
23588 DWConvMicrokernelTester()
23589 .cr(16)
23590 .kr(25)
23591 .channels(channels)
23592 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023593 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023594 }
23595 }
23596
23597 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
23598 TEST_REQUIRES_X86_AVX2;
23599 for (uint32_t channels = 1; channels < 16; channels++) {
23600 DWConvMicrokernelTester()
23601 .cr(16)
23602 .kr(25)
23603 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023604 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023605 }
23606 }
23607
23608 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
23609 TEST_REQUIRES_X86_AVX2;
23610 for (uint32_t channels = 17; channels < 32; channels++) {
23611 DWConvMicrokernelTester()
23612 .cr(16)
23613 .kr(25)
23614 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023615 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023616 }
23617 }
23618
23619 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
23620 TEST_REQUIRES_X86_AVX2;
23621 for (uint32_t channels = 17; channels < 32; channels++) {
23622 DWConvMicrokernelTester()
23623 .cr(16)
23624 .kr(25)
23625 .channels(channels)
23626 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023627 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023628 }
23629 }
23630
23631 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
23632 TEST_REQUIRES_X86_AVX2;
23633 for (uint32_t channels = 17; channels < 32; channels++) {
23634 DWConvMicrokernelTester()
23635 .cr(16)
23636 .kr(25)
23637 .channels(channels)
23638 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023639 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023640 }
23641 }
23642
23643 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
23644 TEST_REQUIRES_X86_AVX2;
23645 for (size_t channels = 1; channels <= 80; channels += 15) {
23646 DWConvMicrokernelTester()
23647 .cr(16)
23648 .kr(25)
23649 .channels(channels)
23650 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023651 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023652 }
23653 }
23654
23655 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
23656 TEST_REQUIRES_X86_AVX2;
23657 for (size_t channels = 1; channels <= 80; channels += 15) {
23658 for (size_t step = 2; step <= 25; step++) {
23659 DWConvMicrokernelTester()
23660 .cr(16)
23661 .kr(25)
23662 .channels(channels)
23663 .width(3)
23664 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080023665 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023666 }
23667 }
23668 }
23669
23670 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
23671 TEST_REQUIRES_X86_AVX2;
23672 for (size_t channels = 1; channels <= 80; channels += 15) {
23673 DWConvMicrokernelTester()
23674 .cr(16)
23675 .kr(25)
23676 .channels(16)
23677 .width(5)
23678 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023679 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023680 }
23681 }
23682
23683 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
23684 TEST_REQUIRES_X86_AVX2;
23685 for (size_t channels = 1; channels <= 80; channels += 15) {
23686 DWConvMicrokernelTester()
23687 .cr(16)
23688 .kr(25)
23689 .channels(channels)
23690 .width(3)
23691 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023692 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023693 }
23694 }
23695
23696 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
23697 TEST_REQUIRES_X86_AVX2;
23698 for (size_t channels = 1; channels <= 80; channels += 15) {
23699 DWConvMicrokernelTester()
23700 .cr(16)
23701 .kr(25)
23702 .channels(channels)
23703 .width(3)
23704 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023705 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023706 }
23707 }
23708
23709 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
23710 TEST_REQUIRES_X86_AVX2;
23711 for (uint32_t channels = 32; channels < 256; channels += 48) {
23712 DWConvMicrokernelTester()
23713 .cr(16)
23714 .kr(25)
23715 .channels(channels)
23716 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080023717 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023718 }
23719 }
23720
23721 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
23722 TEST_REQUIRES_X86_AVX2;
23723 for (uint32_t mz = 0; mz < 25; mz++) {
23724 for (uint32_t channels = 32; channels < 256; channels += 48) {
23725 DWConvMicrokernelTester()
23726 .cr(16)
23727 .kr(25)
23728 .channels(channels)
23729 .input_offset(304)
23730 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080023731 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023732 }
23733 }
23734 }
23735#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23736
23737
23738#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23739 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
23740 TEST_REQUIRES_X86_AVX2;
23741 DWConvMicrokernelTester()
23742 .cr(24)
23743 .kr(25)
23744 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080023745 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023746 }
23747
23748 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
23749 TEST_REQUIRES_X86_AVX2;
23750 for (uint32_t channels = 48; channels < 384; channels += 72) {
23751 DWConvMicrokernelTester()
23752 .cr(24)
23753 .kr(25)
23754 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023755 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023756 }
23757 }
23758
23759 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
23760 TEST_REQUIRES_X86_AVX2;
23761 for (uint32_t channels = 48; channels < 384; channels += 72) {
23762 DWConvMicrokernelTester()
23763 .cr(24)
23764 .kr(25)
23765 .channels(channels)
23766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023767 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023768 }
23769 }
23770
23771 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
23772 TEST_REQUIRES_X86_AVX2;
23773 for (uint32_t channels = 48; channels < 384; channels += 72) {
23774 DWConvMicrokernelTester()
23775 .cr(24)
23776 .kr(25)
23777 .channels(channels)
23778 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023779 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023780 }
23781 }
23782
23783 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
23784 TEST_REQUIRES_X86_AVX2;
23785 for (uint32_t channels = 1; channels < 24; channels++) {
23786 DWConvMicrokernelTester()
23787 .cr(24)
23788 .kr(25)
23789 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023790 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023791 }
23792 }
23793
23794 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
23795 TEST_REQUIRES_X86_AVX2;
23796 for (uint32_t channels = 25; channels < 48; channels++) {
23797 DWConvMicrokernelTester()
23798 .cr(24)
23799 .kr(25)
23800 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023801 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023802 }
23803 }
23804
23805 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
23806 TEST_REQUIRES_X86_AVX2;
23807 for (uint32_t channels = 25; channels < 48; channels++) {
23808 DWConvMicrokernelTester()
23809 .cr(24)
23810 .kr(25)
23811 .channels(channels)
23812 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023813 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023814 }
23815 }
23816
23817 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
23818 TEST_REQUIRES_X86_AVX2;
23819 for (uint32_t channels = 25; channels < 48; channels++) {
23820 DWConvMicrokernelTester()
23821 .cr(24)
23822 .kr(25)
23823 .channels(channels)
23824 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023825 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023826 }
23827 }
23828
23829 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
23830 TEST_REQUIRES_X86_AVX2;
23831 for (size_t channels = 1; channels <= 120; channels += 23) {
23832 DWConvMicrokernelTester()
23833 .cr(24)
23834 .kr(25)
23835 .channels(channels)
23836 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023838 }
23839 }
23840
23841 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
23842 TEST_REQUIRES_X86_AVX2;
23843 for (size_t channels = 1; channels <= 120; channels += 23) {
23844 for (size_t step = 2; step <= 25; step++) {
23845 DWConvMicrokernelTester()
23846 .cr(24)
23847 .kr(25)
23848 .channels(channels)
23849 .width(3)
23850 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080023851 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023852 }
23853 }
23854 }
23855
23856 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
23857 TEST_REQUIRES_X86_AVX2;
23858 for (size_t channels = 1; channels <= 120; channels += 23) {
23859 DWConvMicrokernelTester()
23860 .cr(24)
23861 .kr(25)
23862 .channels(24)
23863 .width(5)
23864 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080023865 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023866 }
23867 }
23868
23869 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
23870 TEST_REQUIRES_X86_AVX2;
23871 for (size_t channels = 1; channels <= 120; channels += 23) {
23872 DWConvMicrokernelTester()
23873 .cr(24)
23874 .kr(25)
23875 .channels(channels)
23876 .width(3)
23877 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023878 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023879 }
23880 }
23881
23882 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
23883 TEST_REQUIRES_X86_AVX2;
23884 for (size_t channels = 1; channels <= 120; channels += 23) {
23885 DWConvMicrokernelTester()
23886 .cr(24)
23887 .kr(25)
23888 .channels(channels)
23889 .width(3)
23890 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023891 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023892 }
23893 }
23894
23895 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
23896 TEST_REQUIRES_X86_AVX2;
23897 for (uint32_t channels = 48; channels < 384; channels += 72) {
23898 DWConvMicrokernelTester()
23899 .cr(24)
23900 .kr(25)
23901 .channels(channels)
23902 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080023903 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023904 }
23905 }
23906
23907 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
23908 TEST_REQUIRES_X86_AVX2;
23909 for (uint32_t mz = 0; mz < 25; mz++) {
23910 for (uint32_t channels = 48; channels < 384; channels += 72) {
23911 DWConvMicrokernelTester()
23912 .cr(24)
23913 .kr(25)
23914 .channels(channels)
23915 .input_offset(464)
23916 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080023917 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023918 }
23919 }
23920 }
23921#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23922
23923
23924#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23925 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
23926 TEST_REQUIRES_X86_AVX2;
23927 DWConvMicrokernelTester()
23928 .cr(32)
23929 .kr(25)
23930 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080023931 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023932 }
23933
23934 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
23935 TEST_REQUIRES_X86_AVX2;
23936 for (uint32_t channels = 64; channels < 512; channels += 96) {
23937 DWConvMicrokernelTester()
23938 .cr(32)
23939 .kr(25)
23940 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023941 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023942 }
23943 }
23944
23945 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
23946 TEST_REQUIRES_X86_AVX2;
23947 for (uint32_t channels = 64; channels < 512; channels += 96) {
23948 DWConvMicrokernelTester()
23949 .cr(32)
23950 .kr(25)
23951 .channels(channels)
23952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023953 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023954 }
23955 }
23956
23957 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
23958 TEST_REQUIRES_X86_AVX2;
23959 for (uint32_t channels = 64; channels < 512; channels += 96) {
23960 DWConvMicrokernelTester()
23961 .cr(32)
23962 .kr(25)
23963 .channels(channels)
23964 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023965 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023966 }
23967 }
23968
23969 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
23970 TEST_REQUIRES_X86_AVX2;
23971 for (uint32_t channels = 1; channels < 32; channels++) {
23972 DWConvMicrokernelTester()
23973 .cr(32)
23974 .kr(25)
23975 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023976 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023977 }
23978 }
23979
23980 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
23981 TEST_REQUIRES_X86_AVX2;
23982 for (uint32_t channels = 33; channels < 64; channels++) {
23983 DWConvMicrokernelTester()
23984 .cr(32)
23985 .kr(25)
23986 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080023987 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070023988 }
23989 }
23990
23991 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
23992 TEST_REQUIRES_X86_AVX2;
23993 for (uint32_t channels = 33; channels < 64; channels++) {
23994 DWConvMicrokernelTester()
23995 .cr(32)
23996 .kr(25)
23997 .channels(channels)
23998 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023999 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024000 }
24001 }
24002
24003 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
24004 TEST_REQUIRES_X86_AVX2;
24005 for (uint32_t channels = 33; channels < 64; channels++) {
24006 DWConvMicrokernelTester()
24007 .cr(32)
24008 .kr(25)
24009 .channels(channels)
24010 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024011 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024012 }
24013 }
24014
24015 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
24016 TEST_REQUIRES_X86_AVX2;
24017 for (size_t channels = 1; channels <= 160; channels += 31) {
24018 DWConvMicrokernelTester()
24019 .cr(32)
24020 .kr(25)
24021 .channels(channels)
24022 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024023 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024024 }
24025 }
24026
24027 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
24028 TEST_REQUIRES_X86_AVX2;
24029 for (size_t channels = 1; channels <= 160; channels += 31) {
24030 for (size_t step = 2; step <= 25; step++) {
24031 DWConvMicrokernelTester()
24032 .cr(32)
24033 .kr(25)
24034 .channels(channels)
24035 .width(3)
24036 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024037 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024038 }
24039 }
24040 }
24041
24042 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
24043 TEST_REQUIRES_X86_AVX2;
24044 for (size_t channels = 1; channels <= 160; channels += 31) {
24045 DWConvMicrokernelTester()
24046 .cr(32)
24047 .kr(25)
24048 .channels(32)
24049 .width(5)
24050 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080024051 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024052 }
24053 }
24054
24055 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
24056 TEST_REQUIRES_X86_AVX2;
24057 for (size_t channels = 1; channels <= 160; channels += 31) {
24058 DWConvMicrokernelTester()
24059 .cr(32)
24060 .kr(25)
24061 .channels(channels)
24062 .width(3)
24063 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024064 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024065 }
24066 }
24067
24068 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
24069 TEST_REQUIRES_X86_AVX2;
24070 for (size_t channels = 1; channels <= 160; channels += 31) {
24071 DWConvMicrokernelTester()
24072 .cr(32)
24073 .kr(25)
24074 .channels(channels)
24075 .width(3)
24076 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024077 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024078 }
24079 }
24080
24081 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
24082 TEST_REQUIRES_X86_AVX2;
24083 for (uint32_t channels = 64; channels < 512; channels += 96) {
24084 DWConvMicrokernelTester()
24085 .cr(32)
24086 .kr(25)
24087 .channels(channels)
24088 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080024089 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024090 }
24091 }
24092
24093 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
24094 TEST_REQUIRES_X86_AVX2;
24095 for (uint32_t mz = 0; mz < 25; mz++) {
24096 for (uint32_t channels = 64; channels < 512; channels += 96) {
24097 DWConvMicrokernelTester()
24098 .cr(32)
24099 .kr(25)
24100 .channels(channels)
24101 .input_offset(592)
24102 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024103 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_minmax_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan82286892021-06-04 17:27:27 -070024104 }
24105 }
24106 }
24107#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan98042f22021-06-15 00:43:13 -070024108
24109
24110#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24111 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
24112 TEST_REQUIRES_X86_AVX512SKX;
24113 DWConvMicrokernelTester()
24114 .cr(16)
24115 .kr(25)
24116 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080024117 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024118 }
24119
24120 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
24121 TEST_REQUIRES_X86_AVX512SKX;
24122 for (uint32_t channels = 32; channels < 256; channels += 48) {
24123 DWConvMicrokernelTester()
24124 .cr(16)
24125 .kr(25)
24126 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024127 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024128 }
24129 }
24130
24131 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
24132 TEST_REQUIRES_X86_AVX512SKX;
24133 for (uint32_t channels = 32; channels < 256; channels += 48) {
24134 DWConvMicrokernelTester()
24135 .cr(16)
24136 .kr(25)
24137 .channels(channels)
24138 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024139 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024140 }
24141 }
24142
24143 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
24144 TEST_REQUIRES_X86_AVX512SKX;
24145 for (uint32_t channels = 32; channels < 256; channels += 48) {
24146 DWConvMicrokernelTester()
24147 .cr(16)
24148 .kr(25)
24149 .channels(channels)
24150 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024152 }
24153 }
24154
24155 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
24156 TEST_REQUIRES_X86_AVX512SKX;
24157 for (uint32_t channels = 1; channels < 16; channels++) {
24158 DWConvMicrokernelTester()
24159 .cr(16)
24160 .kr(25)
24161 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024162 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024163 }
24164 }
24165
24166 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
24167 TEST_REQUIRES_X86_AVX512SKX;
24168 for (uint32_t channels = 17; channels < 32; channels++) {
24169 DWConvMicrokernelTester()
24170 .cr(16)
24171 .kr(25)
24172 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024173 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024174 }
24175 }
24176
24177 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
24178 TEST_REQUIRES_X86_AVX512SKX;
24179 for (uint32_t channels = 17; channels < 32; channels++) {
24180 DWConvMicrokernelTester()
24181 .cr(16)
24182 .kr(25)
24183 .channels(channels)
24184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024185 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024186 }
24187 }
24188
24189 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
24190 TEST_REQUIRES_X86_AVX512SKX;
24191 for (uint32_t channels = 17; channels < 32; channels++) {
24192 DWConvMicrokernelTester()
24193 .cr(16)
24194 .kr(25)
24195 .channels(channels)
24196 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024197 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024198 }
24199 }
24200
24201 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
24202 TEST_REQUIRES_X86_AVX512SKX;
24203 for (size_t channels = 1; channels <= 80; channels += 15) {
24204 DWConvMicrokernelTester()
24205 .cr(16)
24206 .kr(25)
24207 .channels(channels)
24208 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024209 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024210 }
24211 }
24212
24213 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
24214 TEST_REQUIRES_X86_AVX512SKX;
24215 for (size_t channels = 1; channels <= 80; channels += 15) {
24216 for (size_t step = 2; step <= 25; step++) {
24217 DWConvMicrokernelTester()
24218 .cr(16)
24219 .kr(25)
24220 .channels(channels)
24221 .width(3)
24222 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024223 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024224 }
24225 }
24226 }
24227
24228 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
24229 TEST_REQUIRES_X86_AVX512SKX;
24230 for (size_t channels = 1; channels <= 80; channels += 15) {
24231 DWConvMicrokernelTester()
24232 .cr(16)
24233 .kr(25)
24234 .channels(16)
24235 .width(5)
24236 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024237 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024238 }
24239 }
24240
24241 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
24242 TEST_REQUIRES_X86_AVX512SKX;
24243 for (size_t channels = 1; channels <= 80; channels += 15) {
24244 DWConvMicrokernelTester()
24245 .cr(16)
24246 .kr(25)
24247 .channels(channels)
24248 .width(3)
24249 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024250 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024251 }
24252 }
24253
24254 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
24255 TEST_REQUIRES_X86_AVX512SKX;
24256 for (size_t channels = 1; channels <= 80; channels += 15) {
24257 DWConvMicrokernelTester()
24258 .cr(16)
24259 .kr(25)
24260 .channels(channels)
24261 .width(3)
24262 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024263 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024264 }
24265 }
24266
24267 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
24268 TEST_REQUIRES_X86_AVX512SKX;
24269 for (uint32_t channels = 32; channels < 256; channels += 48) {
24270 DWConvMicrokernelTester()
24271 .cr(16)
24272 .kr(25)
24273 .channels(channels)
24274 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080024275 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024276 }
24277 }
24278
24279 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
24280 TEST_REQUIRES_X86_AVX512SKX;
24281 for (uint32_t mz = 0; mz < 25; mz++) {
24282 for (uint32_t channels = 32; channels < 256; channels += 48) {
24283 DWConvMicrokernelTester()
24284 .cr(16)
24285 .kr(25)
24286 .channels(channels)
24287 .input_offset(304)
24288 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024289 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024290 }
24291 }
24292 }
24293#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24294
24295
24296#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24297 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
24298 TEST_REQUIRES_X86_AVX512SKX;
24299 DWConvMicrokernelTester()
24300 .cr(32)
24301 .kr(25)
24302 .channels(32)
Marat Dukhan50323b82022-01-11 00:12:01 -080024303 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024304 }
24305
24306 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
24307 TEST_REQUIRES_X86_AVX512SKX;
24308 for (uint32_t channels = 64; channels < 512; channels += 96) {
24309 DWConvMicrokernelTester()
24310 .cr(32)
24311 .kr(25)
24312 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024313 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024314 }
24315 }
24316
24317 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
24318 TEST_REQUIRES_X86_AVX512SKX;
24319 for (uint32_t channels = 64; channels < 512; channels += 96) {
24320 DWConvMicrokernelTester()
24321 .cr(32)
24322 .kr(25)
24323 .channels(channels)
24324 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024325 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024326 }
24327 }
24328
24329 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
24330 TEST_REQUIRES_X86_AVX512SKX;
24331 for (uint32_t channels = 64; channels < 512; channels += 96) {
24332 DWConvMicrokernelTester()
24333 .cr(32)
24334 .kr(25)
24335 .channels(channels)
24336 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024337 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024338 }
24339 }
24340
24341 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
24342 TEST_REQUIRES_X86_AVX512SKX;
24343 for (uint32_t channels = 1; channels < 32; channels++) {
24344 DWConvMicrokernelTester()
24345 .cr(32)
24346 .kr(25)
24347 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024348 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024349 }
24350 }
24351
24352 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
24353 TEST_REQUIRES_X86_AVX512SKX;
24354 for (uint32_t channels = 33; channels < 64; channels++) {
24355 DWConvMicrokernelTester()
24356 .cr(32)
24357 .kr(25)
24358 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024359 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024360 }
24361 }
24362
24363 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
24364 TEST_REQUIRES_X86_AVX512SKX;
24365 for (uint32_t channels = 33; channels < 64; channels++) {
24366 DWConvMicrokernelTester()
24367 .cr(32)
24368 .kr(25)
24369 .channels(channels)
24370 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024371 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024372 }
24373 }
24374
24375 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
24376 TEST_REQUIRES_X86_AVX512SKX;
24377 for (uint32_t channels = 33; channels < 64; channels++) {
24378 DWConvMicrokernelTester()
24379 .cr(32)
24380 .kr(25)
24381 .channels(channels)
24382 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024383 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024384 }
24385 }
24386
24387 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
24388 TEST_REQUIRES_X86_AVX512SKX;
24389 for (size_t channels = 1; channels <= 160; channels += 31) {
24390 DWConvMicrokernelTester()
24391 .cr(32)
24392 .kr(25)
24393 .channels(channels)
24394 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024395 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024396 }
24397 }
24398
24399 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
24400 TEST_REQUIRES_X86_AVX512SKX;
24401 for (size_t channels = 1; channels <= 160; channels += 31) {
24402 for (size_t step = 2; step <= 25; step++) {
24403 DWConvMicrokernelTester()
24404 .cr(32)
24405 .kr(25)
24406 .channels(channels)
24407 .width(3)
24408 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024409 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024410 }
24411 }
24412 }
24413
24414 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
24415 TEST_REQUIRES_X86_AVX512SKX;
24416 for (size_t channels = 1; channels <= 160; channels += 31) {
24417 DWConvMicrokernelTester()
24418 .cr(32)
24419 .kr(25)
24420 .channels(32)
24421 .width(5)
24422 .output_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080024423 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024424 }
24425 }
24426
24427 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
24428 TEST_REQUIRES_X86_AVX512SKX;
24429 for (size_t channels = 1; channels <= 160; channels += 31) {
24430 DWConvMicrokernelTester()
24431 .cr(32)
24432 .kr(25)
24433 .channels(channels)
24434 .width(3)
24435 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024436 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024437 }
24438 }
24439
24440 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
24441 TEST_REQUIRES_X86_AVX512SKX;
24442 for (size_t channels = 1; channels <= 160; channels += 31) {
24443 DWConvMicrokernelTester()
24444 .cr(32)
24445 .kr(25)
24446 .channels(channels)
24447 .width(3)
24448 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024449 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024450 }
24451 }
24452
24453 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
24454 TEST_REQUIRES_X86_AVX512SKX;
24455 for (uint32_t channels = 64; channels < 512; channels += 96) {
24456 DWConvMicrokernelTester()
24457 .cr(32)
24458 .kr(25)
24459 .channels(channels)
24460 .input_offset(592)
Marat Dukhan50323b82022-01-11 00:12:01 -080024461 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024462 }
24463 }
24464
24465 TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
24466 TEST_REQUIRES_X86_AVX512SKX;
24467 for (uint32_t mz = 0; mz < 25; mz++) {
24468 for (uint32_t channels = 64; channels < 512; channels += 96) {
24469 DWConvMicrokernelTester()
24470 .cr(32)
24471 .kr(25)
24472 .channels(channels)
24473 .input_offset(592)
24474 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024475 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_minmax_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan98042f22021-06-15 00:43:13 -070024476 }
24477 }
24478 }
24479#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan313eef72021-06-30 16:11:31 -070024480
24481
Marat Dukhan4c617792021-12-21 15:47:58 -080024482#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070024483 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
24484 DWConvMicrokernelTester()
24485 .cr(8)
24486 .kr(25)
24487 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024489 }
24490
24491 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
24492 for (uint32_t channels = 16; channels < 128; channels += 24) {
24493 DWConvMicrokernelTester()
24494 .cr(8)
24495 .kr(25)
24496 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024497 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024498 }
24499 }
24500
24501 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
24502 for (uint32_t channels = 16; channels < 128; channels += 24) {
24503 DWConvMicrokernelTester()
24504 .cr(8)
24505 .kr(25)
24506 .channels(channels)
24507 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024508 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024509 }
24510 }
24511
24512 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
24513 for (uint32_t channels = 16; channels < 128; channels += 24) {
24514 DWConvMicrokernelTester()
24515 .cr(8)
24516 .kr(25)
24517 .channels(channels)
24518 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024519 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024520 }
24521 }
24522
24523 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
24524 for (uint32_t channels = 1; channels < 8; channels++) {
24525 DWConvMicrokernelTester()
24526 .cr(8)
24527 .kr(25)
24528 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024529 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024530 }
24531 }
24532
24533 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
24534 for (uint32_t channels = 9; channels < 16; channels++) {
24535 DWConvMicrokernelTester()
24536 .cr(8)
24537 .kr(25)
24538 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024539 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024540 }
24541 }
24542
24543 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
24544 for (uint32_t channels = 9; channels < 16; channels++) {
24545 DWConvMicrokernelTester()
24546 .cr(8)
24547 .kr(25)
24548 .channels(channels)
24549 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024550 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024551 }
24552 }
24553
24554 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
24555 for (uint32_t channels = 9; channels < 16; channels++) {
24556 DWConvMicrokernelTester()
24557 .cr(8)
24558 .kr(25)
24559 .channels(channels)
24560 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024561 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024562 }
24563 }
24564
24565 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
24566 for (size_t channels = 1; channels <= 40; channels += 7) {
24567 DWConvMicrokernelTester()
24568 .cr(8)
24569 .kr(25)
24570 .channels(channels)
24571 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024572 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024573 }
24574 }
24575
24576 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
24577 for (size_t channels = 1; channels <= 40; channels += 7) {
24578 for (size_t step = 2; step <= 25; step++) {
24579 DWConvMicrokernelTester()
24580 .cr(8)
24581 .kr(25)
24582 .channels(channels)
24583 .width(3)
24584 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024585 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024586 }
24587 }
24588 }
24589
24590 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
24591 for (size_t channels = 1; channels <= 40; channels += 7) {
24592 DWConvMicrokernelTester()
24593 .cr(8)
24594 .kr(25)
24595 .channels(8)
24596 .width(5)
24597 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024598 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024599 }
24600 }
24601
24602 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
24603 for (size_t channels = 1; channels <= 40; channels += 7) {
24604 DWConvMicrokernelTester()
24605 .cr(8)
24606 .kr(25)
24607 .channels(channels)
24608 .width(3)
24609 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024610 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024611 }
24612 }
24613
24614 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
24615 for (size_t channels = 1; channels <= 40; channels += 7) {
24616 DWConvMicrokernelTester()
24617 .cr(8)
24618 .kr(25)
24619 .channels(channels)
24620 .width(3)
24621 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024622 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024623 }
24624 }
24625
24626 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
24627 for (uint32_t channels = 16; channels < 128; channels += 24) {
24628 DWConvMicrokernelTester()
24629 .cr(8)
24630 .kr(25)
24631 .channels(channels)
24632 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080024633 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024634 }
24635 }
24636
24637 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
24638 for (uint32_t mz = 0; mz < 25; mz++) {
24639 for (uint32_t channels = 16; channels < 128; channels += 24) {
24640 DWConvMicrokernelTester()
24641 .cr(8)
24642 .kr(25)
24643 .channels(channels)
24644 .input_offset(176)
24645 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024646 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024647 }
24648 }
24649 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024650#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070024651
24652
Marat Dukhan4c617792021-12-21 15:47:58 -080024653#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070024654 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
24655 DWConvMicrokernelTester()
24656 .cr(16)
24657 .kr(25)
24658 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080024659 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024660 }
24661
24662 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
24663 for (uint32_t channels = 32; channels < 256; channels += 48) {
24664 DWConvMicrokernelTester()
24665 .cr(16)
24666 .kr(25)
24667 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024668 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024669 }
24670 }
24671
24672 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
24673 for (uint32_t channels = 32; channels < 256; channels += 48) {
24674 DWConvMicrokernelTester()
24675 .cr(16)
24676 .kr(25)
24677 .channels(channels)
24678 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024679 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024680 }
24681 }
24682
24683 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
24684 for (uint32_t channels = 32; channels < 256; channels += 48) {
24685 DWConvMicrokernelTester()
24686 .cr(16)
24687 .kr(25)
24688 .channels(channels)
24689 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024690 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024691 }
24692 }
24693
24694 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
24695 for (uint32_t channels = 1; channels < 16; channels++) {
24696 DWConvMicrokernelTester()
24697 .cr(16)
24698 .kr(25)
24699 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024700 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024701 }
24702 }
24703
24704 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
24705 for (uint32_t channels = 17; channels < 32; channels++) {
24706 DWConvMicrokernelTester()
24707 .cr(16)
24708 .kr(25)
24709 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024710 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024711 }
24712 }
24713
24714 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
24715 for (uint32_t channels = 17; channels < 32; channels++) {
24716 DWConvMicrokernelTester()
24717 .cr(16)
24718 .kr(25)
24719 .channels(channels)
24720 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024721 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024722 }
24723 }
24724
24725 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
24726 for (uint32_t channels = 17; channels < 32; channels++) {
24727 DWConvMicrokernelTester()
24728 .cr(16)
24729 .kr(25)
24730 .channels(channels)
24731 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024732 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024733 }
24734 }
24735
24736 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
24737 for (size_t channels = 1; channels <= 80; channels += 15) {
24738 DWConvMicrokernelTester()
24739 .cr(16)
24740 .kr(25)
24741 .channels(channels)
24742 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024743 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024744 }
24745 }
24746
24747 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
24748 for (size_t channels = 1; channels <= 80; channels += 15) {
24749 for (size_t step = 2; step <= 25; step++) {
24750 DWConvMicrokernelTester()
24751 .cr(16)
24752 .kr(25)
24753 .channels(channels)
24754 .width(3)
24755 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024756 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024757 }
24758 }
24759 }
24760
24761 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
24762 for (size_t channels = 1; channels <= 80; channels += 15) {
24763 DWConvMicrokernelTester()
24764 .cr(16)
24765 .kr(25)
24766 .channels(16)
24767 .width(5)
24768 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024769 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024770 }
24771 }
24772
24773 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
24774 for (size_t channels = 1; channels <= 80; channels += 15) {
24775 DWConvMicrokernelTester()
24776 .cr(16)
24777 .kr(25)
24778 .channels(channels)
24779 .width(3)
24780 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024781 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024782 }
24783 }
24784
24785 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
24786 for (size_t channels = 1; channels <= 80; channels += 15) {
24787 DWConvMicrokernelTester()
24788 .cr(16)
24789 .kr(25)
24790 .channels(channels)
24791 .width(3)
24792 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024793 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024794 }
24795 }
24796
24797 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
24798 for (uint32_t channels = 32; channels < 256; channels += 48) {
24799 DWConvMicrokernelTester()
24800 .cr(16)
24801 .kr(25)
24802 .channels(channels)
24803 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080024804 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024805 }
24806 }
24807
24808 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
24809 for (uint32_t mz = 0; mz < 25; mz++) {
24810 for (uint32_t channels = 32; channels < 256; channels += 48) {
24811 DWConvMicrokernelTester()
24812 .cr(16)
24813 .kr(25)
24814 .channels(channels)
24815 .input_offset(304)
24816 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024817 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024818 }
24819 }
24820 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024821#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070024822
24823
Marat Dukhan4c617792021-12-21 15:47:58 -080024824#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan313eef72021-06-30 16:11:31 -070024825 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
24826 DWConvMicrokernelTester()
24827 .cr(24)
24828 .kr(25)
24829 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080024830 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024831 }
24832
24833 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
24834 for (uint32_t channels = 48; channels < 384; channels += 72) {
24835 DWConvMicrokernelTester()
24836 .cr(24)
24837 .kr(25)
24838 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024839 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024840 }
24841 }
24842
24843 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
24844 for (uint32_t channels = 48; channels < 384; channels += 72) {
24845 DWConvMicrokernelTester()
24846 .cr(24)
24847 .kr(25)
24848 .channels(channels)
24849 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024850 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024851 }
24852 }
24853
24854 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
24855 for (uint32_t channels = 48; channels < 384; channels += 72) {
24856 DWConvMicrokernelTester()
24857 .cr(24)
24858 .kr(25)
24859 .channels(channels)
24860 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024861 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024862 }
24863 }
24864
24865 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
24866 for (uint32_t channels = 1; channels < 24; channels++) {
24867 DWConvMicrokernelTester()
24868 .cr(24)
24869 .kr(25)
24870 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024871 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024872 }
24873 }
24874
24875 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
24876 for (uint32_t channels = 25; channels < 48; channels++) {
24877 DWConvMicrokernelTester()
24878 .cr(24)
24879 .kr(25)
24880 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080024881 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024882 }
24883 }
24884
24885 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
24886 for (uint32_t channels = 25; channels < 48; channels++) {
24887 DWConvMicrokernelTester()
24888 .cr(24)
24889 .kr(25)
24890 .channels(channels)
24891 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024892 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024893 }
24894 }
24895
24896 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
24897 for (uint32_t channels = 25; channels < 48; channels++) {
24898 DWConvMicrokernelTester()
24899 .cr(24)
24900 .kr(25)
24901 .channels(channels)
24902 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024903 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024904 }
24905 }
24906
24907 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
24908 for (size_t channels = 1; channels <= 120; channels += 23) {
24909 DWConvMicrokernelTester()
24910 .cr(24)
24911 .kr(25)
24912 .channels(channels)
24913 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024914 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024915 }
24916 }
24917
24918 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
24919 for (size_t channels = 1; channels <= 120; channels += 23) {
24920 for (size_t step = 2; step <= 25; step++) {
24921 DWConvMicrokernelTester()
24922 .cr(24)
24923 .kr(25)
24924 .channels(channels)
24925 .width(3)
24926 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080024927 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024928 }
24929 }
24930 }
24931
24932 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
24933 for (size_t channels = 1; channels <= 120; channels += 23) {
24934 DWConvMicrokernelTester()
24935 .cr(24)
24936 .kr(25)
24937 .channels(24)
24938 .width(5)
24939 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080024940 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024941 }
24942 }
24943
24944 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
24945 for (size_t channels = 1; channels <= 120; channels += 23) {
24946 DWConvMicrokernelTester()
24947 .cr(24)
24948 .kr(25)
24949 .channels(channels)
24950 .width(3)
24951 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024952 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024953 }
24954 }
24955
24956 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
24957 for (size_t channels = 1; channels <= 120; channels += 23) {
24958 DWConvMicrokernelTester()
24959 .cr(24)
24960 .kr(25)
24961 .channels(channels)
24962 .width(3)
24963 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024964 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024965 }
24966 }
24967
24968 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
24969 for (uint32_t channels = 48; channels < 384; channels += 72) {
24970 DWConvMicrokernelTester()
24971 .cr(24)
24972 .kr(25)
24973 .channels(channels)
24974 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080024975 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024976 }
24977 }
24978
24979 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
24980 for (uint32_t mz = 0; mz < 25; mz++) {
24981 for (uint32_t channels = 48; channels < 384; channels += 72) {
24982 DWConvMicrokernelTester()
24983 .cr(24)
24984 .kr(25)
24985 .channels(channels)
24986 .input_offset(464)
24987 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024988 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan313eef72021-06-30 16:11:31 -070024989 }
24990 }
24991 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024992#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan57547062021-06-30 16:53:29 -070024993
24994
Marat Dukhan4c617792021-12-21 15:47:58 -080024995#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070024996 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_eq_8) {
24997 DWConvMicrokernelTester()
24998 .cr(8)
24999 .kr(25)
25000 .channels(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025001 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025002 }
25003
25004 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8) {
25005 for (uint32_t channels = 16; channels < 128; channels += 24) {
25006 DWConvMicrokernelTester()
25007 .cr(8)
25008 .kr(25)
25009 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025010 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025011 }
25012 }
25013
25014 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
25015 for (uint32_t channels = 16; channels < 128; channels += 24) {
25016 DWConvMicrokernelTester()
25017 .cr(8)
25018 .kr(25)
25019 .channels(channels)
25020 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025021 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025022 }
25023 }
25024
25025 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
25026 for (uint32_t channels = 16; channels < 128; channels += 24) {
25027 DWConvMicrokernelTester()
25028 .cr(8)
25029 .kr(25)
25030 .channels(channels)
25031 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025032 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025033 }
25034 }
25035
25036 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_lt_8) {
25037 for (uint32_t channels = 1; channels < 8; channels++) {
25038 DWConvMicrokernelTester()
25039 .cr(8)
25040 .kr(25)
25041 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025042 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025043 }
25044 }
25045
25046 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8) {
25047 for (uint32_t channels = 9; channels < 16; channels++) {
25048 DWConvMicrokernelTester()
25049 .cr(8)
25050 .kr(25)
25051 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025052 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025053 }
25054 }
25055
25056 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
25057 for (uint32_t channels = 9; channels < 16; channels++) {
25058 DWConvMicrokernelTester()
25059 .cr(8)
25060 .kr(25)
25061 .channels(channels)
25062 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025063 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025064 }
25065 }
25066
25067 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
25068 for (uint32_t channels = 9; channels < 16; channels++) {
25069 DWConvMicrokernelTester()
25070 .cr(8)
25071 .kr(25)
25072 .channels(channels)
25073 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025074 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025075 }
25076 }
25077
25078 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel) {
25079 for (size_t channels = 1; channels <= 40; channels += 7) {
25080 DWConvMicrokernelTester()
25081 .cr(8)
25082 .kr(25)
25083 .channels(channels)
25084 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025085 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025086 }
25087 }
25088
25089 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
25090 for (size_t channels = 1; channels <= 40; channels += 7) {
25091 for (size_t step = 2; step <= 25; step++) {
25092 DWConvMicrokernelTester()
25093 .cr(8)
25094 .kr(25)
25095 .channels(channels)
25096 .width(3)
25097 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025098 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025099 }
25100 }
25101 }
25102
25103 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
25104 for (size_t channels = 1; channels <= 40; channels += 7) {
25105 DWConvMicrokernelTester()
25106 .cr(8)
25107 .kr(25)
25108 .channels(8)
25109 .width(5)
25110 .output_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025111 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025112 }
25113 }
25114
25115 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
25116 for (size_t channels = 1; channels <= 40; channels += 7) {
25117 DWConvMicrokernelTester()
25118 .cr(8)
25119 .kr(25)
25120 .channels(channels)
25121 .width(3)
25122 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025123 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025124 }
25125 }
25126
25127 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
25128 for (size_t channels = 1; channels <= 40; channels += 7) {
25129 DWConvMicrokernelTester()
25130 .cr(8)
25131 .kr(25)
25132 .channels(channels)
25133 .width(3)
25134 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025135 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025136 }
25137 }
25138
25139 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, input_offset) {
25140 for (uint32_t channels = 16; channels < 128; channels += 24) {
25141 DWConvMicrokernelTester()
25142 .cr(8)
25143 .kr(25)
25144 .channels(channels)
25145 .input_offset(176)
Marat Dukhan50323b82022-01-11 00:12:01 -080025146 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025147 }
25148 }
25149
25150 TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, zero) {
25151 for (uint32_t mz = 0; mz < 25; mz++) {
25152 for (uint32_t channels = 16; channels < 128; channels += 24) {
25153 DWConvMicrokernelTester()
25154 .cr(8)
25155 .kr(25)
25156 .channels(channels)
25157 .input_offset(176)
25158 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025159 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025160 }
25161 }
25162 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025163#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070025164
25165
Marat Dukhan4c617792021-12-21 15:47:58 -080025166#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070025167 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_eq_16) {
25168 DWConvMicrokernelTester()
25169 .cr(16)
25170 .kr(25)
25171 .channels(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080025172 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025173 }
25174
25175 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16) {
25176 for (uint32_t channels = 32; channels < 256; channels += 48) {
25177 DWConvMicrokernelTester()
25178 .cr(16)
25179 .kr(25)
25180 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025181 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025182 }
25183 }
25184
25185 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
25186 for (uint32_t channels = 32; channels < 256; channels += 48) {
25187 DWConvMicrokernelTester()
25188 .cr(16)
25189 .kr(25)
25190 .channels(channels)
25191 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025192 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025193 }
25194 }
25195
25196 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
25197 for (uint32_t channels = 32; channels < 256; channels += 48) {
25198 DWConvMicrokernelTester()
25199 .cr(16)
25200 .kr(25)
25201 .channels(channels)
25202 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025203 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025204 }
25205 }
25206
25207 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_lt_16) {
25208 for (uint32_t channels = 1; channels < 16; channels++) {
25209 DWConvMicrokernelTester()
25210 .cr(16)
25211 .kr(25)
25212 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025213 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025214 }
25215 }
25216
25217 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16) {
25218 for (uint32_t channels = 17; channels < 32; channels++) {
25219 DWConvMicrokernelTester()
25220 .cr(16)
25221 .kr(25)
25222 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025223 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025224 }
25225 }
25226
25227 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
25228 for (uint32_t channels = 17; channels < 32; channels++) {
25229 DWConvMicrokernelTester()
25230 .cr(16)
25231 .kr(25)
25232 .channels(channels)
25233 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025234 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025235 }
25236 }
25237
25238 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
25239 for (uint32_t channels = 17; channels < 32; channels++) {
25240 DWConvMicrokernelTester()
25241 .cr(16)
25242 .kr(25)
25243 .channels(channels)
25244 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025245 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025246 }
25247 }
25248
25249 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel) {
25250 for (size_t channels = 1; channels <= 80; channels += 15) {
25251 DWConvMicrokernelTester()
25252 .cr(16)
25253 .kr(25)
25254 .channels(channels)
25255 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025256 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025257 }
25258 }
25259
25260 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
25261 for (size_t channels = 1; channels <= 80; channels += 15) {
25262 for (size_t step = 2; step <= 25; step++) {
25263 DWConvMicrokernelTester()
25264 .cr(16)
25265 .kr(25)
25266 .channels(channels)
25267 .width(3)
25268 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025269 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025270 }
25271 }
25272 }
25273
25274 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
25275 for (size_t channels = 1; channels <= 80; channels += 15) {
25276 DWConvMicrokernelTester()
25277 .cr(16)
25278 .kr(25)
25279 .channels(16)
25280 .width(5)
25281 .output_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025282 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025283 }
25284 }
25285
25286 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
25287 for (size_t channels = 1; channels <= 80; channels += 15) {
25288 DWConvMicrokernelTester()
25289 .cr(16)
25290 .kr(25)
25291 .channels(channels)
25292 .width(3)
25293 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025294 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025295 }
25296 }
25297
25298 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
25299 for (size_t channels = 1; channels <= 80; channels += 15) {
25300 DWConvMicrokernelTester()
25301 .cr(16)
25302 .kr(25)
25303 .channels(channels)
25304 .width(3)
25305 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025306 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025307 }
25308 }
25309
25310 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, input_offset) {
25311 for (uint32_t channels = 32; channels < 256; channels += 48) {
25312 DWConvMicrokernelTester()
25313 .cr(16)
25314 .kr(25)
25315 .channels(channels)
25316 .input_offset(304)
Marat Dukhan50323b82022-01-11 00:12:01 -080025317 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025318 }
25319 }
25320
25321 TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, zero) {
25322 for (uint32_t mz = 0; mz < 25; mz++) {
25323 for (uint32_t channels = 32; channels < 256; channels += 48) {
25324 DWConvMicrokernelTester()
25325 .cr(16)
25326 .kr(25)
25327 .channels(channels)
25328 .input_offset(304)
25329 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025330 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025331 }
25332 }
25333 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025334#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070025335
25336
Marat Dukhan4c617792021-12-21 15:47:58 -080025337#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070025338 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_eq_24) {
25339 DWConvMicrokernelTester()
25340 .cr(24)
25341 .kr(25)
25342 .channels(24)
Marat Dukhan50323b82022-01-11 00:12:01 -080025343 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025344 }
25345
25346 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24) {
25347 for (uint32_t channels = 48; channels < 384; channels += 72) {
25348 DWConvMicrokernelTester()
25349 .cr(24)
25350 .kr(25)
25351 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025352 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025353 }
25354 }
25355
25356 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
25357 for (uint32_t channels = 48; channels < 384; channels += 72) {
25358 DWConvMicrokernelTester()
25359 .cr(24)
25360 .kr(25)
25361 .channels(channels)
25362 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025363 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025364 }
25365 }
25366
25367 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
25368 for (uint32_t channels = 48; channels < 384; channels += 72) {
25369 DWConvMicrokernelTester()
25370 .cr(24)
25371 .kr(25)
25372 .channels(channels)
25373 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025374 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025375 }
25376 }
25377
25378 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_lt_24) {
25379 for (uint32_t channels = 1; channels < 24; channels++) {
25380 DWConvMicrokernelTester()
25381 .cr(24)
25382 .kr(25)
25383 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025384 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025385 }
25386 }
25387
25388 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24) {
25389 for (uint32_t channels = 25; channels < 48; channels++) {
25390 DWConvMicrokernelTester()
25391 .cr(24)
25392 .kr(25)
25393 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025394 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025395 }
25396 }
25397
25398 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
25399 for (uint32_t channels = 25; channels < 48; channels++) {
25400 DWConvMicrokernelTester()
25401 .cr(24)
25402 .kr(25)
25403 .channels(channels)
25404 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025405 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025406 }
25407 }
25408
25409 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
25410 for (uint32_t channels = 25; channels < 48; channels++) {
25411 DWConvMicrokernelTester()
25412 .cr(24)
25413 .kr(25)
25414 .channels(channels)
25415 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025416 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025417 }
25418 }
25419
25420 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel) {
25421 for (size_t channels = 1; channels <= 120; channels += 23) {
25422 DWConvMicrokernelTester()
25423 .cr(24)
25424 .kr(25)
25425 .channels(channels)
25426 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025427 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025428 }
25429 }
25430
25431 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
25432 for (size_t channels = 1; channels <= 120; channels += 23) {
25433 for (size_t step = 2; step <= 25; step++) {
25434 DWConvMicrokernelTester()
25435 .cr(24)
25436 .kr(25)
25437 .channels(channels)
25438 .width(3)
25439 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025440 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025441 }
25442 }
25443 }
25444
25445 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
25446 for (size_t channels = 1; channels <= 120; channels += 23) {
25447 DWConvMicrokernelTester()
25448 .cr(24)
25449 .kr(25)
25450 .channels(24)
25451 .width(5)
25452 .output_stride(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080025453 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025454 }
25455 }
25456
25457 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
25458 for (size_t channels = 1; channels <= 120; channels += 23) {
25459 DWConvMicrokernelTester()
25460 .cr(24)
25461 .kr(25)
25462 .channels(channels)
25463 .width(3)
25464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025465 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025466 }
25467 }
25468
25469 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
25470 for (size_t channels = 1; channels <= 120; channels += 23) {
25471 DWConvMicrokernelTester()
25472 .cr(24)
25473 .kr(25)
25474 .channels(channels)
25475 .width(3)
25476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025478 }
25479 }
25480
25481 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, input_offset) {
25482 for (uint32_t channels = 48; channels < 384; channels += 72) {
25483 DWConvMicrokernelTester()
25484 .cr(24)
25485 .kr(25)
25486 .channels(channels)
25487 .input_offset(464)
Marat Dukhan50323b82022-01-11 00:12:01 -080025488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025489 }
25490 }
25491
25492 TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, zero) {
25493 for (uint32_t mz = 0; mz < 25; mz++) {
25494 for (uint32_t channels = 48; channels < 384; channels += 72) {
25495 DWConvMicrokernelTester()
25496 .cr(24)
25497 .kr(25)
25498 .channels(channels)
25499 .input_offset(464)
25500 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_minmax_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan9cedb592021-08-17 17:25:24 -070025502 }
25503 }
25504 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025505#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9cedb592021-08-17 17:25:24 -070025506
25507
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025508#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25509 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
25510 DWConvMicrokernelTester()
25511 .cr(1)
25512 .kr(25)
25513 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025514 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025515 }
25516
25517 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
25518 for (uint32_t channels = 2; channels < 10; channels++) {
25519 DWConvMicrokernelTester()
25520 .cr(1)
25521 .kr(25)
25522 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025523 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025524 }
25525 }
25526
25527 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
25528 for (uint32_t channels = 2; channels < 10; channels++) {
25529 DWConvMicrokernelTester()
25530 .cr(1)
25531 .kr(25)
25532 .channels(channels)
25533 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025534 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025535 }
25536 }
25537
25538 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
25539 for (uint32_t channels = 2; channels < 10; channels++) {
25540 DWConvMicrokernelTester()
25541 .cr(1)
25542 .kr(25)
25543 .channels(channels)
25544 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025545 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025546 }
25547 }
25548
25549 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
25550 for (size_t channels = 1; channels <= 5; channels += 1) {
25551 DWConvMicrokernelTester()
25552 .cr(1)
25553 .kr(25)
25554 .channels(channels)
25555 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025556 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025557 }
25558 }
25559
25560 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
25561 for (size_t channels = 1; channels <= 5; channels += 1) {
25562 for (size_t step = 2; step <= 25; step++) {
25563 DWConvMicrokernelTester()
25564 .cr(1)
25565 .kr(25)
25566 .channels(channels)
25567 .width(3)
25568 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025569 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025570 }
25571 }
25572 }
25573
25574 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
25575 for (size_t channels = 1; channels <= 5; channels += 1) {
25576 DWConvMicrokernelTester()
25577 .cr(1)
25578 .kr(25)
25579 .channels(1)
25580 .width(5)
25581 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025582 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025583 }
25584 }
25585
25586 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
25587 for (size_t channels = 1; channels <= 5; channels += 1) {
25588 DWConvMicrokernelTester()
25589 .cr(1)
25590 .kr(25)
25591 .channels(channels)
25592 .width(3)
25593 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025594 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025595 }
25596 }
25597
25598 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
25599 for (size_t channels = 1; channels <= 5; channels += 1) {
25600 DWConvMicrokernelTester()
25601 .cr(1)
25602 .kr(25)
25603 .channels(channels)
25604 .width(3)
25605 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025606 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025607 }
25608 }
25609
25610 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
25611 for (uint32_t channels = 2; channels < 16; channels += 3) {
25612 DWConvMicrokernelTester()
25613 .cr(1)
25614 .kr(25)
25615 .channels(channels)
25616 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080025617 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025618 }
25619 }
25620
25621 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
25622 for (uint32_t mz = 0; mz < 25; mz++) {
25623 for (uint32_t channels = 2; channels < 16; channels += 3) {
25624 DWConvMicrokernelTester()
25625 .cr(1)
25626 .kr(25)
25627 .channels(channels)
25628 .input_offset(48)
25629 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025630 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025631 }
25632 }
25633 }
25634#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25635
25636
25637#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25638 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
25639 DWConvMicrokernelTester()
25640 .cr(2)
25641 .kr(25)
25642 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080025643 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025644 }
25645
25646 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
25647 for (uint32_t channels = 4; channels < 32; channels += 6) {
25648 DWConvMicrokernelTester()
25649 .cr(2)
25650 .kr(25)
25651 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025652 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025653 }
25654 }
25655
25656 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
25657 for (uint32_t channels = 4; channels < 32; channels += 6) {
25658 DWConvMicrokernelTester()
25659 .cr(2)
25660 .kr(25)
25661 .channels(channels)
25662 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025663 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025664 }
25665 }
25666
25667 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
25668 for (uint32_t channels = 4; channels < 32; channels += 6) {
25669 DWConvMicrokernelTester()
25670 .cr(2)
25671 .kr(25)
25672 .channels(channels)
25673 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025674 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025675 }
25676 }
25677
25678 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
25679 for (uint32_t channels = 1; channels < 2; channels++) {
25680 DWConvMicrokernelTester()
25681 .cr(2)
25682 .kr(25)
25683 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025684 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025685 }
25686 }
25687
25688 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
25689 for (uint32_t channels = 3; channels < 4; channels++) {
25690 DWConvMicrokernelTester()
25691 .cr(2)
25692 .kr(25)
25693 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025694 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025695 }
25696 }
25697
25698 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
25699 for (uint32_t channels = 3; channels < 4; channels++) {
25700 DWConvMicrokernelTester()
25701 .cr(2)
25702 .kr(25)
25703 .channels(channels)
25704 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025705 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025706 }
25707 }
25708
25709 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
25710 for (uint32_t channels = 3; channels < 4; channels++) {
25711 DWConvMicrokernelTester()
25712 .cr(2)
25713 .kr(25)
25714 .channels(channels)
25715 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025716 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025717 }
25718 }
25719
25720 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
25721 for (size_t channels = 1; channels <= 10; channels += 1) {
25722 DWConvMicrokernelTester()
25723 .cr(2)
25724 .kr(25)
25725 .channels(channels)
25726 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025727 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025728 }
25729 }
25730
25731 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
25732 for (size_t channels = 1; channels <= 10; channels += 1) {
25733 for (size_t step = 2; step <= 25; step++) {
25734 DWConvMicrokernelTester()
25735 .cr(2)
25736 .kr(25)
25737 .channels(channels)
25738 .width(3)
25739 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025740 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025741 }
25742 }
25743 }
25744
25745 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
25746 for (size_t channels = 1; channels <= 10; channels += 1) {
25747 DWConvMicrokernelTester()
25748 .cr(2)
25749 .kr(25)
25750 .channels(2)
25751 .width(5)
25752 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080025753 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025754 }
25755 }
25756
25757 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
25758 for (size_t channels = 1; channels <= 10; channels += 1) {
25759 DWConvMicrokernelTester()
25760 .cr(2)
25761 .kr(25)
25762 .channels(channels)
25763 .width(3)
25764 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025765 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025766 }
25767 }
25768
25769 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
25770 for (size_t channels = 1; channels <= 10; channels += 1) {
25771 DWConvMicrokernelTester()
25772 .cr(2)
25773 .kr(25)
25774 .channels(channels)
25775 .width(3)
25776 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025777 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025778 }
25779 }
25780
25781 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
25782 for (uint32_t channels = 4; channels < 32; channels += 6) {
25783 DWConvMicrokernelTester()
25784 .cr(2)
25785 .kr(25)
25786 .channels(channels)
25787 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080025788 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025789 }
25790 }
25791
25792 TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
25793 for (uint32_t mz = 0; mz < 25; mz++) {
25794 for (uint32_t channels = 4; channels < 32; channels += 6) {
25795 DWConvMicrokernelTester()
25796 .cr(2)
25797 .kr(25)
25798 .channels(channels)
25799 .input_offset(80)
25800 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025801 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025802 }
25803 }
25804 }
25805#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25806
25807
25808#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25809 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
25810 DWConvMicrokernelTester()
25811 .cr(4)
25812 .kr(25)
25813 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080025814 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025815 }
25816
25817 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
25818 for (uint32_t channels = 8; channels < 64; channels += 12) {
25819 DWConvMicrokernelTester()
25820 .cr(4)
25821 .kr(25)
25822 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025823 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025824 }
25825 }
25826
25827 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
25828 for (uint32_t channels = 8; channels < 64; channels += 12) {
25829 DWConvMicrokernelTester()
25830 .cr(4)
25831 .kr(25)
25832 .channels(channels)
25833 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025834 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025835 }
25836 }
25837
25838 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
25839 for (uint32_t channels = 8; channels < 64; channels += 12) {
25840 DWConvMicrokernelTester()
25841 .cr(4)
25842 .kr(25)
25843 .channels(channels)
25844 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025845 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025846 }
25847 }
25848
25849 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
25850 for (uint32_t channels = 1; channels < 4; channels++) {
25851 DWConvMicrokernelTester()
25852 .cr(4)
25853 .kr(25)
25854 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025855 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025856 }
25857 }
25858
25859 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
25860 for (uint32_t channels = 5; channels < 8; channels++) {
25861 DWConvMicrokernelTester()
25862 .cr(4)
25863 .kr(25)
25864 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025865 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025866 }
25867 }
25868
25869 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
25870 for (uint32_t channels = 5; channels < 8; channels++) {
25871 DWConvMicrokernelTester()
25872 .cr(4)
25873 .kr(25)
25874 .channels(channels)
25875 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025876 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025877 }
25878 }
25879
25880 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
25881 for (uint32_t channels = 5; channels < 8; channels++) {
25882 DWConvMicrokernelTester()
25883 .cr(4)
25884 .kr(25)
25885 .channels(channels)
25886 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025887 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025888 }
25889 }
25890
25891 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
25892 for (size_t channels = 1; channels <= 20; channels += 3) {
25893 DWConvMicrokernelTester()
25894 .cr(4)
25895 .kr(25)
25896 .channels(channels)
25897 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025899 }
25900 }
25901
25902 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
25903 for (size_t channels = 1; channels <= 20; channels += 3) {
25904 for (size_t step = 2; step <= 25; step++) {
25905 DWConvMicrokernelTester()
25906 .cr(4)
25907 .kr(25)
25908 .channels(channels)
25909 .width(3)
25910 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080025911 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025912 }
25913 }
25914 }
25915
25916 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
25917 for (size_t channels = 1; channels <= 20; channels += 3) {
25918 DWConvMicrokernelTester()
25919 .cr(4)
25920 .kr(25)
25921 .channels(4)
25922 .width(5)
25923 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080025924 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025925 }
25926 }
25927
25928 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
25929 for (size_t channels = 1; channels <= 20; channels += 3) {
25930 DWConvMicrokernelTester()
25931 .cr(4)
25932 .kr(25)
25933 .channels(channels)
25934 .width(3)
25935 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025936 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025937 }
25938 }
25939
25940 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
25941 for (size_t channels = 1; channels <= 20; channels += 3) {
25942 DWConvMicrokernelTester()
25943 .cr(4)
25944 .kr(25)
25945 .channels(channels)
25946 .width(3)
25947 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025948 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025949 }
25950 }
25951
25952 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
25953 for (uint32_t channels = 8; channels < 64; channels += 12) {
25954 DWConvMicrokernelTester()
25955 .cr(4)
25956 .kr(25)
25957 .channels(channels)
25958 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080025959 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025960 }
25961 }
25962
25963 TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
25964 for (uint32_t mz = 0; mz < 25; mz++) {
25965 for (uint32_t channels = 8; channels < 64; channels += 12) {
25966 DWConvMicrokernelTester()
25967 .cr(4)
25968 .kr(25)
25969 .channels(channels)
25970 .input_offset(112)
25971 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025972 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080025973 }
25974 }
25975 }
25976#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25977
25978
Marat Dukhan2ac722e2022-01-04 01:54:20 -080025979TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
Marat Dukhan57547062021-06-30 16:53:29 -070025980 DWConvMicrokernelTester()
25981 .cr(1)
25982 .kr(25)
25983 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025984 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070025985}
25986
Marat Dukhan2ac722e2022-01-04 01:54:20 -080025987TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
Marat Dukhan57547062021-06-30 16:53:29 -070025988 for (uint32_t channels = 2; channels < 10; channels++) {
25989 DWConvMicrokernelTester()
25990 .cr(1)
25991 .kr(25)
25992 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080025993 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070025994 }
25995}
25996
Marat Dukhan2ac722e2022-01-04 01:54:20 -080025997TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070025998 for (uint32_t channels = 2; channels < 10; channels++) {
25999 DWConvMicrokernelTester()
26000 .cr(1)
26001 .kr(25)
26002 .channels(channels)
26003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026004 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026005 }
26006}
26007
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026008TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026009 for (uint32_t channels = 2; channels < 10; channels++) {
26010 DWConvMicrokernelTester()
26011 .cr(1)
26012 .kr(25)
26013 .channels(channels)
26014 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026015 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026016 }
26017}
26018
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026019TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070026020 for (size_t channels = 1; channels <= 5; channels += 1) {
26021 DWConvMicrokernelTester()
26022 .cr(1)
26023 .kr(25)
26024 .channels(channels)
26025 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026026 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026027 }
26028}
26029
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026030TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070026031 for (size_t channels = 1; channels <= 5; channels += 1) {
26032 for (size_t step = 2; step <= 25; step++) {
26033 DWConvMicrokernelTester()
26034 .cr(1)
26035 .kr(25)
26036 .channels(channels)
26037 .width(3)
26038 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026039 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026040 }
26041 }
26042}
26043
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026044TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070026045 for (size_t channels = 1; channels <= 5; channels += 1) {
26046 DWConvMicrokernelTester()
26047 .cr(1)
26048 .kr(25)
26049 .channels(1)
26050 .width(5)
26051 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026052 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026053 }
26054}
26055
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026056TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026057 for (size_t channels = 1; channels <= 5; channels += 1) {
26058 DWConvMicrokernelTester()
26059 .cr(1)
26060 .kr(25)
26061 .channels(channels)
26062 .width(3)
26063 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026064 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026065 }
26066}
26067
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026068TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026069 for (size_t channels = 1; channels <= 5; channels += 1) {
26070 DWConvMicrokernelTester()
26071 .cr(1)
26072 .kr(25)
26073 .channels(channels)
26074 .width(3)
26075 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026076 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026077 }
26078}
26079
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026080TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070026081 for (uint32_t channels = 2; channels < 16; channels += 3) {
26082 DWConvMicrokernelTester()
26083 .cr(1)
26084 .kr(25)
26085 .channels(channels)
26086 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080026087 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026088 }
26089}
26090
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026091TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070026092 for (uint32_t mz = 0; mz < 25; mz++) {
26093 for (uint32_t channels = 2; channels < 16; channels += 3) {
26094 DWConvMicrokernelTester()
26095 .cr(1)
26096 .kr(25)
26097 .channels(channels)
26098 .input_offset(48)
26099 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026100 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026101 }
26102 }
26103}
26104
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026105TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070026106 DWConvMicrokernelTester()
26107 .cr(2)
26108 .kr(25)
26109 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080026110 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026111}
26112
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026113TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070026114 for (uint32_t channels = 4; channels < 32; channels += 6) {
26115 DWConvMicrokernelTester()
26116 .cr(2)
26117 .kr(25)
26118 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026119 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026120 }
26121}
26122
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026123TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026124 for (uint32_t channels = 4; channels < 32; channels += 6) {
26125 DWConvMicrokernelTester()
26126 .cr(2)
26127 .kr(25)
26128 .channels(channels)
26129 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026130 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026131 }
26132}
26133
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026134TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026135 for (uint32_t channels = 4; channels < 32; channels += 6) {
26136 DWConvMicrokernelTester()
26137 .cr(2)
26138 .kr(25)
26139 .channels(channels)
26140 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026141 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026142 }
26143}
26144
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026145TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070026146 for (uint32_t channels = 1; channels < 2; channels++) {
26147 DWConvMicrokernelTester()
26148 .cr(2)
26149 .kr(25)
26150 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026151 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026152 }
26153}
26154
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026155TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
Marat Dukhan57547062021-06-30 16:53:29 -070026156 for (uint32_t channels = 3; channels < 4; channels++) {
26157 DWConvMicrokernelTester()
26158 .cr(2)
26159 .kr(25)
26160 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026161 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026162 }
26163}
26164
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026165TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026166 for (uint32_t channels = 3; channels < 4; channels++) {
26167 DWConvMicrokernelTester()
26168 .cr(2)
26169 .kr(25)
26170 .channels(channels)
26171 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026172 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026173 }
26174}
26175
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026176TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026177 for (uint32_t channels = 3; channels < 4; channels++) {
26178 DWConvMicrokernelTester()
26179 .cr(2)
26180 .kr(25)
26181 .channels(channels)
26182 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026183 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026184 }
26185}
26186
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026187TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070026188 for (size_t channels = 1; channels <= 10; channels += 1) {
26189 DWConvMicrokernelTester()
26190 .cr(2)
26191 .kr(25)
26192 .channels(channels)
26193 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026194 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026195 }
26196}
26197
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026198TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070026199 for (size_t channels = 1; channels <= 10; channels += 1) {
26200 for (size_t step = 2; step <= 25; step++) {
26201 DWConvMicrokernelTester()
26202 .cr(2)
26203 .kr(25)
26204 .channels(channels)
26205 .width(3)
26206 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026207 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026208 }
26209 }
26210}
26211
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026212TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070026213 for (size_t channels = 1; channels <= 10; channels += 1) {
26214 DWConvMicrokernelTester()
26215 .cr(2)
26216 .kr(25)
26217 .channels(2)
26218 .width(5)
26219 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080026220 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026221 }
26222}
26223
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026224TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026225 for (size_t channels = 1; channels <= 10; channels += 1) {
26226 DWConvMicrokernelTester()
26227 .cr(2)
26228 .kr(25)
26229 .channels(channels)
26230 .width(3)
26231 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026232 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026233 }
26234}
26235
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026236TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026237 for (size_t channels = 1; channels <= 10; channels += 1) {
26238 DWConvMicrokernelTester()
26239 .cr(2)
26240 .kr(25)
26241 .channels(channels)
26242 .width(3)
26243 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026244 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026245 }
26246}
26247
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026248TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070026249 for (uint32_t channels = 4; channels < 32; channels += 6) {
26250 DWConvMicrokernelTester()
26251 .cr(2)
26252 .kr(25)
26253 .channels(channels)
26254 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080026255 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026256 }
26257}
26258
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026259TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070026260 for (uint32_t mz = 0; mz < 25; mz++) {
26261 for (uint32_t channels = 4; channels < 32; channels += 6) {
26262 DWConvMicrokernelTester()
26263 .cr(2)
26264 .kr(25)
26265 .channels(channels)
26266 .input_offset(80)
26267 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026268 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026269 }
26270 }
26271}
26272
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026273TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070026274 DWConvMicrokernelTester()
26275 .cr(4)
26276 .kr(25)
26277 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080026278 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026279}
26280
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026281TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070026282 for (uint32_t channels = 8; channels < 64; channels += 12) {
26283 DWConvMicrokernelTester()
26284 .cr(4)
26285 .kr(25)
26286 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026287 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026288 }
26289}
26290
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026291TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026292 for (uint32_t channels = 8; channels < 64; channels += 12) {
26293 DWConvMicrokernelTester()
26294 .cr(4)
26295 .kr(25)
26296 .channels(channels)
26297 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026298 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026299 }
26300}
26301
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026302TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026303 for (uint32_t channels = 8; channels < 64; channels += 12) {
26304 DWConvMicrokernelTester()
26305 .cr(4)
26306 .kr(25)
26307 .channels(channels)
26308 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026309 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026310 }
26311}
26312
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026313TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070026314 for (uint32_t channels = 1; channels < 4; channels++) {
26315 DWConvMicrokernelTester()
26316 .cr(4)
26317 .kr(25)
26318 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026319 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026320 }
26321}
26322
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026323TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
Marat Dukhan57547062021-06-30 16:53:29 -070026324 for (uint32_t channels = 5; channels < 8; channels++) {
26325 DWConvMicrokernelTester()
26326 .cr(4)
26327 .kr(25)
26328 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026329 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026330 }
26331}
26332
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026333TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026334 for (uint32_t channels = 5; channels < 8; channels++) {
26335 DWConvMicrokernelTester()
26336 .cr(4)
26337 .kr(25)
26338 .channels(channels)
26339 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026340 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026341 }
26342}
26343
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026344TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026345 for (uint32_t channels = 5; channels < 8; channels++) {
26346 DWConvMicrokernelTester()
26347 .cr(4)
26348 .kr(25)
26349 .channels(channels)
26350 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026351 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026352 }
26353}
26354
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026355TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
Marat Dukhan57547062021-06-30 16:53:29 -070026356 for (size_t channels = 1; channels <= 20; channels += 3) {
26357 DWConvMicrokernelTester()
26358 .cr(4)
26359 .kr(25)
26360 .channels(channels)
26361 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026362 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026363 }
26364}
26365
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026366TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
Marat Dukhan57547062021-06-30 16:53:29 -070026367 for (size_t channels = 1; channels <= 20; channels += 3) {
26368 for (size_t step = 2; step <= 25; step++) {
26369 DWConvMicrokernelTester()
26370 .cr(4)
26371 .kr(25)
26372 .channels(channels)
26373 .width(3)
26374 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026375 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026376 }
26377 }
26378}
26379
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026380TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
Marat Dukhan57547062021-06-30 16:53:29 -070026381 for (size_t channels = 1; channels <= 20; channels += 3) {
26382 DWConvMicrokernelTester()
26383 .cr(4)
26384 .kr(25)
26385 .channels(4)
26386 .width(5)
26387 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080026388 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026389 }
26390}
26391
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026392TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
Marat Dukhan57547062021-06-30 16:53:29 -070026393 for (size_t channels = 1; channels <= 20; channels += 3) {
26394 DWConvMicrokernelTester()
26395 .cr(4)
26396 .kr(25)
26397 .channels(channels)
26398 .width(3)
26399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026400 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026401 }
26402}
26403
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026404TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
Marat Dukhan57547062021-06-30 16:53:29 -070026405 for (size_t channels = 1; channels <= 20; channels += 3) {
26406 DWConvMicrokernelTester()
26407 .cr(4)
26408 .kr(25)
26409 .channels(channels)
26410 .width(3)
26411 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026412 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026413 }
26414}
26415
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026416TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
Marat Dukhan57547062021-06-30 16:53:29 -070026417 for (uint32_t channels = 8; channels < 64; channels += 12) {
26418 DWConvMicrokernelTester()
26419 .cr(4)
26420 .kr(25)
26421 .channels(channels)
26422 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080026423 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026424 }
26425}
26426
Marat Dukhan2ac722e2022-01-04 01:54:20 -080026427TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
Marat Dukhan57547062021-06-30 16:53:29 -070026428 for (uint32_t mz = 0; mz < 25; mz++) {
26429 for (uint32_t channels = 8; channels < 64; channels += 12) {
26430 DWConvMicrokernelTester()
26431 .cr(4)
26432 .kr(25)
26433 .channels(channels)
26434 .input_offset(112)
26435 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026436 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan57547062021-06-30 16:53:29 -070026437 }
26438 }
Marat Dukhan272d4d92022-01-04 15:07:14 -080026439}
26440
26441TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
26442 DWConvMicrokernelTester()
26443 .cr(1)
26444 .kr(25)
26445 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026446 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026447}
26448
26449TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
26450 for (uint32_t channels = 2; channels < 10; channels++) {
26451 DWConvMicrokernelTester()
26452 .cr(1)
26453 .kr(25)
26454 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026455 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026456 }
26457}
26458
26459TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
26460 for (uint32_t channels = 2; channels < 10; channels++) {
26461 DWConvMicrokernelTester()
26462 .cr(1)
26463 .kr(25)
26464 .channels(channels)
26465 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026466 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026467 }
26468}
26469
26470TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
26471 for (uint32_t channels = 2; channels < 10; channels++) {
26472 DWConvMicrokernelTester()
26473 .cr(1)
26474 .kr(25)
26475 .channels(channels)
26476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026477 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026478 }
26479}
26480
26481TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
26482 for (size_t channels = 1; channels <= 5; channels += 1) {
26483 DWConvMicrokernelTester()
26484 .cr(1)
26485 .kr(25)
26486 .channels(channels)
26487 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026488 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026489 }
26490}
26491
26492TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
26493 for (size_t channels = 1; channels <= 5; channels += 1) {
26494 for (size_t step = 2; step <= 25; step++) {
26495 DWConvMicrokernelTester()
26496 .cr(1)
26497 .kr(25)
26498 .channels(channels)
26499 .width(3)
26500 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026501 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026502 }
26503 }
26504}
26505
26506TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
26507 for (size_t channels = 1; channels <= 5; channels += 1) {
26508 DWConvMicrokernelTester()
26509 .cr(1)
26510 .kr(25)
26511 .channels(1)
26512 .width(5)
26513 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026514 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026515 }
26516}
26517
26518TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
26519 for (size_t channels = 1; channels <= 5; channels += 1) {
26520 DWConvMicrokernelTester()
26521 .cr(1)
26522 .kr(25)
26523 .channels(channels)
26524 .width(3)
26525 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026526 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026527 }
26528}
26529
26530TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
26531 for (size_t channels = 1; channels <= 5; channels += 1) {
26532 DWConvMicrokernelTester()
26533 .cr(1)
26534 .kr(25)
26535 .channels(channels)
26536 .width(3)
26537 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026538 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026539 }
26540}
26541
26542TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
26543 for (uint32_t channels = 2; channels < 16; channels += 3) {
26544 DWConvMicrokernelTester()
26545 .cr(1)
26546 .kr(25)
26547 .channels(channels)
26548 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080026549 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026550 }
26551}
26552
26553TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
26554 for (uint32_t mz = 0; mz < 25; mz++) {
26555 for (uint32_t channels = 2; channels < 16; channels += 3) {
26556 DWConvMicrokernelTester()
26557 .cr(1)
26558 .kr(25)
26559 .channels(channels)
26560 .input_offset(48)
26561 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026562 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026563 }
26564 }
26565}
26566
26567TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
26568 DWConvMicrokernelTester()
26569 .cr(2)
26570 .kr(25)
26571 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080026572 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026573}
26574
26575TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
26576 for (uint32_t channels = 4; channels < 32; channels += 6) {
26577 DWConvMicrokernelTester()
26578 .cr(2)
26579 .kr(25)
26580 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026581 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026582 }
26583}
26584
26585TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
26586 for (uint32_t channels = 4; channels < 32; channels += 6) {
26587 DWConvMicrokernelTester()
26588 .cr(2)
26589 .kr(25)
26590 .channels(channels)
26591 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026592 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026593 }
26594}
26595
26596TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
26597 for (uint32_t channels = 4; channels < 32; channels += 6) {
26598 DWConvMicrokernelTester()
26599 .cr(2)
26600 .kr(25)
26601 .channels(channels)
26602 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026603 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026604 }
26605}
26606
26607TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
26608 for (uint32_t channels = 1; channels < 2; channels++) {
26609 DWConvMicrokernelTester()
26610 .cr(2)
26611 .kr(25)
26612 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026613 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026614 }
26615}
26616
26617TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
26618 for (uint32_t channels = 3; channels < 4; channels++) {
26619 DWConvMicrokernelTester()
26620 .cr(2)
26621 .kr(25)
26622 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026623 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026624 }
26625}
26626
26627TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
26628 for (uint32_t channels = 3; channels < 4; channels++) {
26629 DWConvMicrokernelTester()
26630 .cr(2)
26631 .kr(25)
26632 .channels(channels)
26633 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026634 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026635 }
26636}
26637
26638TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
26639 for (uint32_t channels = 3; channels < 4; channels++) {
26640 DWConvMicrokernelTester()
26641 .cr(2)
26642 .kr(25)
26643 .channels(channels)
26644 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026645 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026646 }
26647}
26648
26649TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
26650 for (size_t channels = 1; channels <= 10; channels += 1) {
26651 DWConvMicrokernelTester()
26652 .cr(2)
26653 .kr(25)
26654 .channels(channels)
26655 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026656 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026657 }
26658}
26659
26660TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
26661 for (size_t channels = 1; channels <= 10; channels += 1) {
26662 for (size_t step = 2; step <= 25; step++) {
26663 DWConvMicrokernelTester()
26664 .cr(2)
26665 .kr(25)
26666 .channels(channels)
26667 .width(3)
26668 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026669 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026670 }
26671 }
26672}
26673
26674TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
26675 for (size_t channels = 1; channels <= 10; channels += 1) {
26676 DWConvMicrokernelTester()
26677 .cr(2)
26678 .kr(25)
26679 .channels(2)
26680 .width(5)
26681 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080026682 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026683 }
26684}
26685
26686TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
26687 for (size_t channels = 1; channels <= 10; channels += 1) {
26688 DWConvMicrokernelTester()
26689 .cr(2)
26690 .kr(25)
26691 .channels(channels)
26692 .width(3)
26693 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026694 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026695 }
26696}
26697
26698TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
26699 for (size_t channels = 1; channels <= 10; channels += 1) {
26700 DWConvMicrokernelTester()
26701 .cr(2)
26702 .kr(25)
26703 .channels(channels)
26704 .width(3)
26705 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026706 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026707 }
26708}
26709
26710TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
26711 for (uint32_t channels = 4; channels < 32; channels += 6) {
26712 DWConvMicrokernelTester()
26713 .cr(2)
26714 .kr(25)
26715 .channels(channels)
26716 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080026717 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026718 }
26719}
26720
26721TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
26722 for (uint32_t mz = 0; mz < 25; mz++) {
26723 for (uint32_t channels = 4; channels < 32; channels += 6) {
26724 DWConvMicrokernelTester()
26725 .cr(2)
26726 .kr(25)
26727 .channels(channels)
26728 .input_offset(80)
26729 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026730 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026731 }
26732 }
26733}
26734
26735TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
26736 DWConvMicrokernelTester()
26737 .cr(4)
26738 .kr(25)
26739 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080026740 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026741}
26742
26743TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
26744 for (uint32_t channels = 8; channels < 64; channels += 12) {
26745 DWConvMicrokernelTester()
26746 .cr(4)
26747 .kr(25)
26748 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026749 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026750 }
26751}
26752
26753TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
26754 for (uint32_t channels = 8; channels < 64; channels += 12) {
26755 DWConvMicrokernelTester()
26756 .cr(4)
26757 .kr(25)
26758 .channels(channels)
26759 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026760 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026761 }
26762}
26763
26764TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
26765 for (uint32_t channels = 8; channels < 64; channels += 12) {
26766 DWConvMicrokernelTester()
26767 .cr(4)
26768 .kr(25)
26769 .channels(channels)
26770 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026771 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026772 }
26773}
26774
26775TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
26776 for (uint32_t channels = 1; channels < 4; channels++) {
26777 DWConvMicrokernelTester()
26778 .cr(4)
26779 .kr(25)
26780 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026781 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026782 }
26783}
26784
26785TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
26786 for (uint32_t channels = 5; channels < 8; channels++) {
26787 DWConvMicrokernelTester()
26788 .cr(4)
26789 .kr(25)
26790 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026791 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026792 }
26793}
26794
26795TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
26796 for (uint32_t channels = 5; channels < 8; channels++) {
26797 DWConvMicrokernelTester()
26798 .cr(4)
26799 .kr(25)
26800 .channels(channels)
26801 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026802 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026803 }
26804}
26805
26806TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
26807 for (uint32_t channels = 5; channels < 8; channels++) {
26808 DWConvMicrokernelTester()
26809 .cr(4)
26810 .kr(25)
26811 .channels(channels)
26812 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026813 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026814 }
26815}
26816
26817TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
26818 for (size_t channels = 1; channels <= 20; channels += 3) {
26819 DWConvMicrokernelTester()
26820 .cr(4)
26821 .kr(25)
26822 .channels(channels)
26823 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026824 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026825 }
26826}
26827
26828TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
26829 for (size_t channels = 1; channels <= 20; channels += 3) {
26830 for (size_t step = 2; step <= 25; step++) {
26831 DWConvMicrokernelTester()
26832 .cr(4)
26833 .kr(25)
26834 .channels(channels)
26835 .width(3)
26836 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026837 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026838 }
26839 }
26840}
26841
26842TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
26843 for (size_t channels = 1; channels <= 20; channels += 3) {
26844 DWConvMicrokernelTester()
26845 .cr(4)
26846 .kr(25)
26847 .channels(4)
26848 .width(5)
26849 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080026850 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026851 }
26852}
26853
26854TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
26855 for (size_t channels = 1; channels <= 20; channels += 3) {
26856 DWConvMicrokernelTester()
26857 .cr(4)
26858 .kr(25)
26859 .channels(channels)
26860 .width(3)
26861 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026862 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026863 }
26864}
26865
26866TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
26867 for (size_t channels = 1; channels <= 20; channels += 3) {
26868 DWConvMicrokernelTester()
26869 .cr(4)
26870 .kr(25)
26871 .channels(channels)
26872 .width(3)
26873 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026874 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026875 }
26876}
26877
26878TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
26879 for (uint32_t channels = 8; channels < 64; channels += 12) {
26880 DWConvMicrokernelTester()
26881 .cr(4)
26882 .kr(25)
26883 .channels(channels)
26884 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080026885 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026886 }
26887}
26888
26889TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
26890 for (uint32_t mz = 0; mz < 25; mz++) {
26891 for (uint32_t channels = 8; channels < 64; channels += 12) {
26892 DWConvMicrokernelTester()
26893 .cr(4)
26894 .kr(25)
26895 .channels(channels)
26896 .input_offset(112)
26897 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026898 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026899 }
26900 }
26901}
26902
26903TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
26904 DWConvMicrokernelTester()
26905 .cr(1)
26906 .kr(25)
26907 .channels(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026908 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026909}
26910
26911TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
26912 for (uint32_t channels = 2; channels < 10; channels++) {
26913 DWConvMicrokernelTester()
26914 .cr(1)
26915 .kr(25)
26916 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080026917 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026918 }
26919}
26920
26921TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
26922 for (uint32_t channels = 2; channels < 10; channels++) {
26923 DWConvMicrokernelTester()
26924 .cr(1)
26925 .kr(25)
26926 .channels(channels)
26927 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026928 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026929 }
26930}
26931
26932TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
26933 for (uint32_t channels = 2; channels < 10; channels++) {
26934 DWConvMicrokernelTester()
26935 .cr(1)
26936 .kr(25)
26937 .channels(channels)
26938 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026939 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026940 }
26941}
26942
26943TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
26944 for (size_t channels = 1; channels <= 5; channels += 1) {
26945 DWConvMicrokernelTester()
26946 .cr(1)
26947 .kr(25)
26948 .channels(channels)
26949 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026950 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026951 }
26952}
26953
26954TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
26955 for (size_t channels = 1; channels <= 5; channels += 1) {
26956 for (size_t step = 2; step <= 25; step++) {
26957 DWConvMicrokernelTester()
26958 .cr(1)
26959 .kr(25)
26960 .channels(channels)
26961 .width(3)
26962 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080026963 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026964 }
26965 }
26966}
26967
26968TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
26969 for (size_t channels = 1; channels <= 5; channels += 1) {
26970 DWConvMicrokernelTester()
26971 .cr(1)
26972 .kr(25)
26973 .channels(1)
26974 .width(5)
26975 .output_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026976 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026977 }
26978}
26979
26980TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
26981 for (size_t channels = 1; channels <= 5; channels += 1) {
26982 DWConvMicrokernelTester()
26983 .cr(1)
26984 .kr(25)
26985 .channels(channels)
26986 .width(3)
26987 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026988 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080026989 }
26990}
26991
26992TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
26993 for (size_t channels = 1; channels <= 5; channels += 1) {
26994 DWConvMicrokernelTester()
26995 .cr(1)
26996 .kr(25)
26997 .channels(channels)
26998 .width(3)
26999 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027000 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027001 }
27002}
27003
27004TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
27005 for (uint32_t channels = 2; channels < 16; channels += 3) {
27006 DWConvMicrokernelTester()
27007 .cr(1)
27008 .kr(25)
27009 .channels(channels)
27010 .input_offset(48)
Marat Dukhan50323b82022-01-11 00:12:01 -080027011 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027012 }
27013}
27014
27015TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
27016 for (uint32_t mz = 0; mz < 25; mz++) {
27017 for (uint32_t channels = 2; channels < 16; channels += 3) {
27018 DWConvMicrokernelTester()
27019 .cr(1)
27020 .kr(25)
27021 .channels(channels)
27022 .input_offset(48)
27023 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027024 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027025 }
27026 }
27027}
27028
27029TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
27030 DWConvMicrokernelTester()
27031 .cr(2)
27032 .kr(25)
27033 .channels(2)
Marat Dukhan50323b82022-01-11 00:12:01 -080027034 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027035}
27036
27037TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
27038 for (uint32_t channels = 4; channels < 32; channels += 6) {
27039 DWConvMicrokernelTester()
27040 .cr(2)
27041 .kr(25)
27042 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027043 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027044 }
27045}
27046
27047TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
27048 for (uint32_t channels = 4; channels < 32; channels += 6) {
27049 DWConvMicrokernelTester()
27050 .cr(2)
27051 .kr(25)
27052 .channels(channels)
27053 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027054 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027055 }
27056}
27057
27058TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
27059 for (uint32_t channels = 4; channels < 32; channels += 6) {
27060 DWConvMicrokernelTester()
27061 .cr(2)
27062 .kr(25)
27063 .channels(channels)
27064 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027065 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027066 }
27067}
27068
27069TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
27070 for (uint32_t channels = 1; channels < 2; channels++) {
27071 DWConvMicrokernelTester()
27072 .cr(2)
27073 .kr(25)
27074 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027075 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027076 }
27077}
27078
27079TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
27080 for (uint32_t channels = 3; channels < 4; channels++) {
27081 DWConvMicrokernelTester()
27082 .cr(2)
27083 .kr(25)
27084 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027085 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027086 }
27087}
27088
27089TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
27090 for (uint32_t channels = 3; channels < 4; channels++) {
27091 DWConvMicrokernelTester()
27092 .cr(2)
27093 .kr(25)
27094 .channels(channels)
27095 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027096 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027097 }
27098}
27099
27100TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
27101 for (uint32_t channels = 3; channels < 4; channels++) {
27102 DWConvMicrokernelTester()
27103 .cr(2)
27104 .kr(25)
27105 .channels(channels)
27106 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027107 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027108 }
27109}
27110
27111TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
27112 for (size_t channels = 1; channels <= 10; channels += 1) {
27113 DWConvMicrokernelTester()
27114 .cr(2)
27115 .kr(25)
27116 .channels(channels)
27117 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027118 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027119 }
27120}
27121
27122TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
27123 for (size_t channels = 1; channels <= 10; channels += 1) {
27124 for (size_t step = 2; step <= 25; step++) {
27125 DWConvMicrokernelTester()
27126 .cr(2)
27127 .kr(25)
27128 .channels(channels)
27129 .width(3)
27130 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080027131 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027132 }
27133 }
27134}
27135
27136TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
27137 for (size_t channels = 1; channels <= 10; channels += 1) {
27138 DWConvMicrokernelTester()
27139 .cr(2)
27140 .kr(25)
27141 .channels(2)
27142 .width(5)
27143 .output_stride(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080027144 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027145 }
27146}
27147
27148TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
27149 for (size_t channels = 1; channels <= 10; channels += 1) {
27150 DWConvMicrokernelTester()
27151 .cr(2)
27152 .kr(25)
27153 .channels(channels)
27154 .width(3)
27155 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027156 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027157 }
27158}
27159
27160TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
27161 for (size_t channels = 1; channels <= 10; channels += 1) {
27162 DWConvMicrokernelTester()
27163 .cr(2)
27164 .kr(25)
27165 .channels(channels)
27166 .width(3)
27167 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027168 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027169 }
27170}
27171
27172TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
27173 for (uint32_t channels = 4; channels < 32; channels += 6) {
27174 DWConvMicrokernelTester()
27175 .cr(2)
27176 .kr(25)
27177 .channels(channels)
27178 .input_offset(80)
Marat Dukhan50323b82022-01-11 00:12:01 -080027179 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027180 }
27181}
27182
27183TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
27184 for (uint32_t mz = 0; mz < 25; mz++) {
27185 for (uint32_t channels = 4; channels < 32; channels += 6) {
27186 DWConvMicrokernelTester()
27187 .cr(2)
27188 .kr(25)
27189 .channels(channels)
27190 .input_offset(80)
27191 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027192 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027193 }
27194 }
27195}
27196
27197TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
27198 DWConvMicrokernelTester()
27199 .cr(4)
27200 .kr(25)
27201 .channels(4)
Marat Dukhan50323b82022-01-11 00:12:01 -080027202 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027203}
27204
27205TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
27206 for (uint32_t channels = 8; channels < 64; channels += 12) {
27207 DWConvMicrokernelTester()
27208 .cr(4)
27209 .kr(25)
27210 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027211 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027212 }
27213}
27214
27215TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
27216 for (uint32_t channels = 8; channels < 64; channels += 12) {
27217 DWConvMicrokernelTester()
27218 .cr(4)
27219 .kr(25)
27220 .channels(channels)
27221 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027222 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027223 }
27224}
27225
27226TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
27227 for (uint32_t channels = 8; channels < 64; channels += 12) {
27228 DWConvMicrokernelTester()
27229 .cr(4)
27230 .kr(25)
27231 .channels(channels)
27232 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027233 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027234 }
27235}
27236
27237TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
27238 for (uint32_t channels = 1; channels < 4; channels++) {
27239 DWConvMicrokernelTester()
27240 .cr(4)
27241 .kr(25)
27242 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027243 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027244 }
27245}
27246
27247TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
27248 for (uint32_t channels = 5; channels < 8; channels++) {
27249 DWConvMicrokernelTester()
27250 .cr(4)
27251 .kr(25)
27252 .channels(channels)
Marat Dukhan50323b82022-01-11 00:12:01 -080027253 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027254 }
27255}
27256
27257TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
27258 for (uint32_t channels = 5; channels < 8; channels++) {
27259 DWConvMicrokernelTester()
27260 .cr(4)
27261 .kr(25)
27262 .channels(channels)
27263 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027264 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027265 }
27266}
27267
27268TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
27269 for (uint32_t channels = 5; channels < 8; channels++) {
27270 DWConvMicrokernelTester()
27271 .cr(4)
27272 .kr(25)
27273 .channels(channels)
27274 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027275 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027276 }
27277}
27278
27279TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
27280 for (size_t channels = 1; channels <= 20; channels += 3) {
27281 DWConvMicrokernelTester()
27282 .cr(4)
27283 .kr(25)
27284 .channels(channels)
27285 .width(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027286 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027287 }
27288}
27289
27290TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
27291 for (size_t channels = 1; channels <= 20; channels += 3) {
27292 for (size_t step = 2; step <= 25; step++) {
27293 DWConvMicrokernelTester()
27294 .cr(4)
27295 .kr(25)
27296 .channels(channels)
27297 .width(3)
27298 .step(step)
Marat Dukhan50323b82022-01-11 00:12:01 -080027299 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027300 }
27301 }
27302}
27303
27304TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
27305 for (size_t channels = 1; channels <= 20; channels += 3) {
27306 DWConvMicrokernelTester()
27307 .cr(4)
27308 .kr(25)
27309 .channels(4)
27310 .width(5)
27311 .output_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080027312 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027313 }
27314}
27315
27316TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
27317 for (size_t channels = 1; channels <= 20; channels += 3) {
27318 DWConvMicrokernelTester()
27319 .cr(4)
27320 .kr(25)
27321 .channels(channels)
27322 .width(3)
27323 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027324 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027325 }
27326}
27327
27328TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
27329 for (size_t channels = 1; channels <= 20; channels += 3) {
27330 DWConvMicrokernelTester()
27331 .cr(4)
27332 .kr(25)
27333 .channels(channels)
27334 .width(3)
27335 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027336 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027337 }
27338}
27339
27340TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
27341 for (uint32_t channels = 8; channels < 64; channels += 12) {
27342 DWConvMicrokernelTester()
27343 .cr(4)
27344 .kr(25)
27345 .channels(channels)
27346 .input_offset(112)
Marat Dukhan50323b82022-01-11 00:12:01 -080027347 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027348 }
27349}
27350
27351TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
27352 for (uint32_t mz = 0; mz < 25; mz++) {
27353 for (uint32_t channels = 8; channels < 64; channels += 12) {
27354 DWConvMicrokernelTester()
27355 .cr(4)
27356 .kr(25)
27357 .channels(channels)
27358 .input_offset(112)
27359 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027360 .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080027361 }
27362 }
Marat Dukhan57547062021-06-30 16:53:29 -070027363}