blob: 0b4351adcab7a84580caf23d3e6f75b03ccb4179 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-vadd-minmax.yaml
// Generator: tools/generate-vbinary-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
Marat Dukhan87bd5112021-08-02 11:43:53 -070016#include <xnnpack/params-init.h>
Marat Dukhan64287252021-09-07 16:20:03 -070017#include <xnnpack/vaddsub.h>
Marat Dukhand9f3ad42020-08-10 12:30:58 -070018#include "vadd-microkernel-tester.h"
19
20
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, compiled only
  // when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON and calls Test() with this kernel and
  // xnn_init_qs8_add_minmax_neon_params.

  // Single run with a batch size of exactly 8.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }

  // Multiples of 8: batch sizes 16, 24, ..., 72.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 16; batch < 80; batch += 8) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch < 8; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 9; batch < 16; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // qmax(128) for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 40; batch += 7) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, compiled only
  // when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON and calls Test() with this kernel and
  // xnn_init_qs8_add_minmax_neon_params.

  // Single run with a batch size of exactly 16.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }

  // Multiples of 16: batch sizes 32, 48, ..., 144.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 32; batch < 160; batch += 16) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch < 16; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 17; batch < 32; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // qmax(128) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, compiled only
  // when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON and calls Test() with this kernel and
  // xnn_init_qs8_add_minmax_neon_params.

  // Single run with a batch size of exactly 24.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }

  // Multiples of 24: batch sizes 48, 72, ..., 216.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 48; batch < 240; batch += 24) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 23.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch < 24; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 25 through 47.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 25; batch < 48; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // qmax(128) for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 120; batch += 23) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, compiled only
  // when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON and calls Test() with this kernel and
  // xnn_init_qs8_add_minmax_neon_params.

  // Single run with a batch size of exactly 32.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }

  // Multiples of 32: batch sizes 64, 96, ..., 288.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 64; batch < 320; batch += 32) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch < 32; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 33 through 63.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 33; batch < 64; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // qmax(128) for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 160; batch += 31) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, compiled only
  // when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON and calls Test() with this kernel and
  // xnn_init_qs8_add_minmax_neon_params.

  // Single run with a batch size of exactly 16.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }

  // Multiples of 16: batch sizes 32, 48, ..., 144.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 32; batch < 160; batch += 16) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch < 16; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 17; batch < 32; ++batch) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127 for each batch size 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch)
          .y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // qmax(128) for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch = 1; batch <= 80; batch += 15) {
      VAddMicrokernelTester()
        .batch_size(batch)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
819
820
821#if XNN_ARCH_ARM || XNN_ARCH_ARM64
822 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_eq_32) {
823 TEST_REQUIRES_ARM_NEON;
824 VAddMicrokernelTester()
825 .batch_size(32)
826 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
827 }
828
829 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_div_32) {
830 TEST_REQUIRES_ARM_NEON;
831 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
832 VAddMicrokernelTester()
833 .batch_size(batch_size)
834 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
835 }
836 }
837
838 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_lt_32) {
839 TEST_REQUIRES_ARM_NEON;
840 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
841 VAddMicrokernelTester()
842 .batch_size(batch_size)
843 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
844 }
845 }
846
847 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_gt_32) {
848 TEST_REQUIRES_ARM_NEON;
849 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
850 VAddMicrokernelTester()
851 .batch_size(batch_size)
852 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
853 }
854 }
855
856 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a) {
857 TEST_REQUIRES_ARM_NEON;
858 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
859 VAddMicrokernelTester()
860 .batch_size(batch_size)
861 .inplace_a(true)
862 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
863 }
864 }
865
866 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_b) {
867 TEST_REQUIRES_ARM_NEON;
868 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
869 VAddMicrokernelTester()
870 .batch_size(batch_size)
871 .inplace_b(true)
872 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
873 }
874 }
875
876 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a_and_b) {
877 TEST_REQUIRES_ARM_NEON;
878 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
879 VAddMicrokernelTester()
880 .batch_size(batch_size)
881 .inplace_a(true)
882 .inplace_b(true)
883 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
884 }
885 }
886
887 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_zero_point) {
888 TEST_REQUIRES_ARM_NEON;
889 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
890 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
891 VAddMicrokernelTester()
892 .batch_size(batch_size)
893 .a_zero_point(a_zero_point)
894 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
895 }
896 }
897 }
898
899 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_zero_point) {
900 TEST_REQUIRES_ARM_NEON;
901 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
902 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
903 VAddMicrokernelTester()
904 .batch_size(batch_size)
905 .b_zero_point(b_zero_point)
906 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
907 }
908 }
909 }
910
911 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_zero_point) {
912 TEST_REQUIRES_ARM_NEON;
913 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
914 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
915 VAddMicrokernelTester()
916 .batch_size(batch_size)
917 .y_zero_point(y_zero_point)
918 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
919 }
920 }
921 }
922
923 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_scale) {
924 TEST_REQUIRES_ARM_NEON;
925 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
926 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
927 VAddMicrokernelTester()
928 .batch_size(batch_size)
929 .a_scale(a_scale)
930 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
931 }
932 }
933 }
934
935 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_scale) {
936 TEST_REQUIRES_ARM_NEON;
937 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
938 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
939 VAddMicrokernelTester()
940 .batch_size(batch_size)
941 .b_scale(b_scale)
942 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
943 }
944 }
945 }
946
947 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_scale) {
948 TEST_REQUIRES_ARM_NEON;
949 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
950 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
951 VAddMicrokernelTester()
952 .batch_size(batch_size)
953 .y_scale(y_scale)
954 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
955 }
956 }
957 }
958
959 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmin) {
960 TEST_REQUIRES_ARM_NEON;
961 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
962 VAddMicrokernelTester()
963 .batch_size(batch_size)
964 .qmin(128)
965 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
966 }
967 }
968
969 TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmax) {
970 TEST_REQUIRES_ARM_NEON;
971 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
972 VAddMicrokernelTester()
973 .batch_size(batch_size)
974 .qmax(128)
975 .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
976 }
977 }
978#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
979
980
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700981#if XNN_ARCH_X86 || XNN_ARCH_X86_64
982 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
983 TEST_REQUIRES_X86_SSE2;
984 VAddMicrokernelTester()
985 .batch_size(8)
Marat Dukhan66913242021-07-20 16:11:23 -0700986 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700987 }
988
989 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
990 TEST_REQUIRES_X86_SSE2;
991 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
992 VAddMicrokernelTester()
993 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -0700994 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700995 }
996 }
997
998 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
999 TEST_REQUIRES_X86_SSE2;
1000 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1001 VAddMicrokernelTester()
1002 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001003 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001004 }
1005 }
1006
1007 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
1008 TEST_REQUIRES_X86_SSE2;
1009 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1010 VAddMicrokernelTester()
1011 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001012 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001013 }
1014 }
1015
1016 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
1017 TEST_REQUIRES_X86_SSE2;
1018 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1019 VAddMicrokernelTester()
1020 .batch_size(batch_size)
1021 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001022 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001023 }
1024 }
1025
1026 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
1027 TEST_REQUIRES_X86_SSE2;
1028 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1029 VAddMicrokernelTester()
1030 .batch_size(batch_size)
1031 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001032 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001033 }
1034 }
1035
1036 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
1037 TEST_REQUIRES_X86_SSE2;
1038 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1039 VAddMicrokernelTester()
1040 .batch_size(batch_size)
1041 .inplace_a(true)
1042 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001043 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001044 }
1045 }
1046
Marat Dukhan95caee52020-09-02 03:41:32 -07001047 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
1048 TEST_REQUIRES_X86_SSE2;
1049 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1050 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1051 VAddMicrokernelTester()
1052 .batch_size(batch_size)
1053 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001054 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001055 }
1056 }
1057 }
1058
1059 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
1060 TEST_REQUIRES_X86_SSE2;
1061 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1062 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1063 VAddMicrokernelTester()
1064 .batch_size(batch_size)
1065 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001066 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001067 }
1068 }
1069 }
1070
1071 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
1072 TEST_REQUIRES_X86_SSE2;
1073 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1074 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1075 VAddMicrokernelTester()
1076 .batch_size(batch_size)
1077 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001078 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001079 }
1080 }
1081 }
1082
1083 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
1084 TEST_REQUIRES_X86_SSE2;
1085 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1086 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1087 VAddMicrokernelTester()
1088 .batch_size(batch_size)
1089 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001090 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001091 }
1092 }
1093 }
1094
1095 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
1096 TEST_REQUIRES_X86_SSE2;
1097 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1098 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1099 VAddMicrokernelTester()
1100 .batch_size(batch_size)
1101 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001102 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001103 }
1104 }
1105 }
1106
1107 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
1108 TEST_REQUIRES_X86_SSE2;
1109 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1110 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1111 VAddMicrokernelTester()
1112 .batch_size(batch_size)
1113 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001114 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001115 }
1116 }
1117 }
1118
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001119 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
1120 TEST_REQUIRES_X86_SSE2;
1121 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1122 VAddMicrokernelTester()
1123 .batch_size(batch_size)
1124 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001125 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001126 }
1127 }
1128
1129 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
1130 TEST_REQUIRES_X86_SSE2;
1131 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1132 VAddMicrokernelTester()
1133 .batch_size(batch_size)
1134 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001135 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001136 }
1137 }
1138#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
1141#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1142 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
1143 TEST_REQUIRES_X86_SSE2;
1144 VAddMicrokernelTester()
1145 .batch_size(16)
Marat Dukhan66913242021-07-20 16:11:23 -07001146 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001147 }
1148
1149 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
1150 TEST_REQUIRES_X86_SSE2;
1151 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1152 VAddMicrokernelTester()
1153 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001154 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001155 }
1156 }
1157
1158 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
1159 TEST_REQUIRES_X86_SSE2;
1160 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1161 VAddMicrokernelTester()
1162 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001163 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001164 }
1165 }
1166
1167 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
1168 TEST_REQUIRES_X86_SSE2;
1169 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1170 VAddMicrokernelTester()
1171 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001172 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001173 }
1174 }
1175
1176 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
1177 TEST_REQUIRES_X86_SSE2;
1178 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1179 VAddMicrokernelTester()
1180 .batch_size(batch_size)
1181 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001182 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001183 }
1184 }
1185
1186 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
1187 TEST_REQUIRES_X86_SSE2;
1188 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1189 VAddMicrokernelTester()
1190 .batch_size(batch_size)
1191 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001192 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001193 }
1194 }
1195
1196 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
1197 TEST_REQUIRES_X86_SSE2;
1198 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1199 VAddMicrokernelTester()
1200 .batch_size(batch_size)
1201 .inplace_a(true)
1202 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001203 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001204 }
1205 }
1206
Marat Dukhan95caee52020-09-02 03:41:32 -07001207 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
1208 TEST_REQUIRES_X86_SSE2;
1209 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1210 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1211 VAddMicrokernelTester()
1212 .batch_size(batch_size)
1213 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001214 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001215 }
1216 }
1217 }
1218
1219 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
1220 TEST_REQUIRES_X86_SSE2;
1221 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1222 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1223 VAddMicrokernelTester()
1224 .batch_size(batch_size)
1225 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001226 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001227 }
1228 }
1229 }
1230
1231 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
1232 TEST_REQUIRES_X86_SSE2;
1233 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1234 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1235 VAddMicrokernelTester()
1236 .batch_size(batch_size)
1237 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001238 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001239 }
1240 }
1241 }
1242
1243 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
1244 TEST_REQUIRES_X86_SSE2;
1245 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1246 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1247 VAddMicrokernelTester()
1248 .batch_size(batch_size)
1249 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001250 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001251 }
1252 }
1253 }
1254
1255 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
1256 TEST_REQUIRES_X86_SSE2;
1257 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1258 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1259 VAddMicrokernelTester()
1260 .batch_size(batch_size)
1261 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001262 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001263 }
1264 }
1265 }
1266
1267 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
1268 TEST_REQUIRES_X86_SSE2;
1269 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1270 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1271 VAddMicrokernelTester()
1272 .batch_size(batch_size)
1273 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001274 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001275 }
1276 }
1277 }
1278
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001279 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
1280 TEST_REQUIRES_X86_SSE2;
1281 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1282 VAddMicrokernelTester()
1283 .batch_size(batch_size)
1284 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001285 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001286 }
1287 }
1288
1289 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
1290 TEST_REQUIRES_X86_SSE2;
1291 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1292 VAddMicrokernelTester()
1293 .batch_size(batch_size)
1294 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001295 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001296 }
1297 }
1298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
1301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1302 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
1303 TEST_REQUIRES_X86_SSE2;
1304 VAddMicrokernelTester()
1305 .batch_size(24)
Marat Dukhan66913242021-07-20 16:11:23 -07001306 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001307 }
1308
1309 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
1310 TEST_REQUIRES_X86_SSE2;
1311 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1312 VAddMicrokernelTester()
1313 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001314 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001315 }
1316 }
1317
1318 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
1319 TEST_REQUIRES_X86_SSE2;
1320 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1321 VAddMicrokernelTester()
1322 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001323 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001324 }
1325 }
1326
1327 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
1328 TEST_REQUIRES_X86_SSE2;
1329 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1330 VAddMicrokernelTester()
1331 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001332 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001333 }
1334 }
1335
1336 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a) {
1337 TEST_REQUIRES_X86_SSE2;
1338 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1339 VAddMicrokernelTester()
1340 .batch_size(batch_size)
1341 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001342 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001343 }
1344 }
1345
1346 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_b) {
1347 TEST_REQUIRES_X86_SSE2;
1348 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1349 VAddMicrokernelTester()
1350 .batch_size(batch_size)
1351 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001352 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001353 }
1354 }
1355
1356 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a_and_b) {
1357 TEST_REQUIRES_X86_SSE2;
1358 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1359 VAddMicrokernelTester()
1360 .batch_size(batch_size)
1361 .inplace_a(true)
1362 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001363 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001364 }
1365 }
1366
Marat Dukhan95caee52020-09-02 03:41:32 -07001367 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
1368 TEST_REQUIRES_X86_SSE2;
1369 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1370 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1371 VAddMicrokernelTester()
1372 .batch_size(batch_size)
1373 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001374 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001375 }
1376 }
1377 }
1378
1379 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
1380 TEST_REQUIRES_X86_SSE2;
1381 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1382 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1383 VAddMicrokernelTester()
1384 .batch_size(batch_size)
1385 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001386 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001387 }
1388 }
1389 }
1390
1391 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
1392 TEST_REQUIRES_X86_SSE2;
1393 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1394 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1395 VAddMicrokernelTester()
1396 .batch_size(batch_size)
1397 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001398 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001399 }
1400 }
1401 }
1402
1403 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
1404 TEST_REQUIRES_X86_SSE2;
1405 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1406 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1407 VAddMicrokernelTester()
1408 .batch_size(batch_size)
1409 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001410 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001411 }
1412 }
1413 }
1414
1415 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
1416 TEST_REQUIRES_X86_SSE2;
1417 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1418 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1419 VAddMicrokernelTester()
1420 .batch_size(batch_size)
1421 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001422 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001423 }
1424 }
1425 }
1426
1427 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
1428 TEST_REQUIRES_X86_SSE2;
1429 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1430 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1431 VAddMicrokernelTester()
1432 .batch_size(batch_size)
1433 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001434 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001435 }
1436 }
1437 }
1438
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001439 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
1440 TEST_REQUIRES_X86_SSE2;
1441 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1442 VAddMicrokernelTester()
1443 .batch_size(batch_size)
1444 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001445 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001446 }
1447 }
1448
1449 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
1450 TEST_REQUIRES_X86_SSE2;
1451 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1452 VAddMicrokernelTester()
1453 .batch_size(batch_size)
1454 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001455 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001456 }
1457 }
1458#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
1461#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1462 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
1463 TEST_REQUIRES_X86_SSE2;
1464 VAddMicrokernelTester()
1465 .batch_size(32)
Marat Dukhan66913242021-07-20 16:11:23 -07001466 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001467 }
1468
1469 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
1470 TEST_REQUIRES_X86_SSE2;
1471 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1472 VAddMicrokernelTester()
1473 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001474 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001475 }
1476 }
1477
1478 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
1479 TEST_REQUIRES_X86_SSE2;
1480 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1481 VAddMicrokernelTester()
1482 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001483 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001484 }
1485 }
1486
1487 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
1488 TEST_REQUIRES_X86_SSE2;
1489 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1490 VAddMicrokernelTester()
1491 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07001492 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001493 }
1494 }
1495
1496 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a) {
1497 TEST_REQUIRES_X86_SSE2;
1498 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1499 VAddMicrokernelTester()
1500 .batch_size(batch_size)
1501 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001502 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001503 }
1504 }
1505
1506 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_b) {
1507 TEST_REQUIRES_X86_SSE2;
1508 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1509 VAddMicrokernelTester()
1510 .batch_size(batch_size)
1511 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001512 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001513 }
1514 }
1515
1516 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a_and_b) {
1517 TEST_REQUIRES_X86_SSE2;
1518 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1519 VAddMicrokernelTester()
1520 .batch_size(batch_size)
1521 .inplace_a(true)
1522 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07001523 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001524 }
1525 }
1526
Marat Dukhan95caee52020-09-02 03:41:32 -07001527 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
1528 TEST_REQUIRES_X86_SSE2;
1529 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1530 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1531 VAddMicrokernelTester()
1532 .batch_size(batch_size)
1533 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001534 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001535 }
1536 }
1537 }
1538
1539 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
1540 TEST_REQUIRES_X86_SSE2;
1541 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1542 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1543 VAddMicrokernelTester()
1544 .batch_size(batch_size)
1545 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001546 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001547 }
1548 }
1549 }
1550
1551 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
1552 TEST_REQUIRES_X86_SSE2;
1553 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1554 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1555 VAddMicrokernelTester()
1556 .batch_size(batch_size)
1557 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07001558 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001559 }
1560 }
1561 }
1562
1563 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
1564 TEST_REQUIRES_X86_SSE2;
1565 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1566 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1567 VAddMicrokernelTester()
1568 .batch_size(batch_size)
1569 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001570 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001571 }
1572 }
1573 }
1574
1575 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
1576 TEST_REQUIRES_X86_SSE2;
1577 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1578 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1579 VAddMicrokernelTester()
1580 .batch_size(batch_size)
1581 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001582 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001583 }
1584 }
1585 }
1586
1587 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
1588 TEST_REQUIRES_X86_SSE2;
1589 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1590 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1591 VAddMicrokernelTester()
1592 .batch_size(batch_size)
1593 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07001594 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001595 }
1596 }
1597 }
1598
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001599 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
1600 TEST_REQUIRES_X86_SSE2;
1601 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1602 VAddMicrokernelTester()
1603 .batch_size(batch_size)
1604 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001605 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001606 }
1607 }
1608
1609 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
1610 TEST_REQUIRES_X86_SSE2;
1611 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1612 VAddMicrokernelTester()
1613 .batch_size(batch_size)
1614 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07001615 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001616 }
1617 }
1618#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1622 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
1623 TEST_REQUIRES_X86_SSE41;
1624 VAddMicrokernelTester()
1625 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001626 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001627 }
1628
1629 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
1630 TEST_REQUIRES_X86_SSE41;
1631 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1632 VAddMicrokernelTester()
1633 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001634 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001635 }
1636 }
1637
1638 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
1639 TEST_REQUIRES_X86_SSE41;
1640 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1641 VAddMicrokernelTester()
1642 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001643 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001644 }
1645 }
1646
1647 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
1648 TEST_REQUIRES_X86_SSE41;
1649 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1650 VAddMicrokernelTester()
1651 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001652 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001653 }
1654 }
1655
1656 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
1657 TEST_REQUIRES_X86_SSE41;
1658 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1659 VAddMicrokernelTester()
1660 .batch_size(batch_size)
1661 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001662 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001663 }
1664 }
1665
1666 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
1667 TEST_REQUIRES_X86_SSE41;
1668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1669 VAddMicrokernelTester()
1670 .batch_size(batch_size)
1671 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001672 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001673 }
1674 }
1675
1676 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
1677 TEST_REQUIRES_X86_SSE41;
1678 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1679 VAddMicrokernelTester()
1680 .batch_size(batch_size)
1681 .inplace_a(true)
1682 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001683 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001684 }
1685 }
1686
Marat Dukhan95caee52020-09-02 03:41:32 -07001687 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
1688 TEST_REQUIRES_X86_SSE41;
1689 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1690 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1691 VAddMicrokernelTester()
1692 .batch_size(batch_size)
1693 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001694 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001695 }
1696 }
1697 }
1698
1699 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
1700 TEST_REQUIRES_X86_SSE41;
1701 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1702 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1703 VAddMicrokernelTester()
1704 .batch_size(batch_size)
1705 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001706 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001707 }
1708 }
1709 }
1710
1711 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
1712 TEST_REQUIRES_X86_SSE41;
1713 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1714 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1715 VAddMicrokernelTester()
1716 .batch_size(batch_size)
1717 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001718 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001719 }
1720 }
1721 }
1722
1723 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
1724 TEST_REQUIRES_X86_SSE41;
1725 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1727 VAddMicrokernelTester()
1728 .batch_size(batch_size)
1729 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001730 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001731 }
1732 }
1733 }
1734
1735 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
1736 TEST_REQUIRES_X86_SSE41;
1737 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1738 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1739 VAddMicrokernelTester()
1740 .batch_size(batch_size)
1741 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001742 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001743 }
1744 }
1745 }
1746
1747 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
1748 TEST_REQUIRES_X86_SSE41;
1749 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1750 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1751 VAddMicrokernelTester()
1752 .batch_size(batch_size)
1753 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001754 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001755 }
1756 }
1757 }
1758
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001759 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
1760 TEST_REQUIRES_X86_SSE41;
1761 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1762 VAddMicrokernelTester()
1763 .batch_size(batch_size)
1764 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001765 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001766 }
1767 }
1768
1769 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
1770 TEST_REQUIRES_X86_SSE41;
1771 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772 VAddMicrokernelTester()
1773 .batch_size(batch_size)
1774 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001775 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001776 }
1777 }
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1782 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
1783 TEST_REQUIRES_X86_SSE41;
1784 VAddMicrokernelTester()
1785 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001786 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001787 }
1788
1789 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
1790 TEST_REQUIRES_X86_SSE41;
1791 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1792 VAddMicrokernelTester()
1793 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001794 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001795 }
1796 }
1797
1798 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
1799 TEST_REQUIRES_X86_SSE41;
1800 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1801 VAddMicrokernelTester()
1802 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001803 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001804 }
1805 }
1806
1807 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
1808 TEST_REQUIRES_X86_SSE41;
1809 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1810 VAddMicrokernelTester()
1811 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001812 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001813 }
1814 }
1815
1816 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
1817 TEST_REQUIRES_X86_SSE41;
1818 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1819 VAddMicrokernelTester()
1820 .batch_size(batch_size)
1821 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001822 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001823 }
1824 }
1825
1826 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
1827 TEST_REQUIRES_X86_SSE41;
1828 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1829 VAddMicrokernelTester()
1830 .batch_size(batch_size)
1831 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001832 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001833 }
1834 }
1835
1836 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
1837 TEST_REQUIRES_X86_SSE41;
1838 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1839 VAddMicrokernelTester()
1840 .batch_size(batch_size)
1841 .inplace_a(true)
1842 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001843 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001844 }
1845 }
1846
Marat Dukhan95caee52020-09-02 03:41:32 -07001847 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
1848 TEST_REQUIRES_X86_SSE41;
1849 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1850 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1851 VAddMicrokernelTester()
1852 .batch_size(batch_size)
1853 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001854 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001855 }
1856 }
1857 }
1858
1859 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
1860 TEST_REQUIRES_X86_SSE41;
1861 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1862 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1863 VAddMicrokernelTester()
1864 .batch_size(batch_size)
1865 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001866 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001867 }
1868 }
1869 }
1870
1871 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
1872 TEST_REQUIRES_X86_SSE41;
1873 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1874 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1875 VAddMicrokernelTester()
1876 .batch_size(batch_size)
1877 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001878 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001879 }
1880 }
1881 }
1882
1883 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
1884 TEST_REQUIRES_X86_SSE41;
1885 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1886 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1887 VAddMicrokernelTester()
1888 .batch_size(batch_size)
1889 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001890 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001891 }
1892 }
1893 }
1894
1895 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
1896 TEST_REQUIRES_X86_SSE41;
1897 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1898 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1899 VAddMicrokernelTester()
1900 .batch_size(batch_size)
1901 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001902 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001903 }
1904 }
1905 }
1906
1907 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
1908 TEST_REQUIRES_X86_SSE41;
1909 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1910 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1911 VAddMicrokernelTester()
1912 .batch_size(batch_size)
1913 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001914 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07001915 }
1916 }
1917 }
1918
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001919 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
1920 TEST_REQUIRES_X86_SSE41;
1921 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1922 VAddMicrokernelTester()
1923 .batch_size(batch_size)
1924 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001925 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001926 }
1927 }
1928
1929 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
1930 TEST_REQUIRES_X86_SSE41;
1931 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1932 VAddMicrokernelTester()
1933 .batch_size(batch_size)
1934 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001935 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001936 }
1937 }
1938#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939
1940
1941#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1942 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
1943 TEST_REQUIRES_X86_SSE41;
1944 VAddMicrokernelTester()
1945 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001946 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001947 }
1948
1949 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
1950 TEST_REQUIRES_X86_SSE41;
1951 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1952 VAddMicrokernelTester()
1953 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001954 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001955 }
1956 }
1957
1958 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
1959 TEST_REQUIRES_X86_SSE41;
1960 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1961 VAddMicrokernelTester()
1962 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001963 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001964 }
1965 }
1966
1967 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
1968 TEST_REQUIRES_X86_SSE41;
1969 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1970 VAddMicrokernelTester()
1971 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001972 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001973 }
1974 }
1975
1976 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a) {
1977 TEST_REQUIRES_X86_SSE41;
1978 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1979 VAddMicrokernelTester()
1980 .batch_size(batch_size)
1981 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001982 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001983 }
1984 }
1985
1986 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_b) {
1987 TEST_REQUIRES_X86_SSE41;
1988 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1989 VAddMicrokernelTester()
1990 .batch_size(batch_size)
1991 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07001992 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001993 }
1994 }
1995
1996 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a_and_b) {
1997 TEST_REQUIRES_X86_SSE41;
1998 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1999 VAddMicrokernelTester()
2000 .batch_size(batch_size)
2001 .inplace_a(true)
2002 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002003 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002004 }
2005 }
2006
Marat Dukhan95caee52020-09-02 03:41:32 -07002007 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
2008 TEST_REQUIRES_X86_SSE41;
2009 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2010 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2011 VAddMicrokernelTester()
2012 .batch_size(batch_size)
2013 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002014 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002015 }
2016 }
2017 }
2018
2019 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
2020 TEST_REQUIRES_X86_SSE41;
2021 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2022 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2023 VAddMicrokernelTester()
2024 .batch_size(batch_size)
2025 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002026 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002027 }
2028 }
2029 }
2030
2031 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
2032 TEST_REQUIRES_X86_SSE41;
2033 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2034 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2035 VAddMicrokernelTester()
2036 .batch_size(batch_size)
2037 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002038 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002039 }
2040 }
2041 }
2042
2043 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
2044 TEST_REQUIRES_X86_SSE41;
2045 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2046 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2047 VAddMicrokernelTester()
2048 .batch_size(batch_size)
2049 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002050 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002051 }
2052 }
2053 }
2054
2055 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
2056 TEST_REQUIRES_X86_SSE41;
2057 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2058 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2059 VAddMicrokernelTester()
2060 .batch_size(batch_size)
2061 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002062 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002063 }
2064 }
2065 }
2066
2067 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
2068 TEST_REQUIRES_X86_SSE41;
2069 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2070 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2071 VAddMicrokernelTester()
2072 .batch_size(batch_size)
2073 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002074 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002075 }
2076 }
2077 }
2078
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002079 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
2080 TEST_REQUIRES_X86_SSE41;
2081 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2082 VAddMicrokernelTester()
2083 .batch_size(batch_size)
2084 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002085 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002086 }
2087 }
2088
2089 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
2090 TEST_REQUIRES_X86_SSE41;
2091 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2092 VAddMicrokernelTester()
2093 .batch_size(batch_size)
2094 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002095 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002096 }
2097 }
2098#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099
2100
2101#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2102 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
2103 TEST_REQUIRES_X86_SSE41;
2104 VAddMicrokernelTester()
2105 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002106 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002107 }
2108
2109 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
2110 TEST_REQUIRES_X86_SSE41;
2111 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
2112 VAddMicrokernelTester()
2113 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002114 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002115 }
2116 }
2117
2118 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
2119 TEST_REQUIRES_X86_SSE41;
2120 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
2121 VAddMicrokernelTester()
2122 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002123 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002124 }
2125 }
2126
2127 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
2128 TEST_REQUIRES_X86_SSE41;
2129 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
2130 VAddMicrokernelTester()
2131 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002132 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002133 }
2134 }
2135
2136 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a) {
2137 TEST_REQUIRES_X86_SSE41;
2138 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2139 VAddMicrokernelTester()
2140 .batch_size(batch_size)
2141 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002142 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002143 }
2144 }
2145
2146 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_b) {
2147 TEST_REQUIRES_X86_SSE41;
2148 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2149 VAddMicrokernelTester()
2150 .batch_size(batch_size)
2151 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002152 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002153 }
2154 }
2155
2156 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a_and_b) {
2157 TEST_REQUIRES_X86_SSE41;
2158 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2159 VAddMicrokernelTester()
2160 .batch_size(batch_size)
2161 .inplace_a(true)
2162 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002163 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002164 }
2165 }
2166
Marat Dukhan95caee52020-09-02 03:41:32 -07002167 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
2168 TEST_REQUIRES_X86_SSE41;
2169 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2170 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2171 VAddMicrokernelTester()
2172 .batch_size(batch_size)
2173 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002174 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002175 }
2176 }
2177 }
2178
2179 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
2180 TEST_REQUIRES_X86_SSE41;
2181 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2182 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2183 VAddMicrokernelTester()
2184 .batch_size(batch_size)
2185 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002186 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002187 }
2188 }
2189 }
2190
2191 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
2192 TEST_REQUIRES_X86_SSE41;
2193 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2194 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2195 VAddMicrokernelTester()
2196 .batch_size(batch_size)
2197 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002198 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002199 }
2200 }
2201 }
2202
2203 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
2204 TEST_REQUIRES_X86_SSE41;
2205 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2206 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2207 VAddMicrokernelTester()
2208 .batch_size(batch_size)
2209 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002210 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002211 }
2212 }
2213 }
2214
2215 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
2216 TEST_REQUIRES_X86_SSE41;
2217 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2218 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2219 VAddMicrokernelTester()
2220 .batch_size(batch_size)
2221 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002222 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002223 }
2224 }
2225 }
2226
2227 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
2228 TEST_REQUIRES_X86_SSE41;
2229 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2230 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2231 VAddMicrokernelTester()
2232 .batch_size(batch_size)
2233 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002234 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhan95caee52020-09-02 03:41:32 -07002235 }
2236 }
2237 }
2238
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002239 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
2240 TEST_REQUIRES_X86_SSE41;
2241 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2242 VAddMicrokernelTester()
2243 .batch_size(batch_size)
2244 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002245 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002246 }
2247 }
2248
2249 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
2250 TEST_REQUIRES_X86_SSE41;
2251 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2252 VAddMicrokernelTester()
2253 .batch_size(batch_size)
2254 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002255 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhand9f3ad42020-08-10 12:30:58 -07002256 }
2257 }
2258#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan5df27f82020-09-02 23:59:21 -07002259
2260
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002261#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhane9c4b962021-04-02 16:56:55 -07002262 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
2263 TEST_REQUIRES_X86_AVX;
2264 VAddMicrokernelTester()
2265 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002266 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002267 }
2268
2269 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
2270 TEST_REQUIRES_X86_AVX;
2271 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2272 VAddMicrokernelTester()
2273 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002274 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002275 }
2276 }
2277
2278 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
2279 TEST_REQUIRES_X86_AVX;
2280 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2281 VAddMicrokernelTester()
2282 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002283 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002284 }
2285 }
2286
2287 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
2288 TEST_REQUIRES_X86_AVX;
2289 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2290 VAddMicrokernelTester()
2291 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002292 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002293 }
2294 }
2295
2296 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
2297 TEST_REQUIRES_X86_AVX;
2298 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2299 VAddMicrokernelTester()
2300 .batch_size(batch_size)
2301 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002302 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002303 }
2304 }
2305
2306 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
2307 TEST_REQUIRES_X86_AVX;
2308 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2309 VAddMicrokernelTester()
2310 .batch_size(batch_size)
2311 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002312 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002313 }
2314 }
2315
2316 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
2317 TEST_REQUIRES_X86_AVX;
2318 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2319 VAddMicrokernelTester()
2320 .batch_size(batch_size)
2321 .inplace_a(true)
2322 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002323 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002324 }
2325 }
2326
2327 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
2328 TEST_REQUIRES_X86_AVX;
2329 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2330 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2331 VAddMicrokernelTester()
2332 .batch_size(batch_size)
2333 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002334 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002335 }
2336 }
2337 }
2338
2339 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
2340 TEST_REQUIRES_X86_AVX;
2341 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2342 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2343 VAddMicrokernelTester()
2344 .batch_size(batch_size)
2345 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002346 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002347 }
2348 }
2349 }
2350
2351 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
2352 TEST_REQUIRES_X86_AVX;
2353 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2354 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2355 VAddMicrokernelTester()
2356 .batch_size(batch_size)
2357 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002358 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002359 }
2360 }
2361 }
2362
2363 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
2364 TEST_REQUIRES_X86_AVX;
2365 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2366 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2367 VAddMicrokernelTester()
2368 .batch_size(batch_size)
2369 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002370 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002371 }
2372 }
2373 }
2374
2375 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
2376 TEST_REQUIRES_X86_AVX;
2377 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2378 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2379 VAddMicrokernelTester()
2380 .batch_size(batch_size)
2381 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002382 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002383 }
2384 }
2385 }
2386
2387 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
2388 TEST_REQUIRES_X86_AVX;
2389 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2390 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2391 VAddMicrokernelTester()
2392 .batch_size(batch_size)
2393 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002394 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002395 }
2396 }
2397 }
2398
2399 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
2400 TEST_REQUIRES_X86_AVX;
2401 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2402 VAddMicrokernelTester()
2403 .batch_size(batch_size)
2404 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002405 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002406 }
2407 }
2408
2409 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
2410 TEST_REQUIRES_X86_AVX;
2411 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2412 VAddMicrokernelTester()
2413 .batch_size(batch_size)
2414 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002415 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002416 }
2417 }
2418#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419
2420
2421#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2422 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
2423 TEST_REQUIRES_X86_AVX;
2424 VAddMicrokernelTester()
2425 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002426 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002427 }
2428
2429 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
2430 TEST_REQUIRES_X86_AVX;
2431 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2432 VAddMicrokernelTester()
2433 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002434 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002435 }
2436 }
2437
2438 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
2439 TEST_REQUIRES_X86_AVX;
2440 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2441 VAddMicrokernelTester()
2442 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002443 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002444 }
2445 }
2446
2447 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
2448 TEST_REQUIRES_X86_AVX;
2449 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2450 VAddMicrokernelTester()
2451 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002452 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002453 }
2454 }
2455
2456 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
2457 TEST_REQUIRES_X86_AVX;
2458 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2459 VAddMicrokernelTester()
2460 .batch_size(batch_size)
2461 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002462 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002463 }
2464 }
2465
2466 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
2467 TEST_REQUIRES_X86_AVX;
2468 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2469 VAddMicrokernelTester()
2470 .batch_size(batch_size)
2471 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002472 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002473 }
2474 }
2475
2476 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
2477 TEST_REQUIRES_X86_AVX;
2478 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2479 VAddMicrokernelTester()
2480 .batch_size(batch_size)
2481 .inplace_a(true)
2482 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002483 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002484 }
2485 }
2486
2487 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
2488 TEST_REQUIRES_X86_AVX;
2489 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2490 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2491 VAddMicrokernelTester()
2492 .batch_size(batch_size)
2493 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002494 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002495 }
2496 }
2497 }
2498
2499 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
2500 TEST_REQUIRES_X86_AVX;
2501 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2502 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2503 VAddMicrokernelTester()
2504 .batch_size(batch_size)
2505 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002506 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002507 }
2508 }
2509 }
2510
2511 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
2512 TEST_REQUIRES_X86_AVX;
2513 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2514 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2515 VAddMicrokernelTester()
2516 .batch_size(batch_size)
2517 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002518 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002519 }
2520 }
2521 }
2522
2523 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
2524 TEST_REQUIRES_X86_AVX;
2525 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2526 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2527 VAddMicrokernelTester()
2528 .batch_size(batch_size)
2529 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002530 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002531 }
2532 }
2533 }
2534
2535 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
2536 TEST_REQUIRES_X86_AVX;
2537 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2538 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2539 VAddMicrokernelTester()
2540 .batch_size(batch_size)
2541 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002542 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002543 }
2544 }
2545 }
2546
2547 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
2548 TEST_REQUIRES_X86_AVX;
2549 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2550 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2551 VAddMicrokernelTester()
2552 .batch_size(batch_size)
2553 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002554 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002555 }
2556 }
2557 }
2558
2559 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
2560 TEST_REQUIRES_X86_AVX;
2561 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2562 VAddMicrokernelTester()
2563 .batch_size(batch_size)
2564 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002565 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002566 }
2567 }
2568
2569 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
2570 TEST_REQUIRES_X86_AVX;
2571 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2572 VAddMicrokernelTester()
2573 .batch_size(batch_size)
2574 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002575 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002576 }
2577 }
2578#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579
2580
2581#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2582 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_eq_24) {
2583 TEST_REQUIRES_X86_AVX;
2584 VAddMicrokernelTester()
2585 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002586 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002587 }
2588
2589 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_div_24) {
2590 TEST_REQUIRES_X86_AVX;
2591 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
2592 VAddMicrokernelTester()
2593 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002594 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002595 }
2596 }
2597
2598 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_lt_24) {
2599 TEST_REQUIRES_X86_AVX;
2600 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
2601 VAddMicrokernelTester()
2602 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002603 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002604 }
2605 }
2606
2607 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_gt_24) {
2608 TEST_REQUIRES_X86_AVX;
2609 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
2610 VAddMicrokernelTester()
2611 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002612 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002613 }
2614 }
2615
2616 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a) {
2617 TEST_REQUIRES_X86_AVX;
2618 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2619 VAddMicrokernelTester()
2620 .batch_size(batch_size)
2621 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002622 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002623 }
2624 }
2625
2626 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_b) {
2627 TEST_REQUIRES_X86_AVX;
2628 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2629 VAddMicrokernelTester()
2630 .batch_size(batch_size)
2631 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002632 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002633 }
2634 }
2635
2636 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a_and_b) {
2637 TEST_REQUIRES_X86_AVX;
2638 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2639 VAddMicrokernelTester()
2640 .batch_size(batch_size)
2641 .inplace_a(true)
2642 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002643 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002644 }
2645 }
2646
2647 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_zero_point) {
2648 TEST_REQUIRES_X86_AVX;
2649 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2650 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2651 VAddMicrokernelTester()
2652 .batch_size(batch_size)
2653 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002654 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002655 }
2656 }
2657 }
2658
2659 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_zero_point) {
2660 TEST_REQUIRES_X86_AVX;
2661 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2662 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2663 VAddMicrokernelTester()
2664 .batch_size(batch_size)
2665 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002666 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002667 }
2668 }
2669 }
2670
2671 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_zero_point) {
2672 TEST_REQUIRES_X86_AVX;
2673 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2674 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2675 VAddMicrokernelTester()
2676 .batch_size(batch_size)
2677 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002678 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002679 }
2680 }
2681 }
2682
2683 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_scale) {
2684 TEST_REQUIRES_X86_AVX;
2685 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2686 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2687 VAddMicrokernelTester()
2688 .batch_size(batch_size)
2689 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002690 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002691 }
2692 }
2693 }
2694
2695 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_scale) {
2696 TEST_REQUIRES_X86_AVX;
2697 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2698 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2699 VAddMicrokernelTester()
2700 .batch_size(batch_size)
2701 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002702 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002703 }
2704 }
2705 }
2706
2707 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_scale) {
2708 TEST_REQUIRES_X86_AVX;
2709 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2710 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2711 VAddMicrokernelTester()
2712 .batch_size(batch_size)
2713 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002714 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002715 }
2716 }
2717 }
2718
2719 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmin) {
2720 TEST_REQUIRES_X86_AVX;
2721 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2722 VAddMicrokernelTester()
2723 .batch_size(batch_size)
2724 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002725 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002726 }
2727 }
2728
2729 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmax) {
2730 TEST_REQUIRES_X86_AVX;
2731 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2732 VAddMicrokernelTester()
2733 .batch_size(batch_size)
2734 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002735 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002736 }
2737 }
2738#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739
2740
2741#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2742 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_eq_32) {
2743 TEST_REQUIRES_X86_AVX;
2744 VAddMicrokernelTester()
2745 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002746 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002747 }
2748
2749 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_div_32) {
2750 TEST_REQUIRES_X86_AVX;
2751 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
2752 VAddMicrokernelTester()
2753 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002754 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002755 }
2756 }
2757
2758 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_lt_32) {
2759 TEST_REQUIRES_X86_AVX;
2760 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
2761 VAddMicrokernelTester()
2762 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002763 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002764 }
2765 }
2766
2767 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_gt_32) {
2768 TEST_REQUIRES_X86_AVX;
2769 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
2770 VAddMicrokernelTester()
2771 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002772 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002773 }
2774 }
2775
2776 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a) {
2777 TEST_REQUIRES_X86_AVX;
2778 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2779 VAddMicrokernelTester()
2780 .batch_size(batch_size)
2781 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002782 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002783 }
2784 }
2785
2786 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_b) {
2787 TEST_REQUIRES_X86_AVX;
2788 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2789 VAddMicrokernelTester()
2790 .batch_size(batch_size)
2791 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002792 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002793 }
2794 }
2795
2796 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a_and_b) {
2797 TEST_REQUIRES_X86_AVX;
2798 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2799 VAddMicrokernelTester()
2800 .batch_size(batch_size)
2801 .inplace_a(true)
2802 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002803 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002804 }
2805 }
2806
2807 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_zero_point) {
2808 TEST_REQUIRES_X86_AVX;
2809 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2810 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2811 VAddMicrokernelTester()
2812 .batch_size(batch_size)
2813 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002814 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002815 }
2816 }
2817 }
2818
2819 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_zero_point) {
2820 TEST_REQUIRES_X86_AVX;
2821 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2822 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2823 VAddMicrokernelTester()
2824 .batch_size(batch_size)
2825 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002826 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002827 }
2828 }
2829 }
2830
2831 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_zero_point) {
2832 TEST_REQUIRES_X86_AVX;
2833 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2834 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2835 VAddMicrokernelTester()
2836 .batch_size(batch_size)
2837 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002838 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002839 }
2840 }
2841 }
2842
2843 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_scale) {
2844 TEST_REQUIRES_X86_AVX;
2845 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2846 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2847 VAddMicrokernelTester()
2848 .batch_size(batch_size)
2849 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002850 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002851 }
2852 }
2853 }
2854
2855 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_scale) {
2856 TEST_REQUIRES_X86_AVX;
2857 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2858 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2859 VAddMicrokernelTester()
2860 .batch_size(batch_size)
2861 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002862 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002863 }
2864 }
2865 }
2866
2867 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_scale) {
2868 TEST_REQUIRES_X86_AVX;
2869 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2870 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2871 VAddMicrokernelTester()
2872 .batch_size(batch_size)
2873 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002874 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002875 }
2876 }
2877 }
2878
2879 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmin) {
2880 TEST_REQUIRES_X86_AVX;
2881 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2882 VAddMicrokernelTester()
2883 .batch_size(batch_size)
2884 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002885 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002886 }
2887 }
2888
2889 TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmax) {
2890 TEST_REQUIRES_X86_AVX;
2891 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2892 VAddMicrokernelTester()
2893 .batch_size(batch_size)
2894 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002895 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07002896 }
2897 }
2898#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899
2900
2901#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002902 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
2903 TEST_REQUIRES_X86_SSE41;
2904 VAddMicrokernelTester()
2905 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002906 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002907 }
2908
2909 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
2910 TEST_REQUIRES_X86_SSE41;
2911 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2912 VAddMicrokernelTester()
2913 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002914 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002915 }
2916 }
2917
2918 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
2919 TEST_REQUIRES_X86_SSE41;
2920 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2921 VAddMicrokernelTester()
2922 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002923 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002924 }
2925 }
2926
2927 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
2928 TEST_REQUIRES_X86_SSE41;
2929 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2930 VAddMicrokernelTester()
2931 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002932 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002933 }
2934 }
2935
2936 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
2937 TEST_REQUIRES_X86_SSE41;
2938 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2939 VAddMicrokernelTester()
2940 .batch_size(batch_size)
2941 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002942 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002943 }
2944 }
2945
2946 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
2947 TEST_REQUIRES_X86_SSE41;
2948 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2949 VAddMicrokernelTester()
2950 .batch_size(batch_size)
2951 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002952 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002953 }
2954 }
2955
2956 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
2957 TEST_REQUIRES_X86_SSE41;
2958 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2959 VAddMicrokernelTester()
2960 .batch_size(batch_size)
2961 .inplace_a(true)
2962 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002963 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002964 }
2965 }
2966
2967 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
2968 TEST_REQUIRES_X86_SSE41;
2969 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2970 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2971 VAddMicrokernelTester()
2972 .batch_size(batch_size)
2973 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002974 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002975 }
2976 }
2977 }
2978
2979 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
2980 TEST_REQUIRES_X86_SSE41;
2981 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2982 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2983 VAddMicrokernelTester()
2984 .batch_size(batch_size)
2985 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002986 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002987 }
2988 }
2989 }
2990
2991 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
2992 TEST_REQUIRES_X86_SSE41;
2993 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2994 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2995 VAddMicrokernelTester()
2996 .batch_size(batch_size)
2997 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07002998 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07002999 }
3000 }
3001 }
3002
3003 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
3004 TEST_REQUIRES_X86_SSE41;
3005 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3006 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3007 VAddMicrokernelTester()
3008 .batch_size(batch_size)
3009 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003010 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003011 }
3012 }
3013 }
3014
3015 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
3016 TEST_REQUIRES_X86_SSE41;
3017 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3018 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3019 VAddMicrokernelTester()
3020 .batch_size(batch_size)
3021 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003022 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003023 }
3024 }
3025 }
3026
3027 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
3028 TEST_REQUIRES_X86_SSE41;
3029 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3030 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3031 VAddMicrokernelTester()
3032 .batch_size(batch_size)
3033 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003034 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003035 }
3036 }
3037 }
3038
3039 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
3040 TEST_REQUIRES_X86_SSE41;
3041 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3042 VAddMicrokernelTester()
3043 .batch_size(batch_size)
3044 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003045 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003046 }
3047 }
3048
3049 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
3050 TEST_REQUIRES_X86_SSE41;
3051 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3052 VAddMicrokernelTester()
3053 .batch_size(batch_size)
3054 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003055 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003056 }
3057 }
3058#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059
3060
3061#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3062 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
3063 TEST_REQUIRES_X86_SSE41;
3064 VAddMicrokernelTester()
3065 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003066 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003067 }
3068
3069 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
3070 TEST_REQUIRES_X86_SSE41;
3071 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
3072 VAddMicrokernelTester()
3073 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003074 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003075 }
3076 }
3077
3078 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
3079 TEST_REQUIRES_X86_SSE41;
3080 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
3081 VAddMicrokernelTester()
3082 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003083 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003084 }
3085 }
3086
3087 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
3088 TEST_REQUIRES_X86_SSE41;
3089 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
3090 VAddMicrokernelTester()
3091 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003092 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003093 }
3094 }
3095
3096 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
3097 TEST_REQUIRES_X86_SSE41;
3098 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3099 VAddMicrokernelTester()
3100 .batch_size(batch_size)
3101 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003102 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003103 }
3104 }
3105
3106 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
3107 TEST_REQUIRES_X86_SSE41;
3108 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3109 VAddMicrokernelTester()
3110 .batch_size(batch_size)
3111 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003112 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003113 }
3114 }
3115
3116 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
3117 TEST_REQUIRES_X86_SSE41;
3118 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3119 VAddMicrokernelTester()
3120 .batch_size(batch_size)
3121 .inplace_a(true)
3122 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003123 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003124 }
3125 }
3126
3127 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
3128 TEST_REQUIRES_X86_SSE41;
3129 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3130 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3131 VAddMicrokernelTester()
3132 .batch_size(batch_size)
3133 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003134 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003135 }
3136 }
3137 }
3138
3139 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
3140 TEST_REQUIRES_X86_SSE41;
3141 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3142 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3143 VAddMicrokernelTester()
3144 .batch_size(batch_size)
3145 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003146 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003147 }
3148 }
3149 }
3150
3151 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
3152 TEST_REQUIRES_X86_SSE41;
3153 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3154 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3155 VAddMicrokernelTester()
3156 .batch_size(batch_size)
3157 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003158 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003159 }
3160 }
3161 }
3162
3163 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
3164 TEST_REQUIRES_X86_SSE41;
3165 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3166 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3167 VAddMicrokernelTester()
3168 .batch_size(batch_size)
3169 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003170 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003171 }
3172 }
3173 }
3174
3175 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
3176 TEST_REQUIRES_X86_SSE41;
3177 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3178 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3179 VAddMicrokernelTester()
3180 .batch_size(batch_size)
3181 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003182 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003183 }
3184 }
3185 }
3186
3187 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
3188 TEST_REQUIRES_X86_SSE41;
3189 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3190 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3191 VAddMicrokernelTester()
3192 .batch_size(batch_size)
3193 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003194 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003195 }
3196 }
3197 }
3198
3199 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
3200 TEST_REQUIRES_X86_SSE41;
3201 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3202 VAddMicrokernelTester()
3203 .batch_size(batch_size)
3204 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003205 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003206 }
3207 }
3208
3209 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
3210 TEST_REQUIRES_X86_SSE41;
3211 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3212 VAddMicrokernelTester()
3213 .batch_size(batch_size)
3214 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003215 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003216 }
3217 }
3218#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219
3220
3221#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3222 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
3223 TEST_REQUIRES_X86_SSE41;
3224 VAddMicrokernelTester()
3225 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003226 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003227 }
3228
3229 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
3230 TEST_REQUIRES_X86_SSE41;
3231 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
3232 VAddMicrokernelTester()
3233 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003234 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003235 }
3236 }
3237
3238 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
3239 TEST_REQUIRES_X86_SSE41;
3240 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
3241 VAddMicrokernelTester()
3242 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003243 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003244 }
3245 }
3246
3247 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
3248 TEST_REQUIRES_X86_SSE41;
3249 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
3250 VAddMicrokernelTester()
3251 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003252 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003253 }
3254 }
3255
3256 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a) {
3257 TEST_REQUIRES_X86_SSE41;
3258 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3259 VAddMicrokernelTester()
3260 .batch_size(batch_size)
3261 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003262 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003263 }
3264 }
3265
3266 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_b) {
3267 TEST_REQUIRES_X86_SSE41;
3268 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3269 VAddMicrokernelTester()
3270 .batch_size(batch_size)
3271 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003272 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003273 }
3274 }
3275
3276 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a_and_b) {
3277 TEST_REQUIRES_X86_SSE41;
3278 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3279 VAddMicrokernelTester()
3280 .batch_size(batch_size)
3281 .inplace_a(true)
3282 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003283 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003284 }
3285 }
3286
3287 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
3288 TEST_REQUIRES_X86_SSE41;
3289 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3290 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3291 VAddMicrokernelTester()
3292 .batch_size(batch_size)
3293 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003294 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003295 }
3296 }
3297 }
3298
3299 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
3300 TEST_REQUIRES_X86_SSE41;
3301 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3302 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3303 VAddMicrokernelTester()
3304 .batch_size(batch_size)
3305 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003306 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003307 }
3308 }
3309 }
3310
3311 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
3312 TEST_REQUIRES_X86_SSE41;
3313 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3314 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3315 VAddMicrokernelTester()
3316 .batch_size(batch_size)
3317 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003318 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003319 }
3320 }
3321 }
3322
3323 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
3324 TEST_REQUIRES_X86_SSE41;
3325 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3326 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3327 VAddMicrokernelTester()
3328 .batch_size(batch_size)
3329 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003330 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003331 }
3332 }
3333 }
3334
3335 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
3336 TEST_REQUIRES_X86_SSE41;
3337 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3338 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3339 VAddMicrokernelTester()
3340 .batch_size(batch_size)
3341 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003342 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003343 }
3344 }
3345 }
3346
3347 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
3348 TEST_REQUIRES_X86_SSE41;
3349 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3350 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3351 VAddMicrokernelTester()
3352 .batch_size(batch_size)
3353 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003354 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003355 }
3356 }
3357 }
3358
3359 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
3360 TEST_REQUIRES_X86_SSE41;
3361 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3362 VAddMicrokernelTester()
3363 .batch_size(batch_size)
3364 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003365 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003366 }
3367 }
3368
3369 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
3370 TEST_REQUIRES_X86_SSE41;
3371 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3372 VAddMicrokernelTester()
3373 .batch_size(batch_size)
3374 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003375 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003376 }
3377 }
3378#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3379
3380
3381#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3382 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
3383 TEST_REQUIRES_X86_SSE41;
3384 VAddMicrokernelTester()
3385 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003386 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003387 }
3388
3389 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
3390 TEST_REQUIRES_X86_SSE41;
3391 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
3392 VAddMicrokernelTester()
3393 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003394 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003395 }
3396 }
3397
3398 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
3399 TEST_REQUIRES_X86_SSE41;
3400 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
3401 VAddMicrokernelTester()
3402 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003403 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003404 }
3405 }
3406
3407 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
3408 TEST_REQUIRES_X86_SSE41;
3409 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
3410 VAddMicrokernelTester()
3411 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003412 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003413 }
3414 }
3415
3416 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a) {
3417 TEST_REQUIRES_X86_SSE41;
3418 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3419 VAddMicrokernelTester()
3420 .batch_size(batch_size)
3421 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003422 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003423 }
3424 }
3425
3426 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_b) {
3427 TEST_REQUIRES_X86_SSE41;
3428 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3429 VAddMicrokernelTester()
3430 .batch_size(batch_size)
3431 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003432 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003433 }
3434 }
3435
3436 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a_and_b) {
3437 TEST_REQUIRES_X86_SSE41;
3438 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3439 VAddMicrokernelTester()
3440 .batch_size(batch_size)
3441 .inplace_a(true)
3442 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003443 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003444 }
3445 }
3446
3447 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
3448 TEST_REQUIRES_X86_SSE41;
3449 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3450 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3451 VAddMicrokernelTester()
3452 .batch_size(batch_size)
3453 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003454 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003455 }
3456 }
3457 }
3458
3459 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
3460 TEST_REQUIRES_X86_SSE41;
3461 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3462 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3463 VAddMicrokernelTester()
3464 .batch_size(batch_size)
3465 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003466 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003467 }
3468 }
3469 }
3470
3471 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
3472 TEST_REQUIRES_X86_SSE41;
3473 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3474 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3475 VAddMicrokernelTester()
3476 .batch_size(batch_size)
3477 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003478 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003479 }
3480 }
3481 }
3482
3483 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
3484 TEST_REQUIRES_X86_SSE41;
3485 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3486 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3487 VAddMicrokernelTester()
3488 .batch_size(batch_size)
3489 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003490 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003491 }
3492 }
3493 }
3494
3495 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
3496 TEST_REQUIRES_X86_SSE41;
3497 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3498 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3499 VAddMicrokernelTester()
3500 .batch_size(batch_size)
3501 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003502 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003503 }
3504 }
3505 }
3506
3507 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
3508 TEST_REQUIRES_X86_SSE41;
3509 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3510 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3511 VAddMicrokernelTester()
3512 .batch_size(batch_size)
3513 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003514 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003515 }
3516 }
3517 }
3518
3519 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
3520 TEST_REQUIRES_X86_SSE41;
3521 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3522 VAddMicrokernelTester()
3523 .batch_size(batch_size)
3524 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003525 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003526 }
3527 }
3528
3529 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
3530 TEST_REQUIRES_X86_SSE41;
3531 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3532 VAddMicrokernelTester()
3533 .batch_size(batch_size)
3534 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003535 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07003536 }
3537 }
3538#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3539
3540
3541#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhane9c4b962021-04-02 16:56:55 -07003542 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
3543 TEST_REQUIRES_X86_AVX;
3544 VAddMicrokernelTester()
3545 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003546 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003547 }
3548
3549 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
3550 TEST_REQUIRES_X86_AVX;
3551 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
3552 VAddMicrokernelTester()
3553 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003554 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003555 }
3556 }
3557
3558 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
3559 TEST_REQUIRES_X86_AVX;
3560 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
3561 VAddMicrokernelTester()
3562 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003563 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003564 }
3565 }
3566
3567 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
3568 TEST_REQUIRES_X86_AVX;
3569 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
3570 VAddMicrokernelTester()
3571 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003572 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003573 }
3574 }
3575
3576 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
3577 TEST_REQUIRES_X86_AVX;
3578 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3579 VAddMicrokernelTester()
3580 .batch_size(batch_size)
3581 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003582 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003583 }
3584 }
3585
3586 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
3587 TEST_REQUIRES_X86_AVX;
3588 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3589 VAddMicrokernelTester()
3590 .batch_size(batch_size)
3591 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003592 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003593 }
3594 }
3595
3596 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
3597 TEST_REQUIRES_X86_AVX;
3598 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3599 VAddMicrokernelTester()
3600 .batch_size(batch_size)
3601 .inplace_a(true)
3602 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003603 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003604 }
3605 }
3606
3607 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
3608 TEST_REQUIRES_X86_AVX;
3609 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3610 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3611 VAddMicrokernelTester()
3612 .batch_size(batch_size)
3613 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003614 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003615 }
3616 }
3617 }
3618
3619 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
3620 TEST_REQUIRES_X86_AVX;
3621 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3622 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3623 VAddMicrokernelTester()
3624 .batch_size(batch_size)
3625 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003626 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003627 }
3628 }
3629 }
3630
3631 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
3632 TEST_REQUIRES_X86_AVX;
3633 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3634 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3635 VAddMicrokernelTester()
3636 .batch_size(batch_size)
3637 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003638 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003639 }
3640 }
3641 }
3642
3643 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
3644 TEST_REQUIRES_X86_AVX;
3645 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3646 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3647 VAddMicrokernelTester()
3648 .batch_size(batch_size)
3649 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003650 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003651 }
3652 }
3653 }
3654
3655 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
3656 TEST_REQUIRES_X86_AVX;
3657 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3658 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3659 VAddMicrokernelTester()
3660 .batch_size(batch_size)
3661 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003662 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003663 }
3664 }
3665 }
3666
3667 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
3668 TEST_REQUIRES_X86_AVX;
3669 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3670 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3671 VAddMicrokernelTester()
3672 .batch_size(batch_size)
3673 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003674 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003675 }
3676 }
3677 }
3678
3679 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
3680 TEST_REQUIRES_X86_AVX;
3681 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3682 VAddMicrokernelTester()
3683 .batch_size(batch_size)
3684 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003685 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003686 }
3687 }
3688
3689 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
3690 TEST_REQUIRES_X86_AVX;
3691 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3692 VAddMicrokernelTester()
3693 .batch_size(batch_size)
3694 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003695 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003696 }
3697 }
3698#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3699
3700
3701#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3702 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
3703 TEST_REQUIRES_X86_AVX;
3704 VAddMicrokernelTester()
3705 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003706 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003707 }
3708
3709 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
3710 TEST_REQUIRES_X86_AVX;
3711 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
3712 VAddMicrokernelTester()
3713 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003714 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003715 }
3716 }
3717
3718 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
3719 TEST_REQUIRES_X86_AVX;
3720 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
3721 VAddMicrokernelTester()
3722 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003723 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003724 }
3725 }
3726
3727 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
3728 TEST_REQUIRES_X86_AVX;
3729 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
3730 VAddMicrokernelTester()
3731 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003732 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003733 }
3734 }
3735
3736 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
3737 TEST_REQUIRES_X86_AVX;
3738 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3739 VAddMicrokernelTester()
3740 .batch_size(batch_size)
3741 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003742 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003743 }
3744 }
3745
3746 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
3747 TEST_REQUIRES_X86_AVX;
3748 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3749 VAddMicrokernelTester()
3750 .batch_size(batch_size)
3751 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003752 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003753 }
3754 }
3755
3756 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
3757 TEST_REQUIRES_X86_AVX;
3758 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3759 VAddMicrokernelTester()
3760 .batch_size(batch_size)
3761 .inplace_a(true)
3762 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003763 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003764 }
3765 }
3766
3767 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
3768 TEST_REQUIRES_X86_AVX;
3769 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3770 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3771 VAddMicrokernelTester()
3772 .batch_size(batch_size)
3773 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003774 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003775 }
3776 }
3777 }
3778
3779 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
3780 TEST_REQUIRES_X86_AVX;
3781 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3782 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3783 VAddMicrokernelTester()
3784 .batch_size(batch_size)
3785 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003786 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003787 }
3788 }
3789 }
3790
3791 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
3792 TEST_REQUIRES_X86_AVX;
3793 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3794 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3795 VAddMicrokernelTester()
3796 .batch_size(batch_size)
3797 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003798 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003799 }
3800 }
3801 }
3802
3803 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
3804 TEST_REQUIRES_X86_AVX;
3805 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3806 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3807 VAddMicrokernelTester()
3808 .batch_size(batch_size)
3809 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003810 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003811 }
3812 }
3813 }
3814
3815 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
3816 TEST_REQUIRES_X86_AVX;
3817 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3818 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3819 VAddMicrokernelTester()
3820 .batch_size(batch_size)
3821 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003822 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003823 }
3824 }
3825 }
3826
3827 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
3828 TEST_REQUIRES_X86_AVX;
3829 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3830 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3831 VAddMicrokernelTester()
3832 .batch_size(batch_size)
3833 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003834 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003835 }
3836 }
3837 }
3838
3839 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
3840 TEST_REQUIRES_X86_AVX;
3841 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3842 VAddMicrokernelTester()
3843 .batch_size(batch_size)
3844 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003845 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003846 }
3847 }
3848
3849 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
3850 TEST_REQUIRES_X86_AVX;
3851 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3852 VAddMicrokernelTester()
3853 .batch_size(batch_size)
3854 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003855 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003856 }
3857 }
3858#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3859
3860
3861#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3862 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_eq_24) {
3863 TEST_REQUIRES_X86_AVX;
3864 VAddMicrokernelTester()
3865 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003866 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003867 }
3868
3869 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_div_24) {
3870 TEST_REQUIRES_X86_AVX;
3871 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
3872 VAddMicrokernelTester()
3873 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003874 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003875 }
3876 }
3877
3878 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_lt_24) {
3879 TEST_REQUIRES_X86_AVX;
3880 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
3881 VAddMicrokernelTester()
3882 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003883 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003884 }
3885 }
3886
3887 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_gt_24) {
3888 TEST_REQUIRES_X86_AVX;
3889 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
3890 VAddMicrokernelTester()
3891 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003892 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003893 }
3894 }
3895
3896 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a) {
3897 TEST_REQUIRES_X86_AVX;
3898 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3899 VAddMicrokernelTester()
3900 .batch_size(batch_size)
3901 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003902 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003903 }
3904 }
3905
3906 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_b) {
3907 TEST_REQUIRES_X86_AVX;
3908 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3909 VAddMicrokernelTester()
3910 .batch_size(batch_size)
3911 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003912 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003913 }
3914 }
3915
3916 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a_and_b) {
3917 TEST_REQUIRES_X86_AVX;
3918 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3919 VAddMicrokernelTester()
3920 .batch_size(batch_size)
3921 .inplace_a(true)
3922 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003923 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003924 }
3925 }
3926
3927 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_zero_point) {
3928 TEST_REQUIRES_X86_AVX;
3929 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3930 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3931 VAddMicrokernelTester()
3932 .batch_size(batch_size)
3933 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003934 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003935 }
3936 }
3937 }
3938
3939 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_zero_point) {
3940 TEST_REQUIRES_X86_AVX;
3941 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3942 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3943 VAddMicrokernelTester()
3944 .batch_size(batch_size)
3945 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003946 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003947 }
3948 }
3949 }
3950
3951 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_zero_point) {
3952 TEST_REQUIRES_X86_AVX;
3953 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3954 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3955 VAddMicrokernelTester()
3956 .batch_size(batch_size)
3957 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003958 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003959 }
3960 }
3961 }
3962
3963 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_scale) {
3964 TEST_REQUIRES_X86_AVX;
3965 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3966 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3967 VAddMicrokernelTester()
3968 .batch_size(batch_size)
3969 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003970 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003971 }
3972 }
3973 }
3974
3975 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_scale) {
3976 TEST_REQUIRES_X86_AVX;
3977 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3978 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3979 VAddMicrokernelTester()
3980 .batch_size(batch_size)
3981 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003982 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003983 }
3984 }
3985 }
3986
3987 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_scale) {
3988 TEST_REQUIRES_X86_AVX;
3989 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3990 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3991 VAddMicrokernelTester()
3992 .batch_size(batch_size)
3993 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07003994 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07003995 }
3996 }
3997 }
3998
3999 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmin) {
4000 TEST_REQUIRES_X86_AVX;
4001 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4002 VAddMicrokernelTester()
4003 .batch_size(batch_size)
4004 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004005 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004006 }
4007 }
4008
4009 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmax) {
4010 TEST_REQUIRES_X86_AVX;
4011 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4012 VAddMicrokernelTester()
4013 .batch_size(batch_size)
4014 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004015 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004016 }
4017 }
4018#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4019
4020
4021#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4022 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_eq_32) {
4023 TEST_REQUIRES_X86_AVX;
4024 VAddMicrokernelTester()
4025 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004026 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004027 }
4028
4029 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_div_32) {
4030 TEST_REQUIRES_X86_AVX;
4031 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
4032 VAddMicrokernelTester()
4033 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004034 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004035 }
4036 }
4037
4038 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_lt_32) {
4039 TEST_REQUIRES_X86_AVX;
4040 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
4041 VAddMicrokernelTester()
4042 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004043 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004044 }
4045 }
4046
4047 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_gt_32) {
4048 TEST_REQUIRES_X86_AVX;
4049 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
4050 VAddMicrokernelTester()
4051 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004052 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004053 }
4054 }
4055
4056 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a) {
4057 TEST_REQUIRES_X86_AVX;
4058 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4059 VAddMicrokernelTester()
4060 .batch_size(batch_size)
4061 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004062 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004063 }
4064 }
4065
4066 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_b) {
4067 TEST_REQUIRES_X86_AVX;
4068 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4069 VAddMicrokernelTester()
4070 .batch_size(batch_size)
4071 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004072 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004073 }
4074 }
4075
4076 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a_and_b) {
4077 TEST_REQUIRES_X86_AVX;
4078 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4079 VAddMicrokernelTester()
4080 .batch_size(batch_size)
4081 .inplace_a(true)
4082 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004083 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004084 }
4085 }
4086
4087 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_zero_point) {
4088 TEST_REQUIRES_X86_AVX;
4089 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4090 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4091 VAddMicrokernelTester()
4092 .batch_size(batch_size)
4093 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004094 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004095 }
4096 }
4097 }
4098
4099 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_zero_point) {
4100 TEST_REQUIRES_X86_AVX;
4101 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4102 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4103 VAddMicrokernelTester()
4104 .batch_size(batch_size)
4105 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004106 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004107 }
4108 }
4109 }
4110
4111 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_zero_point) {
4112 TEST_REQUIRES_X86_AVX;
4113 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4114 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4115 VAddMicrokernelTester()
4116 .batch_size(batch_size)
4117 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004118 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004119 }
4120 }
4121 }
4122
4123 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_scale) {
4124 TEST_REQUIRES_X86_AVX;
4125 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4126 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4127 VAddMicrokernelTester()
4128 .batch_size(batch_size)
4129 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004130 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004131 }
4132 }
4133 }
4134
4135 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_scale) {
4136 TEST_REQUIRES_X86_AVX;
4137 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4138 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4139 VAddMicrokernelTester()
4140 .batch_size(batch_size)
4141 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004142 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004143 }
4144 }
4145 }
4146
4147 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_scale) {
4148 TEST_REQUIRES_X86_AVX;
4149 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4150 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4151 VAddMicrokernelTester()
4152 .batch_size(batch_size)
4153 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004154 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004155 }
4156 }
4157 }
4158
4159 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmin) {
4160 TEST_REQUIRES_X86_AVX;
4161 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4162 VAddMicrokernelTester()
4163 .batch_size(batch_size)
4164 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004165 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004166 }
4167 }
4168
4169 TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmax) {
4170 TEST_REQUIRES_X86_AVX;
4171 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4172 VAddMicrokernelTester()
4173 .batch_size(batch_size)
4174 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004175 .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhane9c4b962021-04-02 16:56:55 -07004176 }
4177 }
4178#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4179
4180
4181#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004182 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
4183 TEST_REQUIRES_X86_XOP;
4184 VAddMicrokernelTester()
4185 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004186 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004187 }
4188
4189 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
4190 TEST_REQUIRES_X86_XOP;
4191 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
4192 VAddMicrokernelTester()
4193 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004194 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004195 }
4196 }
4197
4198 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
4199 TEST_REQUIRES_X86_XOP;
4200 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
4201 VAddMicrokernelTester()
4202 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004203 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004204 }
4205 }
4206
4207 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
4208 TEST_REQUIRES_X86_XOP;
4209 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
4210 VAddMicrokernelTester()
4211 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004212 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004213 }
4214 }
4215
4216 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
4217 TEST_REQUIRES_X86_XOP;
4218 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4219 VAddMicrokernelTester()
4220 .batch_size(batch_size)
4221 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004222 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004223 }
4224 }
4225
4226 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
4227 TEST_REQUIRES_X86_XOP;
4228 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4229 VAddMicrokernelTester()
4230 .batch_size(batch_size)
4231 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004232 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004233 }
4234 }
4235
4236 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
4237 TEST_REQUIRES_X86_XOP;
4238 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4239 VAddMicrokernelTester()
4240 .batch_size(batch_size)
4241 .inplace_a(true)
4242 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004243 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004244 }
4245 }
4246
4247 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
4248 TEST_REQUIRES_X86_XOP;
4249 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4250 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4251 VAddMicrokernelTester()
4252 .batch_size(batch_size)
4253 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004254 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004255 }
4256 }
4257 }
4258
4259 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
4260 TEST_REQUIRES_X86_XOP;
4261 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4262 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4263 VAddMicrokernelTester()
4264 .batch_size(batch_size)
4265 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004266 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004267 }
4268 }
4269 }
4270
4271 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
4272 TEST_REQUIRES_X86_XOP;
4273 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4274 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4275 VAddMicrokernelTester()
4276 .batch_size(batch_size)
4277 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004278 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004279 }
4280 }
4281 }
4282
4283 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
4284 TEST_REQUIRES_X86_XOP;
4285 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4286 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4287 VAddMicrokernelTester()
4288 .batch_size(batch_size)
4289 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004290 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004291 }
4292 }
4293 }
4294
4295 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
4296 TEST_REQUIRES_X86_XOP;
4297 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4298 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4299 VAddMicrokernelTester()
4300 .batch_size(batch_size)
4301 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004302 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004303 }
4304 }
4305 }
4306
4307 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
4308 TEST_REQUIRES_X86_XOP;
4309 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4310 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4311 VAddMicrokernelTester()
4312 .batch_size(batch_size)
4313 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004314 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004315 }
4316 }
4317 }
4318
4319 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
4320 TEST_REQUIRES_X86_XOP;
4321 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4322 VAddMicrokernelTester()
4323 .batch_size(batch_size)
4324 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004325 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004326 }
4327 }
4328
4329 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
4330 TEST_REQUIRES_X86_XOP;
4331 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4332 VAddMicrokernelTester()
4333 .batch_size(batch_size)
4334 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004335 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004336 }
4337 }
4338#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4339
4340
4341#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4342 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
4343 TEST_REQUIRES_X86_XOP;
4344 VAddMicrokernelTester()
4345 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004346 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004347 }
4348
4349 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
4350 TEST_REQUIRES_X86_XOP;
4351 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
4352 VAddMicrokernelTester()
4353 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004354 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004355 }
4356 }
4357
4358 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
4359 TEST_REQUIRES_X86_XOP;
4360 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
4361 VAddMicrokernelTester()
4362 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004363 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004364 }
4365 }
4366
4367 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
4368 TEST_REQUIRES_X86_XOP;
4369 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
4370 VAddMicrokernelTester()
4371 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004372 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004373 }
4374 }
4375
4376 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
4377 TEST_REQUIRES_X86_XOP;
4378 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4379 VAddMicrokernelTester()
4380 .batch_size(batch_size)
4381 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004382 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004383 }
4384 }
4385
4386 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
4387 TEST_REQUIRES_X86_XOP;
4388 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4389 VAddMicrokernelTester()
4390 .batch_size(batch_size)
4391 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004392 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004393 }
4394 }
4395
4396 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
4397 TEST_REQUIRES_X86_XOP;
4398 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4399 VAddMicrokernelTester()
4400 .batch_size(batch_size)
4401 .inplace_a(true)
4402 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004403 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004404 }
4405 }
4406
4407 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
4408 TEST_REQUIRES_X86_XOP;
4409 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4410 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4411 VAddMicrokernelTester()
4412 .batch_size(batch_size)
4413 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004414 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004415 }
4416 }
4417 }
4418
4419 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
4420 TEST_REQUIRES_X86_XOP;
4421 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4422 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4423 VAddMicrokernelTester()
4424 .batch_size(batch_size)
4425 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004426 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004427 }
4428 }
4429 }
4430
4431 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
4432 TEST_REQUIRES_X86_XOP;
4433 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4434 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4435 VAddMicrokernelTester()
4436 .batch_size(batch_size)
4437 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004438 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004439 }
4440 }
4441 }
4442
4443 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
4444 TEST_REQUIRES_X86_XOP;
4445 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4446 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4447 VAddMicrokernelTester()
4448 .batch_size(batch_size)
4449 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004450 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004451 }
4452 }
4453 }
4454
4455 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
4456 TEST_REQUIRES_X86_XOP;
4457 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4458 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4459 VAddMicrokernelTester()
4460 .batch_size(batch_size)
4461 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004462 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004463 }
4464 }
4465 }
4466
4467 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
4468 TEST_REQUIRES_X86_XOP;
4469 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4470 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4471 VAddMicrokernelTester()
4472 .batch_size(batch_size)
4473 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004474 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004475 }
4476 }
4477 }
4478
4479 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
4480 TEST_REQUIRES_X86_XOP;
4481 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4482 VAddMicrokernelTester()
4483 .batch_size(batch_size)
4484 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004485 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004486 }
4487 }
4488
4489 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
4490 TEST_REQUIRES_X86_XOP;
4491 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4492 VAddMicrokernelTester()
4493 .batch_size(batch_size)
4494 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004495 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004496 }
4497 }
4498#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4499
4500
4501#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4502 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
4503 TEST_REQUIRES_X86_XOP;
4504 VAddMicrokernelTester()
4505 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004506 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004507 }
4508
4509 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
4510 TEST_REQUIRES_X86_XOP;
4511 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
4512 VAddMicrokernelTester()
4513 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004514 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004515 }
4516 }
4517
4518 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
4519 TEST_REQUIRES_X86_XOP;
4520 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
4521 VAddMicrokernelTester()
4522 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004523 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004524 }
4525 }
4526
4527 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
4528 TEST_REQUIRES_X86_XOP;
4529 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
4530 VAddMicrokernelTester()
4531 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004532 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004533 }
4534 }
4535
4536 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a) {
4537 TEST_REQUIRES_X86_XOP;
4538 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4539 VAddMicrokernelTester()
4540 .batch_size(batch_size)
4541 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004542 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004543 }
4544 }
4545
4546 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_b) {
4547 TEST_REQUIRES_X86_XOP;
4548 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4549 VAddMicrokernelTester()
4550 .batch_size(batch_size)
4551 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004552 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004553 }
4554 }
4555
4556 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a_and_b) {
4557 TEST_REQUIRES_X86_XOP;
4558 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4559 VAddMicrokernelTester()
4560 .batch_size(batch_size)
4561 .inplace_a(true)
4562 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004563 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004564 }
4565 }
4566
4567 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
4568 TEST_REQUIRES_X86_XOP;
4569 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4570 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4571 VAddMicrokernelTester()
4572 .batch_size(batch_size)
4573 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004574 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004575 }
4576 }
4577 }
4578
4579 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
4580 TEST_REQUIRES_X86_XOP;
4581 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4582 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4583 VAddMicrokernelTester()
4584 .batch_size(batch_size)
4585 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004586 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004587 }
4588 }
4589 }
4590
4591 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
4592 TEST_REQUIRES_X86_XOP;
4593 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4594 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4595 VAddMicrokernelTester()
4596 .batch_size(batch_size)
4597 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004598 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004599 }
4600 }
4601 }
4602
4603 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
4604 TEST_REQUIRES_X86_XOP;
4605 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4606 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4607 VAddMicrokernelTester()
4608 .batch_size(batch_size)
4609 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004610 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004611 }
4612 }
4613 }
4614
4615 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
4616 TEST_REQUIRES_X86_XOP;
4617 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4618 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4619 VAddMicrokernelTester()
4620 .batch_size(batch_size)
4621 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004622 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004623 }
4624 }
4625 }
4626
4627 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
4628 TEST_REQUIRES_X86_XOP;
4629 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4630 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4631 VAddMicrokernelTester()
4632 .batch_size(batch_size)
4633 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004634 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004635 }
4636 }
4637 }
4638
4639 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmin) {
4640 TEST_REQUIRES_X86_XOP;
4641 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4642 VAddMicrokernelTester()
4643 .batch_size(batch_size)
4644 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004645 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004646 }
4647 }
4648
4649 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmax) {
4650 TEST_REQUIRES_X86_XOP;
4651 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4652 VAddMicrokernelTester()
4653 .batch_size(batch_size)
4654 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004655 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004656 }
4657 }
4658#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4659
4660
4661#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4662 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
4663 TEST_REQUIRES_X86_XOP;
4664 VAddMicrokernelTester()
4665 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004666 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004667 }
4668
4669 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
4670 TEST_REQUIRES_X86_XOP;
4671 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
4672 VAddMicrokernelTester()
4673 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004674 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004675 }
4676 }
4677
4678 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
4679 TEST_REQUIRES_X86_XOP;
4680 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
4681 VAddMicrokernelTester()
4682 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004683 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004684 }
4685 }
4686
4687 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
4688 TEST_REQUIRES_X86_XOP;
4689 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
4690 VAddMicrokernelTester()
4691 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004692 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004693 }
4694 }
4695
4696 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a) {
4697 TEST_REQUIRES_X86_XOP;
4698 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4699 VAddMicrokernelTester()
4700 .batch_size(batch_size)
4701 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004702 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004703 }
4704 }
4705
4706 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_b) {
4707 TEST_REQUIRES_X86_XOP;
4708 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4709 VAddMicrokernelTester()
4710 .batch_size(batch_size)
4711 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004712 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004713 }
4714 }
4715
4716 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a_and_b) {
4717 TEST_REQUIRES_X86_XOP;
4718 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4719 VAddMicrokernelTester()
4720 .batch_size(batch_size)
4721 .inplace_a(true)
4722 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004723 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004724 }
4725 }
4726
4727 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
4728 TEST_REQUIRES_X86_XOP;
4729 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4730 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4731 VAddMicrokernelTester()
4732 .batch_size(batch_size)
4733 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004734 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004735 }
4736 }
4737 }
4738
4739 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
4740 TEST_REQUIRES_X86_XOP;
4741 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4742 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4743 VAddMicrokernelTester()
4744 .batch_size(batch_size)
4745 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004746 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004747 }
4748 }
4749 }
4750
4751 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
4752 TEST_REQUIRES_X86_XOP;
4753 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4754 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4755 VAddMicrokernelTester()
4756 .batch_size(batch_size)
4757 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004758 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004759 }
4760 }
4761 }
4762
4763 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
4764 TEST_REQUIRES_X86_XOP;
4765 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4766 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4767 VAddMicrokernelTester()
4768 .batch_size(batch_size)
4769 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004770 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004771 }
4772 }
4773 }
4774
4775 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
4776 TEST_REQUIRES_X86_XOP;
4777 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4778 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4779 VAddMicrokernelTester()
4780 .batch_size(batch_size)
4781 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004782 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004783 }
4784 }
4785 }
4786
4787 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
4788 TEST_REQUIRES_X86_XOP;
4789 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4790 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4791 VAddMicrokernelTester()
4792 .batch_size(batch_size)
4793 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004794 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004795 }
4796 }
4797 }
4798
4799 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmin) {
4800 TEST_REQUIRES_X86_XOP;
4801 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4802 VAddMicrokernelTester()
4803 .batch_size(batch_size)
4804 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004805 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004806 }
4807 }
4808
4809 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmax) {
4810 TEST_REQUIRES_X86_XOP;
4811 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4812 VAddMicrokernelTester()
4813 .batch_size(batch_size)
4814 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004815 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
Marat Dukhanbb9225e2020-09-06 22:40:56 -07004816 }
4817 }
4818#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4819
4820
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004821#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4822 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
4823 TEST_REQUIRES_X86_AVX2;
4824 VAddMicrokernelTester()
4825 .batch_size(8)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004826 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004827 }
4828
4829 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
4830 TEST_REQUIRES_X86_AVX2;
4831 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
4832 VAddMicrokernelTester()
4833 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004834 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004835 }
4836 }
4837
4838 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
4839 TEST_REQUIRES_X86_AVX2;
4840 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
4841 VAddMicrokernelTester()
4842 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004843 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004844 }
4845 }
4846
4847 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
4848 TEST_REQUIRES_X86_AVX2;
4849 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
4850 VAddMicrokernelTester()
4851 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004852 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004853 }
4854 }
4855
4856 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
4857 TEST_REQUIRES_X86_AVX2;
4858 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4859 VAddMicrokernelTester()
4860 .batch_size(batch_size)
4861 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004862 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004863 }
4864 }
4865
4866 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
4867 TEST_REQUIRES_X86_AVX2;
4868 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4869 VAddMicrokernelTester()
4870 .batch_size(batch_size)
4871 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004872 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004873 }
4874 }
4875
4876 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
4877 TEST_REQUIRES_X86_AVX2;
4878 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4879 VAddMicrokernelTester()
4880 .batch_size(batch_size)
4881 .inplace_a(true)
4882 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004883 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004884 }
4885 }
4886
4887 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
4888 TEST_REQUIRES_X86_AVX2;
4889 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4890 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4891 VAddMicrokernelTester()
4892 .batch_size(batch_size)
4893 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004894 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004895 }
4896 }
4897 }
4898
4899 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
4900 TEST_REQUIRES_X86_AVX2;
4901 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4902 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4903 VAddMicrokernelTester()
4904 .batch_size(batch_size)
4905 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004906 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004907 }
4908 }
4909 }
4910
4911 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
4912 TEST_REQUIRES_X86_AVX2;
4913 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4914 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4915 VAddMicrokernelTester()
4916 .batch_size(batch_size)
4917 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004918 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004919 }
4920 }
4921 }
4922
4923 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
4924 TEST_REQUIRES_X86_AVX2;
4925 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4926 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4927 VAddMicrokernelTester()
4928 .batch_size(batch_size)
4929 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004930 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004931 }
4932 }
4933 }
4934
4935 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
4936 TEST_REQUIRES_X86_AVX2;
4937 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4938 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4939 VAddMicrokernelTester()
4940 .batch_size(batch_size)
4941 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004942 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004943 }
4944 }
4945 }
4946
4947 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
4948 TEST_REQUIRES_X86_AVX2;
4949 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4950 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4951 VAddMicrokernelTester()
4952 .batch_size(batch_size)
4953 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004954 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004955 }
4956 }
4957 }
4958
4959 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
4960 TEST_REQUIRES_X86_AVX2;
4961 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4962 VAddMicrokernelTester()
4963 .batch_size(batch_size)
4964 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004965 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004966 }
4967 }
4968
4969 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
4970 TEST_REQUIRES_X86_AVX2;
4971 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
4972 VAddMicrokernelTester()
4973 .batch_size(batch_size)
4974 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004975 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004976 }
4977 }
4978#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4979
4980
4981#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4982 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
4983 TEST_REQUIRES_X86_AVX2;
4984 VAddMicrokernelTester()
4985 .batch_size(16)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004986 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004987 }
4988
4989 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
4990 TEST_REQUIRES_X86_AVX2;
4991 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
4992 VAddMicrokernelTester()
4993 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07004994 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07004995 }
4996 }
4997
4998 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
4999 TEST_REQUIRES_X86_AVX2;
5000 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
5001 VAddMicrokernelTester()
5002 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005003 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005004 }
5005 }
5006
5007 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
5008 TEST_REQUIRES_X86_AVX2;
5009 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
5010 VAddMicrokernelTester()
5011 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005012 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005013 }
5014 }
5015
5016 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
5017 TEST_REQUIRES_X86_AVX2;
5018 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5019 VAddMicrokernelTester()
5020 .batch_size(batch_size)
5021 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005022 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005023 }
5024 }
5025
5026 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
5027 TEST_REQUIRES_X86_AVX2;
5028 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5029 VAddMicrokernelTester()
5030 .batch_size(batch_size)
5031 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005032 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005033 }
5034 }
5035
5036 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
5037 TEST_REQUIRES_X86_AVX2;
5038 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5039 VAddMicrokernelTester()
5040 .batch_size(batch_size)
5041 .inplace_a(true)
5042 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005043 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005044 }
5045 }
5046
5047 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
5048 TEST_REQUIRES_X86_AVX2;
5049 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5050 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5051 VAddMicrokernelTester()
5052 .batch_size(batch_size)
5053 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005054 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005055 }
5056 }
5057 }
5058
5059 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
5060 TEST_REQUIRES_X86_AVX2;
5061 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5062 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5063 VAddMicrokernelTester()
5064 .batch_size(batch_size)
5065 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005066 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005067 }
5068 }
5069 }
5070
5071 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
5072 TEST_REQUIRES_X86_AVX2;
5073 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5074 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5075 VAddMicrokernelTester()
5076 .batch_size(batch_size)
5077 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005078 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005079 }
5080 }
5081 }
5082
5083 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
5084 TEST_REQUIRES_X86_AVX2;
5085 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5086 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5087 VAddMicrokernelTester()
5088 .batch_size(batch_size)
5089 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005090 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005091 }
5092 }
5093 }
5094
5095 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
5096 TEST_REQUIRES_X86_AVX2;
5097 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5098 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5099 VAddMicrokernelTester()
5100 .batch_size(batch_size)
5101 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005102 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005103 }
5104 }
5105 }
5106
5107 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
5108 TEST_REQUIRES_X86_AVX2;
5109 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5110 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5111 VAddMicrokernelTester()
5112 .batch_size(batch_size)
5113 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005114 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005115 }
5116 }
5117 }
5118
5119 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
5120 TEST_REQUIRES_X86_AVX2;
5121 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5122 VAddMicrokernelTester()
5123 .batch_size(batch_size)
5124 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005125 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005126 }
5127 }
5128
5129 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
5130 TEST_REQUIRES_X86_AVX2;
5131 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5132 VAddMicrokernelTester()
5133 .batch_size(batch_size)
5134 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005135 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005136 }
5137 }
5138#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5139
5140
5141#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5142 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
5143 TEST_REQUIRES_X86_AVX2;
5144 VAddMicrokernelTester()
5145 .batch_size(24)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005146 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005147 }
5148
5149 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
5150 TEST_REQUIRES_X86_AVX2;
5151 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
5152 VAddMicrokernelTester()
5153 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005154 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005155 }
5156 }
5157
5158 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
5159 TEST_REQUIRES_X86_AVX2;
5160 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
5161 VAddMicrokernelTester()
5162 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005163 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005164 }
5165 }
5166
5167 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
5168 TEST_REQUIRES_X86_AVX2;
5169 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
5170 VAddMicrokernelTester()
5171 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005172 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005173 }
5174 }
5175
5176 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a) {
5177 TEST_REQUIRES_X86_AVX2;
5178 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5179 VAddMicrokernelTester()
5180 .batch_size(batch_size)
5181 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005182 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005183 }
5184 }
5185
5186 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_b) {
5187 TEST_REQUIRES_X86_AVX2;
5188 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5189 VAddMicrokernelTester()
5190 .batch_size(batch_size)
5191 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005192 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005193 }
5194 }
5195
5196 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a_and_b) {
5197 TEST_REQUIRES_X86_AVX2;
5198 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5199 VAddMicrokernelTester()
5200 .batch_size(batch_size)
5201 .inplace_a(true)
5202 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005203 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005204 }
5205 }
5206
5207 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
5208 TEST_REQUIRES_X86_AVX2;
5209 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5210 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5211 VAddMicrokernelTester()
5212 .batch_size(batch_size)
5213 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005214 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005215 }
5216 }
5217 }
5218
5219 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
5220 TEST_REQUIRES_X86_AVX2;
5221 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5222 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5223 VAddMicrokernelTester()
5224 .batch_size(batch_size)
5225 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005226 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005227 }
5228 }
5229 }
5230
5231 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
5232 TEST_REQUIRES_X86_AVX2;
5233 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5234 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5235 VAddMicrokernelTester()
5236 .batch_size(batch_size)
5237 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005238 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005239 }
5240 }
5241 }
5242
5243 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
5244 TEST_REQUIRES_X86_AVX2;
5245 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5246 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5247 VAddMicrokernelTester()
5248 .batch_size(batch_size)
5249 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005250 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005251 }
5252 }
5253 }
5254
5255 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
5256 TEST_REQUIRES_X86_AVX2;
5257 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5258 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5259 VAddMicrokernelTester()
5260 .batch_size(batch_size)
5261 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005262 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005263 }
5264 }
5265 }
5266
5267 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
5268 TEST_REQUIRES_X86_AVX2;
5269 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5270 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5271 VAddMicrokernelTester()
5272 .batch_size(batch_size)
5273 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005274 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005275 }
5276 }
5277 }
5278
5279 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
5280 TEST_REQUIRES_X86_AVX2;
5281 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5282 VAddMicrokernelTester()
5283 .batch_size(batch_size)
5284 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005285 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005286 }
5287 }
5288
5289 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
5290 TEST_REQUIRES_X86_AVX2;
5291 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
5292 VAddMicrokernelTester()
5293 .batch_size(batch_size)
5294 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005295 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005296 }
5297 }
5298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5299
5300
5301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5302 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
5303 TEST_REQUIRES_X86_AVX2;
5304 VAddMicrokernelTester()
5305 .batch_size(32)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005306 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005307 }
5308
5309 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
5310 TEST_REQUIRES_X86_AVX2;
5311 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
5312 VAddMicrokernelTester()
5313 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005314 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005315 }
5316 }
5317
5318 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
5319 TEST_REQUIRES_X86_AVX2;
5320 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
5321 VAddMicrokernelTester()
5322 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005323 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005324 }
5325 }
5326
5327 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
5328 TEST_REQUIRES_X86_AVX2;
5329 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
5330 VAddMicrokernelTester()
5331 .batch_size(batch_size)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005332 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005333 }
5334 }
5335
5336 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a) {
5337 TEST_REQUIRES_X86_AVX2;
5338 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5339 VAddMicrokernelTester()
5340 .batch_size(batch_size)
5341 .inplace_a(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005342 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005343 }
5344 }
5345
5346 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_b) {
5347 TEST_REQUIRES_X86_AVX2;
5348 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5349 VAddMicrokernelTester()
5350 .batch_size(batch_size)
5351 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005352 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005353 }
5354 }
5355
5356 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a_and_b) {
5357 TEST_REQUIRES_X86_AVX2;
5358 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5359 VAddMicrokernelTester()
5360 .batch_size(batch_size)
5361 .inplace_a(true)
5362 .inplace_b(true)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005363 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005364 }
5365 }
5366
5367 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
5368 TEST_REQUIRES_X86_AVX2;
5369 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5370 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5371 VAddMicrokernelTester()
5372 .batch_size(batch_size)
5373 .a_zero_point(a_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005374 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005375 }
5376 }
5377 }
5378
5379 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
5380 TEST_REQUIRES_X86_AVX2;
5381 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5382 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5383 VAddMicrokernelTester()
5384 .batch_size(batch_size)
5385 .b_zero_point(b_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005386 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005387 }
5388 }
5389 }
5390
5391 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
5392 TEST_REQUIRES_X86_AVX2;
5393 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5394 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5395 VAddMicrokernelTester()
5396 .batch_size(batch_size)
5397 .y_zero_point(y_zero_point)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005398 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005399 }
5400 }
5401 }
5402
5403 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
5404 TEST_REQUIRES_X86_AVX2;
5405 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5406 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5407 VAddMicrokernelTester()
5408 .batch_size(batch_size)
5409 .a_scale(a_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005410 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005411 }
5412 }
5413 }
5414
5415 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
5416 TEST_REQUIRES_X86_AVX2;
5417 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5418 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5419 VAddMicrokernelTester()
5420 .batch_size(batch_size)
5421 .b_scale(b_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005422 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005423 }
5424 }
5425 }
5426
5427 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
5428 TEST_REQUIRES_X86_AVX2;
5429 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5430 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5431 VAddMicrokernelTester()
5432 .batch_size(batch_size)
5433 .y_scale(y_scale)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005434 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005435 }
5436 }
5437 }
5438
5439 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
5440 TEST_REQUIRES_X86_AVX2;
5441 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5442 VAddMicrokernelTester()
5443 .batch_size(batch_size)
5444 .qmin(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005445 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005446 }
5447 }
5448
5449 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
5450 TEST_REQUIRES_X86_AVX2;
5451 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5452 VAddMicrokernelTester()
5453 .batch_size(batch_size)
5454 .qmax(128)
Marat Dukhan7679b1e2021-07-20 18:32:23 -07005455 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
Marat Dukhane6dc0b62020-09-08 23:57:14 -07005456 }
5457 }
5458#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5459
5460
Marat Dukhane76049a2021-07-22 14:48:59 -07005461#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5462 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
5463 TEST_REQUIRES_X86_AVX512SKX;
5464 VAddMicrokernelTester()
5465 .batch_size(16)
5466 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5467 }
5468
5469 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
5470 TEST_REQUIRES_X86_AVX512SKX;
5471 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
5472 VAddMicrokernelTester()
5473 .batch_size(batch_size)
5474 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5475 }
5476 }
5477
5478 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
5479 TEST_REQUIRES_X86_AVX512SKX;
5480 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
5481 VAddMicrokernelTester()
5482 .batch_size(batch_size)
5483 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5484 }
5485 }
5486
5487 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
5488 TEST_REQUIRES_X86_AVX512SKX;
5489 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
5490 VAddMicrokernelTester()
5491 .batch_size(batch_size)
5492 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5493 }
5494 }
5495
5496 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
5497 TEST_REQUIRES_X86_AVX512SKX;
5498 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5499 VAddMicrokernelTester()
5500 .batch_size(batch_size)
5501 .inplace_a(true)
5502 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5503 }
5504 }
5505
5506 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
5507 TEST_REQUIRES_X86_AVX512SKX;
5508 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5509 VAddMicrokernelTester()
5510 .batch_size(batch_size)
5511 .inplace_b(true)
5512 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5513 }
5514 }
5515
5516 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
5517 TEST_REQUIRES_X86_AVX512SKX;
5518 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5519 VAddMicrokernelTester()
5520 .batch_size(batch_size)
5521 .inplace_a(true)
5522 .inplace_b(true)
5523 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5524 }
5525 }
5526
5527 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
5528 TEST_REQUIRES_X86_AVX512SKX;
5529 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5530 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5531 VAddMicrokernelTester()
5532 .batch_size(batch_size)
5533 .a_zero_point(a_zero_point)
5534 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5535 }
5536 }
5537 }
5538
5539 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
5540 TEST_REQUIRES_X86_AVX512SKX;
5541 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5542 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5543 VAddMicrokernelTester()
5544 .batch_size(batch_size)
5545 .b_zero_point(b_zero_point)
5546 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5547 }
5548 }
5549 }
5550
5551 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
5552 TEST_REQUIRES_X86_AVX512SKX;
5553 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5554 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5555 VAddMicrokernelTester()
5556 .batch_size(batch_size)
5557 .y_zero_point(y_zero_point)
5558 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5559 }
5560 }
5561 }
5562
5563 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
5564 TEST_REQUIRES_X86_AVX512SKX;
5565 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5566 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5567 VAddMicrokernelTester()
5568 .batch_size(batch_size)
5569 .a_scale(a_scale)
5570 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5571 }
5572 }
5573 }
5574
5575 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
5576 TEST_REQUIRES_X86_AVX512SKX;
5577 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5578 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5579 VAddMicrokernelTester()
5580 .batch_size(batch_size)
5581 .b_scale(b_scale)
5582 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5583 }
5584 }
5585 }
5586
5587 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
5588 TEST_REQUIRES_X86_AVX512SKX;
5589 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5590 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5591 VAddMicrokernelTester()
5592 .batch_size(batch_size)
5593 .y_scale(y_scale)
5594 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5595 }
5596 }
5597 }
5598
5599 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
5600 TEST_REQUIRES_X86_AVX512SKX;
5601 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5602 VAddMicrokernelTester()
5603 .batch_size(batch_size)
5604 .qmin(128)
5605 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5606 }
5607 }
5608
5609 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
5610 TEST_REQUIRES_X86_AVX512SKX;
5611 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5612 VAddMicrokernelTester()
5613 .batch_size(batch_size)
5614 .qmax(128)
5615 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
5616 }
5617 }
5618#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5619
5620
5621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5622 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
5623 TEST_REQUIRES_X86_AVX512SKX;
5624 VAddMicrokernelTester()
5625 .batch_size(32)
5626 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5627 }
5628
5629 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
5630 TEST_REQUIRES_X86_AVX512SKX;
5631 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
5632 VAddMicrokernelTester()
5633 .batch_size(batch_size)
5634 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5635 }
5636 }
5637
5638 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
5639 TEST_REQUIRES_X86_AVX512SKX;
5640 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
5641 VAddMicrokernelTester()
5642 .batch_size(batch_size)
5643 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5644 }
5645 }
5646
5647 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
5648 TEST_REQUIRES_X86_AVX512SKX;
5649 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
5650 VAddMicrokernelTester()
5651 .batch_size(batch_size)
5652 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5653 }
5654 }
5655
5656 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
5657 TEST_REQUIRES_X86_AVX512SKX;
5658 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5659 VAddMicrokernelTester()
5660 .batch_size(batch_size)
5661 .inplace_a(true)
5662 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5663 }
5664 }
5665
5666 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
5667 TEST_REQUIRES_X86_AVX512SKX;
5668 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5669 VAddMicrokernelTester()
5670 .batch_size(batch_size)
5671 .inplace_b(true)
5672 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5673 }
5674 }
5675
5676 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
5677 TEST_REQUIRES_X86_AVX512SKX;
5678 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5679 VAddMicrokernelTester()
5680 .batch_size(batch_size)
5681 .inplace_a(true)
5682 .inplace_b(true)
5683 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5684 }
5685 }
5686
5687 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
5688 TEST_REQUIRES_X86_AVX512SKX;
5689 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5690 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5691 VAddMicrokernelTester()
5692 .batch_size(batch_size)
5693 .a_zero_point(a_zero_point)
5694 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5695 }
5696 }
5697 }
5698
5699 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
5700 TEST_REQUIRES_X86_AVX512SKX;
5701 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5702 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5703 VAddMicrokernelTester()
5704 .batch_size(batch_size)
5705 .b_zero_point(b_zero_point)
5706 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5707 }
5708 }
5709 }
5710
5711 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
5712 TEST_REQUIRES_X86_AVX512SKX;
5713 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5714 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5715 VAddMicrokernelTester()
5716 .batch_size(batch_size)
5717 .y_zero_point(y_zero_point)
5718 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5719 }
5720 }
5721 }
5722
5723 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
5724 TEST_REQUIRES_X86_AVX512SKX;
5725 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5726 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5727 VAddMicrokernelTester()
5728 .batch_size(batch_size)
5729 .a_scale(a_scale)
5730 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5731 }
5732 }
5733 }
5734
5735 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
5736 TEST_REQUIRES_X86_AVX512SKX;
5737 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5738 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5739 VAddMicrokernelTester()
5740 .batch_size(batch_size)
5741 .b_scale(b_scale)
5742 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5743 }
5744 }
5745 }
5746
5747 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
5748 TEST_REQUIRES_X86_AVX512SKX;
5749 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5750 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5751 VAddMicrokernelTester()
5752 .batch_size(batch_size)
5753 .y_scale(y_scale)
5754 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5755 }
5756 }
5757 }
5758
5759 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
5760 TEST_REQUIRES_X86_AVX512SKX;
5761 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5762 VAddMicrokernelTester()
5763 .batch_size(batch_size)
5764 .qmin(128)
5765 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5766 }
5767 }
5768
5769 TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
5770 TEST_REQUIRES_X86_AVX512SKX;
5771 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5772 VAddMicrokernelTester()
5773 .batch_size(batch_size)
5774 .qmax(128)
5775 .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
5776 }
5777 }
5778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5779
5780
Marat Dukhan4c617792021-12-21 15:47:58 -08005781#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07005782 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
5783 VAddMicrokernelTester()
5784 .batch_size(8)
Marat Dukhan66913242021-07-20 16:11:23 -07005785 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005786 }
5787
5788 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
5789 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
5790 VAddMicrokernelTester()
5791 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005792 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005793 }
5794 }
5795
5796 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
5797 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
5798 VAddMicrokernelTester()
5799 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005800 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005801 }
5802 }
5803
5804 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
5805 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
5806 VAddMicrokernelTester()
5807 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005808 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005809 }
5810 }
5811
5812 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
5813 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5814 VAddMicrokernelTester()
5815 .batch_size(batch_size)
5816 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005817 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005818 }
5819 }
5820
5821 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
5822 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5823 VAddMicrokernelTester()
5824 .batch_size(batch_size)
5825 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005826 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005827 }
5828 }
5829
5830 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
5831 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5832 VAddMicrokernelTester()
5833 .batch_size(batch_size)
5834 .inplace_a(true)
5835 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005836 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005837 }
5838 }
5839
5840 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
5841 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5842 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5843 VAddMicrokernelTester()
5844 .batch_size(batch_size)
5845 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07005846 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005847 }
5848 }
5849 }
5850
5851 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
5852 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5853 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5854 VAddMicrokernelTester()
5855 .batch_size(batch_size)
5856 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07005857 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005858 }
5859 }
5860 }
5861
5862 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
5863 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5864 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5865 VAddMicrokernelTester()
5866 .batch_size(batch_size)
5867 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07005868 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005869 }
5870 }
5871 }
5872
5873 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
5874 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5875 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5876 VAddMicrokernelTester()
5877 .batch_size(batch_size)
5878 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07005879 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005880 }
5881 }
5882 }
5883
5884 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
5885 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5886 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
5887 VAddMicrokernelTester()
5888 .batch_size(batch_size)
5889 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07005890 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005891 }
5892 }
5893 }
5894
5895 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
5896 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5897 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
5898 VAddMicrokernelTester()
5899 .batch_size(batch_size)
5900 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07005901 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005902 }
5903 }
5904 }
5905
5906 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmin) {
5907 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5908 VAddMicrokernelTester()
5909 .batch_size(batch_size)
5910 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07005911 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005912 }
5913 }
5914
5915 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmax) {
5916 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
5917 VAddMicrokernelTester()
5918 .batch_size(batch_size)
5919 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07005920 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005921 }
5922 }
Marat Dukhan4c617792021-12-21 15:47:58 -08005923#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07005924
5925
Marat Dukhan4c617792021-12-21 15:47:58 -08005926#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07005927 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
5928 VAddMicrokernelTester()
5929 .batch_size(16)
Marat Dukhan66913242021-07-20 16:11:23 -07005930 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005931 }
5932
5933 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
5934 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
5935 VAddMicrokernelTester()
5936 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005937 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005938 }
5939 }
5940
5941 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
5942 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
5943 VAddMicrokernelTester()
5944 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005945 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005946 }
5947 }
5948
5949 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
5950 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
5951 VAddMicrokernelTester()
5952 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07005953 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005954 }
5955 }
5956
5957 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
5958 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5959 VAddMicrokernelTester()
5960 .batch_size(batch_size)
5961 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005962 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005963 }
5964 }
5965
5966 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
5967 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5968 VAddMicrokernelTester()
5969 .batch_size(batch_size)
5970 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005971 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005972 }
5973 }
5974
5975 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
5976 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5977 VAddMicrokernelTester()
5978 .batch_size(batch_size)
5979 .inplace_a(true)
5980 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07005981 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005982 }
5983 }
5984
5985 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
5986 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5987 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5988 VAddMicrokernelTester()
5989 .batch_size(batch_size)
5990 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07005991 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07005992 }
5993 }
5994 }
5995
5996 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
5997 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
5998 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5999 VAddMicrokernelTester()
6000 .batch_size(batch_size)
6001 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006002 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006003 }
6004 }
6005 }
6006
6007 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
6008 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6009 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6010 VAddMicrokernelTester()
6011 .batch_size(batch_size)
6012 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006013 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006014 }
6015 }
6016 }
6017
6018 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
6019 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6020 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6021 VAddMicrokernelTester()
6022 .batch_size(batch_size)
6023 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006024 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006025 }
6026 }
6027 }
6028
6029 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
6030 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6031 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6032 VAddMicrokernelTester()
6033 .batch_size(batch_size)
6034 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006035 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006036 }
6037 }
6038 }
6039
6040 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
6041 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6042 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6043 VAddMicrokernelTester()
6044 .batch_size(batch_size)
6045 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006046 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006047 }
6048 }
6049 }
6050
6051 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmin) {
6052 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6053 VAddMicrokernelTester()
6054 .batch_size(batch_size)
6055 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006056 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006057 }
6058 }
6059
6060 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmax) {
6061 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
6062 VAddMicrokernelTester()
6063 .batch_size(batch_size)
6064 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006065 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006066 }
6067 }
Marat Dukhan4c617792021-12-21 15:47:58 -08006068#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07006069
6070
Marat Dukhan4c617792021-12-21 15:47:58 -08006071#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07006072 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_eq_24) {
6073 VAddMicrokernelTester()
6074 .batch_size(24)
Marat Dukhan66913242021-07-20 16:11:23 -07006075 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006076 }
6077
6078 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_div_24) {
6079 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
6080 VAddMicrokernelTester()
6081 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006082 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006083 }
6084 }
6085
6086 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_lt_24) {
6087 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
6088 VAddMicrokernelTester()
6089 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006090 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006091 }
6092 }
6093
6094 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_gt_24) {
6095 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
6096 VAddMicrokernelTester()
6097 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006098 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006099 }
6100 }
6101
6102 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a) {
6103 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6104 VAddMicrokernelTester()
6105 .batch_size(batch_size)
6106 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006107 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006108 }
6109 }
6110
6111 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_b) {
6112 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6113 VAddMicrokernelTester()
6114 .batch_size(batch_size)
6115 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006116 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006117 }
6118 }
6119
6120 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a_and_b) {
6121 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6122 VAddMicrokernelTester()
6123 .batch_size(batch_size)
6124 .inplace_a(true)
6125 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006126 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006127 }
6128 }
6129
6130 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_zero_point) {
6131 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6132 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
6133 VAddMicrokernelTester()
6134 .batch_size(batch_size)
6135 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006136 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006137 }
6138 }
6139 }
6140
6141 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_zero_point) {
6142 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6143 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
6144 VAddMicrokernelTester()
6145 .batch_size(batch_size)
6146 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006147 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006148 }
6149 }
6150 }
6151
6152 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_zero_point) {
6153 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6154 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6155 VAddMicrokernelTester()
6156 .batch_size(batch_size)
6157 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006158 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006159 }
6160 }
6161 }
6162
6163 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_scale) {
6164 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6165 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6166 VAddMicrokernelTester()
6167 .batch_size(batch_size)
6168 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006169 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006170 }
6171 }
6172 }
6173
6174 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_scale) {
6175 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6176 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6177 VAddMicrokernelTester()
6178 .batch_size(batch_size)
6179 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006180 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006181 }
6182 }
6183 }
6184
6185 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_scale) {
6186 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6187 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6188 VAddMicrokernelTester()
6189 .batch_size(batch_size)
6190 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006191 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006192 }
6193 }
6194 }
6195
6196 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmin) {
6197 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6198 VAddMicrokernelTester()
6199 .batch_size(batch_size)
6200 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006201 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006202 }
6203 }
6204
6205 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmax) {
6206 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
6207 VAddMicrokernelTester()
6208 .batch_size(batch_size)
6209 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006210 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006211 }
6212 }
Marat Dukhan4c617792021-12-21 15:47:58 -08006213#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07006214
6215
Marat Dukhan4c617792021-12-21 15:47:58 -08006216#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan5df27f82020-09-02 23:59:21 -07006217 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
6218 VAddMicrokernelTester()
6219 .batch_size(32)
Marat Dukhan66913242021-07-20 16:11:23 -07006220 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006221 }
6222
6223 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
6224 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
6225 VAddMicrokernelTester()
6226 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006227 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006228 }
6229 }
6230
6231 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
6232 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
6233 VAddMicrokernelTester()
6234 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006235 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006236 }
6237 }
6238
6239 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
6240 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
6241 VAddMicrokernelTester()
6242 .batch_size(batch_size)
Marat Dukhan66913242021-07-20 16:11:23 -07006243 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006244 }
6245 }
6246
6247 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
6248 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6249 VAddMicrokernelTester()
6250 .batch_size(batch_size)
6251 .inplace_a(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006252 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006253 }
6254 }
6255
6256 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
6257 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6258 VAddMicrokernelTester()
6259 .batch_size(batch_size)
6260 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006261 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006262 }
6263 }
6264
6265 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
6266 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6267 VAddMicrokernelTester()
6268 .batch_size(batch_size)
6269 .inplace_a(true)
6270 .inplace_b(true)
Marat Dukhan66913242021-07-20 16:11:23 -07006271 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006272 }
6273 }
6274
6275 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
6276 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6277 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
6278 VAddMicrokernelTester()
6279 .batch_size(batch_size)
6280 .a_zero_point(a_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006281 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006282 }
6283 }
6284 }
6285
6286 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
6287 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6288 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
6289 VAddMicrokernelTester()
6290 .batch_size(batch_size)
6291 .b_zero_point(b_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006292 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006293 }
6294 }
6295 }
6296
6297 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
6298 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6299 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6300 VAddMicrokernelTester()
6301 .batch_size(batch_size)
6302 .y_zero_point(y_zero_point)
Marat Dukhan66913242021-07-20 16:11:23 -07006303 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006304 }
6305 }
6306 }
6307
6308 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
6309 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6310 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6311 VAddMicrokernelTester()
6312 .batch_size(batch_size)
6313 .a_scale(a_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006314 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006315 }
6316 }
6317 }
6318
6319 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
6320 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6321 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6322 VAddMicrokernelTester()
6323 .batch_size(batch_size)
6324 .b_scale(b_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006325 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006326 }
6327 }
6328 }
6329
6330 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
6331 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6332 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6333 VAddMicrokernelTester()
6334 .batch_size(batch_size)
6335 .y_scale(y_scale)
Marat Dukhan66913242021-07-20 16:11:23 -07006336 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006337 }
6338 }
6339 }
6340
6341 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmin) {
6342 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6343 VAddMicrokernelTester()
6344 .batch_size(batch_size)
6345 .qmin(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006346 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006347 }
6348 }
6349
6350 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmax) {
6351 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
6352 VAddMicrokernelTester()
6353 .batch_size(batch_size)
6354 .qmax(128)
Marat Dukhan66913242021-07-20 16:11:23 -07006355 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
Marat Dukhan5df27f82020-09-02 23:59:21 -07006356 }
6357 }
Marat Dukhan4c617792021-12-21 15:47:58 -08006358#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand481c282021-05-11 23:48:31 -07006359
6360
6361TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
6362 VAddMicrokernelTester()
6363 .batch_size(1)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006364 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006365}
6366
6367TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
6368 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
6369 VAddMicrokernelTester()
6370 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006371 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006372 }
6373}
6374
6375TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a) {
6376 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6377 VAddMicrokernelTester()
6378 .batch_size(batch_size)
6379 .inplace_a(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006380 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006381 }
6382}
6383
6384TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_b) {
6385 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6386 VAddMicrokernelTester()
6387 .batch_size(batch_size)
6388 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006389 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006390 }
6391}
6392
6393TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
6394 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6395 VAddMicrokernelTester()
6396 .batch_size(batch_size)
6397 .inplace_a(true)
6398 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006399 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006400 }
6401}
6402
6403TEST(QS8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
6404 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6405 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
6406 VAddMicrokernelTester()
6407 .batch_size(batch_size)
6408 .a_zero_point(a_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006409 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006410 }
6411 }
6412}
6413
6414TEST(QS8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
6415 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6416 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
6417 VAddMicrokernelTester()
6418 .batch_size(batch_size)
6419 .b_zero_point(b_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006420 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006421 }
6422 }
6423}
6424
6425TEST(QS8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
6426 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6427 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6428 VAddMicrokernelTester()
6429 .batch_size(batch_size)
6430 .y_zero_point(y_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006431 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006432 }
6433 }
6434}
6435
6436TEST(QS8_VADD_MINMAX__SCALAR_X1, a_scale) {
6437 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6438 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6439 VAddMicrokernelTester()
6440 .batch_size(batch_size)
6441 .a_scale(a_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006442 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006443 }
6444 }
6445}
6446
6447TEST(QS8_VADD_MINMAX__SCALAR_X1, b_scale) {
6448 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6449 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6450 VAddMicrokernelTester()
6451 .batch_size(batch_size)
6452 .b_scale(b_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006453 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006454 }
6455 }
6456}
6457
6458TEST(QS8_VADD_MINMAX__SCALAR_X1, y_scale) {
6459 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6460 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6461 VAddMicrokernelTester()
6462 .batch_size(batch_size)
6463 .y_scale(y_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006464 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006465 }
6466 }
6467}
6468
6469TEST(QS8_VADD_MINMAX__SCALAR_X1, qmin) {
6470 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6471 VAddMicrokernelTester()
6472 .batch_size(batch_size)
6473 .qmin(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006474 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006475 }
6476}
6477
6478TEST(QS8_VADD_MINMAX__SCALAR_X1, qmax) {
6479 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
6480 VAddMicrokernelTester()
6481 .batch_size(batch_size)
6482 .qmax(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006483 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006484 }
6485}
6486
6487TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
6488 VAddMicrokernelTester()
6489 .batch_size(2)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006490 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006491}
6492
6493TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
6494 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
6495 VAddMicrokernelTester()
6496 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006497 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006498 }
6499}
6500
6501TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
6502 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
6503 VAddMicrokernelTester()
6504 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006505 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006506 }
6507}
6508
6509TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
6510 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
6511 VAddMicrokernelTester()
6512 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006513 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006514 }
6515}
6516
6517TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a) {
6518 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6519 VAddMicrokernelTester()
6520 .batch_size(batch_size)
6521 .inplace_a(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006522 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006523 }
6524}
6525
6526TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_b) {
6527 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6528 VAddMicrokernelTester()
6529 .batch_size(batch_size)
6530 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006531 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006532 }
6533}
6534
6535TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
6536 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6537 VAddMicrokernelTester()
6538 .batch_size(batch_size)
6539 .inplace_a(true)
6540 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006541 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006542 }
6543}
6544
6545TEST(QS8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
6546 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6547 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
6548 VAddMicrokernelTester()
6549 .batch_size(batch_size)
6550 .a_zero_point(a_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006551 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006552 }
6553 }
6554}
6555
6556TEST(QS8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
6557 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6558 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
6559 VAddMicrokernelTester()
6560 .batch_size(batch_size)
6561 .b_zero_point(b_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006562 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006563 }
6564 }
6565}
6566
6567TEST(QS8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
6568 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6569 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6570 VAddMicrokernelTester()
6571 .batch_size(batch_size)
6572 .y_zero_point(y_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006573 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006574 }
6575 }
6576}
6577
6578TEST(QS8_VADD_MINMAX__SCALAR_X2, a_scale) {
6579 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6580 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6581 VAddMicrokernelTester()
6582 .batch_size(batch_size)
6583 .a_scale(a_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006584 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006585 }
6586 }
6587}
6588
6589TEST(QS8_VADD_MINMAX__SCALAR_X2, b_scale) {
6590 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6591 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6592 VAddMicrokernelTester()
6593 .batch_size(batch_size)
6594 .b_scale(b_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006595 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006596 }
6597 }
6598}
6599
6600TEST(QS8_VADD_MINMAX__SCALAR_X2, y_scale) {
6601 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6602 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6603 VAddMicrokernelTester()
6604 .batch_size(batch_size)
6605 .y_scale(y_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006606 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006607 }
6608 }
6609}
6610
6611TEST(QS8_VADD_MINMAX__SCALAR_X2, qmin) {
6612 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6613 VAddMicrokernelTester()
6614 .batch_size(batch_size)
6615 .qmin(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006616 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006617 }
6618}
6619
6620TEST(QS8_VADD_MINMAX__SCALAR_X2, qmax) {
6621 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
6622 VAddMicrokernelTester()
6623 .batch_size(batch_size)
6624 .qmax(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006625 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006626 }
6627}
6628
6629TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
6630 VAddMicrokernelTester()
6631 .batch_size(4)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006632 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006633}
6634
6635TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
6636 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
6637 VAddMicrokernelTester()
6638 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006639 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006640 }
6641}
6642
6643TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
6644 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
6645 VAddMicrokernelTester()
6646 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006647 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006648 }
6649}
6650
6651TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
6652 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
6653 VAddMicrokernelTester()
6654 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006655 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006656 }
6657}
6658
6659TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a) {
6660 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6661 VAddMicrokernelTester()
6662 .batch_size(batch_size)
6663 .inplace_a(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006664 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006665 }
6666}
6667
6668TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_b) {
6669 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6670 VAddMicrokernelTester()
6671 .batch_size(batch_size)
6672 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006673 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006674 }
6675}
6676
6677TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
6678 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6679 VAddMicrokernelTester()
6680 .batch_size(batch_size)
6681 .inplace_a(true)
6682 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006683 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006684 }
6685}
6686
6687TEST(QS8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
6688 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6689 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
6690 VAddMicrokernelTester()
6691 .batch_size(batch_size)
6692 .a_zero_point(a_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006693 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006694 }
6695 }
6696}
6697
6698TEST(QS8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
6699 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6700 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
6701 VAddMicrokernelTester()
6702 .batch_size(batch_size)
6703 .b_zero_point(b_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006704 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006705 }
6706 }
6707}
6708
6709TEST(QS8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
6710 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6711 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
6712 VAddMicrokernelTester()
6713 .batch_size(batch_size)
6714 .y_zero_point(y_zero_point)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006715 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006716 }
6717 }
6718}
6719
6720TEST(QS8_VADD_MINMAX__SCALAR_X4, a_scale) {
6721 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6722 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
6723 VAddMicrokernelTester()
6724 .batch_size(batch_size)
6725 .a_scale(a_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006726 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006727 }
6728 }
6729}
6730
6731TEST(QS8_VADD_MINMAX__SCALAR_X4, b_scale) {
6732 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6733 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
6734 VAddMicrokernelTester()
6735 .batch_size(batch_size)
6736 .b_scale(b_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006737 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006738 }
6739 }
6740}
6741
6742TEST(QS8_VADD_MINMAX__SCALAR_X4, y_scale) {
6743 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6744 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
6745 VAddMicrokernelTester()
6746 .batch_size(batch_size)
6747 .y_scale(y_scale)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006748 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006749 }
6750 }
6751}
6752
6753TEST(QS8_VADD_MINMAX__SCALAR_X4, qmin) {
6754 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6755 VAddMicrokernelTester()
6756 .batch_size(batch_size)
6757 .qmin(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006758 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006759 }
6760}
6761
6762TEST(QS8_VADD_MINMAX__SCALAR_X4, qmax) {
6763 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
6764 VAddMicrokernelTester()
6765 .batch_size(batch_size)
6766 .qmax(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07006767 .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
Marat Dukhand481c282021-05-11 23:48:31 -07006768 }
6769}