blob: 4bb038cb698fda44a78d8e5d753111df9655dacb [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-vadd-minmax.yaml
//   Generator: tools/generate-vbinary-test.py
9

#include <cstddef>
#include <cstdint>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/params-init.h>
#include <xnnpack/vaddsub.h>
#include "vadd-microkernel-tester.h"
19
20
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, parameterized
  // through xnn_init_qu8_add_minmax_neon_params. Every test is gated by
  // TEST_REQUIRES_ARM_NEON and is organized around a batch tile of 8 elements.
  //
  // NOTE(review): the *_zero_point tests sweep the signed range [-128, 127]
  // even though this is a QU8 kernel; presumably the tester's setters narrow
  // the value to an unsigned byte -- confirm against vadd-microkernel-tester.h.

  // Batch size exactly equal to the tile (8).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes that are whole multiples of the tile: 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes smaller than one tile: 1..7.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes between one and two tiles: 9..15.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true); batch sizes 1, 8, ..., 36.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps the zero point of input A in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of input B in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of output Y in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input A geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input B geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of output Y geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, parameterized
  // through xnn_init_qu8_add_minmax_neon_params. Every test is gated by
  // TEST_REQUIRES_ARM_NEON and is organized around a batch tile of 16 elements.
  //
  // NOTE(review): the *_zero_point tests sweep the signed range [-128, 127]
  // even though this is a QU8 kernel; presumably the tester's setters narrow
  // the value to an unsigned byte -- confirm against vadd-microkernel-tester.h.

  // Batch size exactly equal to the tile (16).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes that are whole multiples of the tile: 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes smaller than one tile: 1..15.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes between one and two tiles: 17..31.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true); batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps the zero point of input A in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of input B in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of output Y in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input A geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input B geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of output Y geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, parameterized
  // through xnn_init_qu8_add_minmax_neon_params. Every test is gated by
  // TEST_REQUIRES_ARM_NEON and is organized around a batch tile of 32 elements.
  //
  // NOTE(review): the *_zero_point tests sweep the signed range [-128, 127]
  // even though this is a QU8 kernel; presumably the tester's setters narrow
  // the value to an unsigned byte -- confirm against vadd-microkernel-tester.h.

  // Batch size exactly equal to the tile (32).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes that are whole multiples of the tile: 64, 96, ..., 288.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes smaller than one tile: 1..31.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes between one and two tiles: 33..63.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true); batch sizes 1, 32, ..., 156.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps the zero point of input A in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of input B in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of output Y in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input A geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input B geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of output Y geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, parameterized
  // through xnn_init_qu8_add_minmax_neon_params. Every test is gated by
  // TEST_REQUIRES_ARM_NEON and is organized around a batch tile of 16 elements.
  //
  // NOTE(review): the *_zero_point tests sweep the signed range [-128, 127]
  // even though this is a QU8 kernel; presumably the tester's setters narrow
  // the value to an unsigned byte -- confirm against vadd-microkernel-tester.h.

  // Batch size exactly equal to the tile (16).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes that are whole multiples of the tile: 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes smaller than one tile: 1..15.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes between one and two tiles: 17..31.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true); batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps the zero point of input A in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of input B in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the zero point of output Y in steps of 51.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input A geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of input B geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps the scale of output Y geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
  // parameterized through xnn_init_qu8_add_minmax_sse2_params. Every test is
  // gated by TEST_REQUIRES_X86_SSE2 and is organized around a batch tile of 8
  // elements.
  //
  // NOTE(review): the *_zero_point tests sweep the signed range [-128, 127]
  // even though this is a QU8 kernel; presumably the tester's setters narrow
  // the value to an unsigned byte -- confirm against vadd-microkernel-tester.h.

  // Batch size exactly equal to the tile (8).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Batch sizes that are whole multiples of the tile: 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Batch sizes smaller than one tile: 1..7.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Batch sizes between one and two tiles: 9..15.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with inplace_a(true); batch sizes 1, 8, ..., 36.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Sweeps the zero point of input A in steps of 51.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps the zero point of input B in steps of 51.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps the zero point of output Y in steps of 51.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps the scale of input A geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps the scale of input B geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps the scale of output Y geometrically (x3.14) from 0.1 up to 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700819
Marat Dukhan6e0fc392021-07-19 18:38:24 -0700820
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16.
  // The _x16 suffix and the batch_*_16 case names suggest a 16-element tile;
  // kTile below only names that number. TEST_REQUIRES_X86_SSE2 presumably skips
  // the test on CPUs without SSE2 (see xnnpack/isa-checks.h -- confirm).

  // Batch of exactly 16 elements.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    VAddMicrokernelTester().batch_size(kTile)
      .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Multiples of 16: 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 1 to 15.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n < kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 17 to 31.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Both inplace flags set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester qmin set to 128.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester qmax set to 128.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE2;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
979
980
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8.
  // The _x8 suffix and the batch_*_8 case names suggest an 8-element tile;
  // kTile below only names that number. Parameters are initialized with
  // xnn_init_qu8_add_minmax_sse2_params, the same initializer the SSE2 mul16
  // suites use. TEST_REQUIRES_X86_SSE41 presumably skips the test on CPUs
  // without SSE4.1 (see xnnpack/isa-checks.h -- confirm).

  // Batch of exactly 8 elements.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    VAddMicrokernelTester().batch_size(kTile)
      .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Multiples of 8: 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 1 to 7.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n < kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 9 to 15.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Both inplace flags set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester qmin set to 128.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester qmax set to 128.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16.
  // The _x16 suffix and the batch_*_16 case names suggest a 16-element tile;
  // kTile below only names that number. Parameters are initialized with
  // xnn_init_qu8_add_minmax_sse2_params, the same initializer the SSE2 mul16
  // suites use. TEST_REQUIRES_X86_SSE41 presumably skips the test on CPUs
  // without SSE4.1 (see xnnpack/isa-checks.h -- confirm).

  // Batch of exactly 16 elements.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    VAddMicrokernelTester().batch_size(kTile)
      .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Multiples of 16: 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 1 to 15.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n < kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 17 to 31.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Both inplace flags set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester qmin set to 128.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester qmax set to 128.
  TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE41;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8.
  // The _x8 suffix and the batch_*_8 case names suggest an 8-element tile;
  // kTile below only names that number. Parameters are initialized with
  // xnn_init_qu8_add_minmax_sse2_params, the same initializer the SSE2 mul16
  // suites use. TEST_REQUIRES_X86_AVX presumably skips the test on CPUs
  // without AVX (see xnnpack/isa-checks.h -- confirm).

  // Batch of exactly 8 elements.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    VAddMicrokernelTester().batch_size(kTile)
      .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Multiples of 8: 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 1 to 7.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n < kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 9 to 15.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Both inplace flags set; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester qmin set to 128.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester qmax set to 128.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 8;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16.
  // The _x16 suffix and the batch_*_16 case names suggest a 16-element tile;
  // kTile below only names that number. Parameters are initialized with
  // xnn_init_qu8_add_minmax_sse2_params, the same initializer the SSE2 mul16
  // suites use. TEST_REQUIRES_X86_AVX presumably skips the test on CPUs
  // without AVX (see xnnpack/isa-checks.h -- confirm).

  // Batch of exactly 16 elements.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    VAddMicrokernelTester().batch_size(kTile)
      .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Multiples of 16: 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 1 to 15.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n < kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Every batch size from 17 to 31.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Both inplace flags set; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // a_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_zero_point swept over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester qmin set to 128.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester qmax set to 128.
  TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_AVX;
    const size_t kTile = 16;
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1622 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
1623 TEST_REQUIRES_X86_SSE41;
1624 VAddMicrokernelTester()
1625 .batch_size(8)
1626 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1627 }
1628
1629 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
1630 TEST_REQUIRES_X86_SSE41;
1631 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1632 VAddMicrokernelTester()
1633 .batch_size(batch_size)
1634 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1635 }
1636 }
1637
1638 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
1639 TEST_REQUIRES_X86_SSE41;
1640 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1641 VAddMicrokernelTester()
1642 .batch_size(batch_size)
1643 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1644 }
1645 }
1646
1647 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
1648 TEST_REQUIRES_X86_SSE41;
1649 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1650 VAddMicrokernelTester()
1651 .batch_size(batch_size)
1652 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1653 }
1654 }
1655
1656 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
1657 TEST_REQUIRES_X86_SSE41;
1658 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1659 VAddMicrokernelTester()
1660 .batch_size(batch_size)
1661 .inplace_a(true)
1662 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1663 }
1664 }
1665
1666 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
1667 TEST_REQUIRES_X86_SSE41;
1668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1669 VAddMicrokernelTester()
1670 .batch_size(batch_size)
1671 .inplace_b(true)
1672 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1673 }
1674 }
1675
1676 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
1677 TEST_REQUIRES_X86_SSE41;
1678 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1679 VAddMicrokernelTester()
1680 .batch_size(batch_size)
1681 .inplace_a(true)
1682 .inplace_b(true)
1683 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1684 }
1685 }
1686
Marat Dukhan87bd5112021-08-02 11:43:53 -07001687 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
1688 TEST_REQUIRES_X86_SSE41;
1689 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1690 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1691 VAddMicrokernelTester()
1692 .batch_size(batch_size)
1693 .a_zero_point(a_zero_point)
1694 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1695 }
1696 }
1697 }
1698
1699 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
1700 TEST_REQUIRES_X86_SSE41;
1701 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1702 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1703 VAddMicrokernelTester()
1704 .batch_size(batch_size)
1705 .b_zero_point(b_zero_point)
1706 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1707 }
1708 }
1709 }
1710
1711 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
1712 TEST_REQUIRES_X86_SSE41;
1713 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1714 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1715 VAddMicrokernelTester()
1716 .batch_size(batch_size)
1717 .y_zero_point(y_zero_point)
1718 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1719 }
1720 }
1721 }
1722
1723 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
1724 TEST_REQUIRES_X86_SSE41;
1725 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1727 VAddMicrokernelTester()
1728 .batch_size(batch_size)
1729 .a_scale(a_scale)
1730 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1731 }
1732 }
1733 }
1734
1735 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
1736 TEST_REQUIRES_X86_SSE41;
1737 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1738 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1739 VAddMicrokernelTester()
1740 .batch_size(batch_size)
1741 .b_scale(b_scale)
1742 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1743 }
1744 }
1745 }
1746
1747 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
1748 TEST_REQUIRES_X86_SSE41;
1749 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1750 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1751 VAddMicrokernelTester()
1752 .batch_size(batch_size)
1753 .y_scale(y_scale)
1754 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1755 }
1756 }
1757 }
1758
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001759 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
1760 TEST_REQUIRES_X86_SSE41;
1761 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1762 VAddMicrokernelTester()
1763 .batch_size(batch_size)
1764 .qmin(128)
1765 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1766 }
1767 }
1768
1769 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
1770 TEST_REQUIRES_X86_SSE41;
1771 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772 VAddMicrokernelTester()
1773 .batch_size(batch_size)
1774 .qmax(128)
1775 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1776 }
1777 }
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1782 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
1783 TEST_REQUIRES_X86_SSE41;
1784 VAddMicrokernelTester()
1785 .batch_size(16)
1786 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1787 }
1788
1789 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
1790 TEST_REQUIRES_X86_SSE41;
1791 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1792 VAddMicrokernelTester()
1793 .batch_size(batch_size)
1794 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1795 }
1796 }
1797
1798 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
1799 TEST_REQUIRES_X86_SSE41;
1800 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1801 VAddMicrokernelTester()
1802 .batch_size(batch_size)
1803 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1804 }
1805 }
1806
1807 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
1808 TEST_REQUIRES_X86_SSE41;
1809 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1810 VAddMicrokernelTester()
1811 .batch_size(batch_size)
1812 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1813 }
1814 }
1815
1816 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
1817 TEST_REQUIRES_X86_SSE41;
1818 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1819 VAddMicrokernelTester()
1820 .batch_size(batch_size)
1821 .inplace_a(true)
1822 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1823 }
1824 }
1825
1826 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
1827 TEST_REQUIRES_X86_SSE41;
1828 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1829 VAddMicrokernelTester()
1830 .batch_size(batch_size)
1831 .inplace_b(true)
1832 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1833 }
1834 }
1835
1836 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
1837 TEST_REQUIRES_X86_SSE41;
1838 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1839 VAddMicrokernelTester()
1840 .batch_size(batch_size)
1841 .inplace_a(true)
1842 .inplace_b(true)
1843 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1844 }
1845 }
1846
Marat Dukhan87bd5112021-08-02 11:43:53 -07001847 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
1848 TEST_REQUIRES_X86_SSE41;
1849 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1850 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1851 VAddMicrokernelTester()
1852 .batch_size(batch_size)
1853 .a_zero_point(a_zero_point)
1854 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1855 }
1856 }
1857 }
1858
1859 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
1860 TEST_REQUIRES_X86_SSE41;
1861 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1862 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1863 VAddMicrokernelTester()
1864 .batch_size(batch_size)
1865 .b_zero_point(b_zero_point)
1866 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1867 }
1868 }
1869 }
1870
1871 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
1872 TEST_REQUIRES_X86_SSE41;
1873 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1874 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1875 VAddMicrokernelTester()
1876 .batch_size(batch_size)
1877 .y_zero_point(y_zero_point)
1878 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1879 }
1880 }
1881 }
1882
1883 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
1884 TEST_REQUIRES_X86_SSE41;
1885 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1886 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1887 VAddMicrokernelTester()
1888 .batch_size(batch_size)
1889 .a_scale(a_scale)
1890 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1891 }
1892 }
1893 }
1894
1895 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
1896 TEST_REQUIRES_X86_SSE41;
1897 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1898 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1899 VAddMicrokernelTester()
1900 .batch_size(batch_size)
1901 .b_scale(b_scale)
1902 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1903 }
1904 }
1905 }
1906
1907 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
1908 TEST_REQUIRES_X86_SSE41;
1909 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1910 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1911 VAddMicrokernelTester()
1912 .batch_size(batch_size)
1913 .y_scale(y_scale)
1914 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1915 }
1916 }
1917 }
1918
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001919 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
1920 TEST_REQUIRES_X86_SSE41;
1921 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1922 VAddMicrokernelTester()
1923 .batch_size(batch_size)
1924 .qmin(128)
1925 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1926 }
1927 }
1928
1929 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
1930 TEST_REQUIRES_X86_SSE41;
1931 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1932 VAddMicrokernelTester()
1933 .batch_size(batch_size)
1934 .qmax(128)
1935 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1936 }
1937 }
1938#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939
1940
1941#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1942 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
1943 TEST_REQUIRES_X86_AVX;
1944 VAddMicrokernelTester()
1945 .batch_size(8)
1946 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1947 }
1948
1949 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
1950 TEST_REQUIRES_X86_AVX;
1951 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1952 VAddMicrokernelTester()
1953 .batch_size(batch_size)
1954 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1955 }
1956 }
1957
1958 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
1959 TEST_REQUIRES_X86_AVX;
1960 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1961 VAddMicrokernelTester()
1962 .batch_size(batch_size)
1963 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1964 }
1965 }
1966
1967 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
1968 TEST_REQUIRES_X86_AVX;
1969 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1970 VAddMicrokernelTester()
1971 .batch_size(batch_size)
1972 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1973 }
1974 }
1975
1976 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
1977 TEST_REQUIRES_X86_AVX;
1978 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1979 VAddMicrokernelTester()
1980 .batch_size(batch_size)
1981 .inplace_a(true)
1982 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1983 }
1984 }
1985
1986 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
1987 TEST_REQUIRES_X86_AVX;
1988 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1989 VAddMicrokernelTester()
1990 .batch_size(batch_size)
1991 .inplace_b(true)
1992 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1993 }
1994 }
1995
1996 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
1997 TEST_REQUIRES_X86_AVX;
1998 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1999 VAddMicrokernelTester()
2000 .batch_size(batch_size)
2001 .inplace_a(true)
2002 .inplace_b(true)
2003 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2004 }
2005 }
2006
Marat Dukhan87bd5112021-08-02 11:43:53 -07002007 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
2008 TEST_REQUIRES_X86_AVX;
2009 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2010 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2011 VAddMicrokernelTester()
2012 .batch_size(batch_size)
2013 .a_zero_point(a_zero_point)
2014 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2015 }
2016 }
2017 }
2018
2019 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
2020 TEST_REQUIRES_X86_AVX;
2021 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2022 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2023 VAddMicrokernelTester()
2024 .batch_size(batch_size)
2025 .b_zero_point(b_zero_point)
2026 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2027 }
2028 }
2029 }
2030
2031 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
2032 TEST_REQUIRES_X86_AVX;
2033 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2034 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2035 VAddMicrokernelTester()
2036 .batch_size(batch_size)
2037 .y_zero_point(y_zero_point)
2038 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2039 }
2040 }
2041 }
2042
2043 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
2044 TEST_REQUIRES_X86_AVX;
2045 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2046 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2047 VAddMicrokernelTester()
2048 .batch_size(batch_size)
2049 .a_scale(a_scale)
2050 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2051 }
2052 }
2053 }
2054
2055 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
2056 TEST_REQUIRES_X86_AVX;
2057 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2058 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2059 VAddMicrokernelTester()
2060 .batch_size(batch_size)
2061 .b_scale(b_scale)
2062 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2063 }
2064 }
2065 }
2066
2067 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
2068 TEST_REQUIRES_X86_AVX;
2069 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2070 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2071 VAddMicrokernelTester()
2072 .batch_size(batch_size)
2073 .y_scale(y_scale)
2074 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2075 }
2076 }
2077 }
2078
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002079 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
2080 TEST_REQUIRES_X86_AVX;
2081 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2082 VAddMicrokernelTester()
2083 .batch_size(batch_size)
2084 .qmin(128)
2085 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2086 }
2087 }
2088
2089 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
2090 TEST_REQUIRES_X86_AVX;
2091 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2092 VAddMicrokernelTester()
2093 .batch_size(batch_size)
2094 .qmax(128)
2095 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2096 }
2097 }
2098#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099
2100
2101#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2102 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
2103 TEST_REQUIRES_X86_AVX;
2104 VAddMicrokernelTester()
2105 .batch_size(16)
2106 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2107 }
2108
2109 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
2110 TEST_REQUIRES_X86_AVX;
2111 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2112 VAddMicrokernelTester()
2113 .batch_size(batch_size)
2114 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2115 }
2116 }
2117
2118 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
2119 TEST_REQUIRES_X86_AVX;
2120 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2121 VAddMicrokernelTester()
2122 .batch_size(batch_size)
2123 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2124 }
2125 }
2126
2127 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
2128 TEST_REQUIRES_X86_AVX;
2129 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2130 VAddMicrokernelTester()
2131 .batch_size(batch_size)
2132 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2133 }
2134 }
2135
2136 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
2137 TEST_REQUIRES_X86_AVX;
2138 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2139 VAddMicrokernelTester()
2140 .batch_size(batch_size)
2141 .inplace_a(true)
2142 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2143 }
2144 }
2145
2146 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
2147 TEST_REQUIRES_X86_AVX;
2148 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2149 VAddMicrokernelTester()
2150 .batch_size(batch_size)
2151 .inplace_b(true)
2152 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2153 }
2154 }
2155
2156 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
2157 TEST_REQUIRES_X86_AVX;
2158 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2159 VAddMicrokernelTester()
2160 .batch_size(batch_size)
2161 .inplace_a(true)
2162 .inplace_b(true)
2163 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2164 }
2165 }
2166
Marat Dukhan87bd5112021-08-02 11:43:53 -07002167 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
2168 TEST_REQUIRES_X86_AVX;
2169 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2170 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2171 VAddMicrokernelTester()
2172 .batch_size(batch_size)
2173 .a_zero_point(a_zero_point)
2174 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2175 }
2176 }
2177 }
2178
2179 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
2180 TEST_REQUIRES_X86_AVX;
2181 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2182 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2183 VAddMicrokernelTester()
2184 .batch_size(batch_size)
2185 .b_zero_point(b_zero_point)
2186 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2187 }
2188 }
2189 }
2190
2191 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
2192 TEST_REQUIRES_X86_AVX;
2193 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2194 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2195 VAddMicrokernelTester()
2196 .batch_size(batch_size)
2197 .y_zero_point(y_zero_point)
2198 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2199 }
2200 }
2201 }
2202
2203 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
2204 TEST_REQUIRES_X86_AVX;
2205 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2206 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2207 VAddMicrokernelTester()
2208 .batch_size(batch_size)
2209 .a_scale(a_scale)
2210 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2211 }
2212 }
2213 }
2214
2215 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
2216 TEST_REQUIRES_X86_AVX;
2217 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2218 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2219 VAddMicrokernelTester()
2220 .batch_size(batch_size)
2221 .b_scale(b_scale)
2222 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2223 }
2224 }
2225 }
2226
2227 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
2228 TEST_REQUIRES_X86_AVX;
2229 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2230 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2231 VAddMicrokernelTester()
2232 .batch_size(batch_size)
2233 .y_scale(y_scale)
2234 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2235 }
2236 }
2237 }
2238
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002239 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
2240 TEST_REQUIRES_X86_AVX;
2241 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2242 VAddMicrokernelTester()
2243 .batch_size(batch_size)
2244 .qmin(128)
2245 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2246 }
2247 }
2248
2249 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
2250 TEST_REQUIRES_X86_AVX;
2251 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2252 VAddMicrokernelTester()
2253 .batch_size(batch_size)
2254 .qmax(128)
2255 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2256 }
2257 }
2258#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260
2261#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2262 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
2263 TEST_REQUIRES_X86_XOP;
2264 VAddMicrokernelTester()
2265 .batch_size(8)
2266 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2267 }
2268
2269 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
2270 TEST_REQUIRES_X86_XOP;
2271 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2272 VAddMicrokernelTester()
2273 .batch_size(batch_size)
2274 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2275 }
2276 }
2277
2278 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
2279 TEST_REQUIRES_X86_XOP;
2280 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2281 VAddMicrokernelTester()
2282 .batch_size(batch_size)
2283 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2284 }
2285 }
2286
2287 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
2288 TEST_REQUIRES_X86_XOP;
2289 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2290 VAddMicrokernelTester()
2291 .batch_size(batch_size)
2292 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2293 }
2294 }
2295
2296 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
2297 TEST_REQUIRES_X86_XOP;
2298 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2299 VAddMicrokernelTester()
2300 .batch_size(batch_size)
2301 .inplace_a(true)
2302 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2303 }
2304 }
2305
2306 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
2307 TEST_REQUIRES_X86_XOP;
2308 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2309 VAddMicrokernelTester()
2310 .batch_size(batch_size)
2311 .inplace_b(true)
2312 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2313 }
2314 }
2315
2316 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
2317 TEST_REQUIRES_X86_XOP;
2318 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2319 VAddMicrokernelTester()
2320 .batch_size(batch_size)
2321 .inplace_a(true)
2322 .inplace_b(true)
2323 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2324 }
2325 }
2326
Marat Dukhan87bd5112021-08-02 11:43:53 -07002327 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
2328 TEST_REQUIRES_X86_XOP;
2329 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2330 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2331 VAddMicrokernelTester()
2332 .batch_size(batch_size)
2333 .a_zero_point(a_zero_point)
2334 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2335 }
2336 }
2337 }
2338
2339 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
2340 TEST_REQUIRES_X86_XOP;
2341 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2342 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2343 VAddMicrokernelTester()
2344 .batch_size(batch_size)
2345 .b_zero_point(b_zero_point)
2346 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2347 }
2348 }
2349 }
2350
2351 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
2352 TEST_REQUIRES_X86_XOP;
2353 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2354 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2355 VAddMicrokernelTester()
2356 .batch_size(batch_size)
2357 .y_zero_point(y_zero_point)
2358 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2359 }
2360 }
2361 }
2362
2363 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
2364 TEST_REQUIRES_X86_XOP;
2365 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2366 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2367 VAddMicrokernelTester()
2368 .batch_size(batch_size)
2369 .a_scale(a_scale)
2370 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2371 }
2372 }
2373 }
2374
2375 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
2376 TEST_REQUIRES_X86_XOP;
2377 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2378 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2379 VAddMicrokernelTester()
2380 .batch_size(batch_size)
2381 .b_scale(b_scale)
2382 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2383 }
2384 }
2385 }
2386
2387 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
2388 TEST_REQUIRES_X86_XOP;
2389 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2390 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2391 VAddMicrokernelTester()
2392 .batch_size(batch_size)
2393 .y_scale(y_scale)
2394 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2395 }
2396 }
2397 }
2398
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002399 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
2400 TEST_REQUIRES_X86_XOP;
2401 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2402 VAddMicrokernelTester()
2403 .batch_size(batch_size)
2404 .qmin(128)
2405 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2406 }
2407 }
2408
2409 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
2410 TEST_REQUIRES_X86_XOP;
2411 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2412 VAddMicrokernelTester()
2413 .batch_size(batch_size)
2414 .qmax(128)
2415 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2416 }
2417 }
2418#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419
2420
2421#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2422 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
2423 TEST_REQUIRES_X86_XOP;
2424 VAddMicrokernelTester()
2425 .batch_size(16)
2426 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2427 }
2428
2429 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
2430 TEST_REQUIRES_X86_XOP;
2431 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2432 VAddMicrokernelTester()
2433 .batch_size(batch_size)
2434 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2435 }
2436 }
2437
2438 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
2439 TEST_REQUIRES_X86_XOP;
2440 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2441 VAddMicrokernelTester()
2442 .batch_size(batch_size)
2443 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2444 }
2445 }
2446
2447 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
2448 TEST_REQUIRES_X86_XOP;
2449 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2450 VAddMicrokernelTester()
2451 .batch_size(batch_size)
2452 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2453 }
2454 }
2455
2456 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
2457 TEST_REQUIRES_X86_XOP;
2458 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2459 VAddMicrokernelTester()
2460 .batch_size(batch_size)
2461 .inplace_a(true)
2462 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2463 }
2464 }
2465
2466 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
2467 TEST_REQUIRES_X86_XOP;
2468 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2469 VAddMicrokernelTester()
2470 .batch_size(batch_size)
2471 .inplace_b(true)
2472 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2473 }
2474 }
2475
2476 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
2477 TEST_REQUIRES_X86_XOP;
2478 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2479 VAddMicrokernelTester()
2480 .batch_size(batch_size)
2481 .inplace_a(true)
2482 .inplace_b(true)
2483 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2484 }
2485 }
2486
Marat Dukhan87bd5112021-08-02 11:43:53 -07002487 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
2488 TEST_REQUIRES_X86_XOP;
2489 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2490 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2491 VAddMicrokernelTester()
2492 .batch_size(batch_size)
2493 .a_zero_point(a_zero_point)
2494 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2495 }
2496 }
2497 }
2498
2499 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
2500 TEST_REQUIRES_X86_XOP;
2501 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2502 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2503 VAddMicrokernelTester()
2504 .batch_size(batch_size)
2505 .b_zero_point(b_zero_point)
2506 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2507 }
2508 }
2509 }
2510
2511 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
2512 TEST_REQUIRES_X86_XOP;
2513 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2514 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2515 VAddMicrokernelTester()
2516 .batch_size(batch_size)
2517 .y_zero_point(y_zero_point)
2518 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2519 }
2520 }
2521 }
2522
2523 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
2524 TEST_REQUIRES_X86_XOP;
2525 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2526 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2527 VAddMicrokernelTester()
2528 .batch_size(batch_size)
2529 .a_scale(a_scale)
2530 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2531 }
2532 }
2533 }
2534
2535 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
2536 TEST_REQUIRES_X86_XOP;
2537 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2538 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2539 VAddMicrokernelTester()
2540 .batch_size(batch_size)
2541 .b_scale(b_scale)
2542 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2543 }
2544 }
2545 }
2546
2547 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
2548 TEST_REQUIRES_X86_XOP;
2549 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2550 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2551 VAddMicrokernelTester()
2552 .batch_size(batch_size)
2553 .y_scale(y_scale)
2554 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2555 }
2556 }
2557 }
2558
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002559 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
2560 TEST_REQUIRES_X86_XOP;
2561 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2562 VAddMicrokernelTester()
2563 .batch_size(batch_size)
2564 .qmin(128)
2565 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2566 }
2567 }
2568
2569 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
2570 TEST_REQUIRES_X86_XOP;
2571 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2572 VAddMicrokernelTester()
2573 .batch_size(batch_size)
2574 .qmax(128)
2575 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2576 }
2577 }
2578#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579
2580
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8. Every test
  // begins with TEST_REQUIRES_X86_AVX2 and passes
  // xnn_init_qu8_add_minmax_avx2_params as the params initializer.

  // Single run at batch_size = 8.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX2;
    VAddMicrokernelTester().batch_size(8).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
  }

  // Batch sizes 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 16; n < 80; n += 8) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n < 8; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 9; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // The remaining tests sweep batch sizes 1, 8, 15, 22, 29, 36.

  // inplace_a(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // inplace_a(true) together with inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // a_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // b_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // y_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // qmin(128).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // qmax(128).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739
2740
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16. Every test
  // begins with TEST_REQUIRES_X86_AVX2 and passes
  // xnn_init_qu8_add_minmax_avx2_params as the params initializer.

  // Single run at batch_size = 16.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX2;
    VAddMicrokernelTester().batch_size(16).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
  }

  // Batch sizes 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // The remaining tests sweep batch sizes 1, 16, 31, 46, 61, 76.

  // inplace_a(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // inplace_a(true) together with inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // a_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // b_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // y_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
      }
    }
  }

  // qmin(128).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }

  // qmax(128).
  TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899
2900
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16. Every
  // test begins with TEST_REQUIRES_X86_AVX512SKX and passes
  // xnn_init_qu8_add_minmax_avx512_params as the params initializer.

  // Single run at batch_size = 16.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    VAddMicrokernelTester().batch_size(16).Test(
        xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
  }

  // Batch sizes 32, 48, ..., 144.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // The remaining tests sweep batch sizes 1, 16, 31, 46, 61, 76.

  // inplace_a(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // inplace_a(true) together with inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // a_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // b_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // y_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // qmin(128).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // qmax(128).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059
3060
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32. Every
  // test begins with TEST_REQUIRES_X86_AVX512SKX and passes
  // xnn_init_qu8_add_minmax_avx512_params as the params initializer.

  // Single run at batch_size = 32.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    VAddMicrokernelTester().batch_size(32).Test(
        xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
  }

  // Batch sizes 64, 96, ..., 288.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 64; n < 320; n += 32) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // Batch sizes 1 through 31.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // Batch sizes 33 through 63.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 33; n < 64; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // The remaining tests sweep batch sizes 1, 32, 63, 94, 125, 156.

  // inplace_a(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // inplace_a(true) together with inplace_b(true).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // a_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // b_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // y_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
      }
    }
  }

  // qmin(128).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }

  // qmax(128).
  TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219
3220
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8. Every test passes
  // xnn_init_qu8_add_minmax_wasmsimd_params as the params initializer; none
  // of them uses a TEST_REQUIRES_* guard.

  // Single run at batch_size = 8.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
    VAddMicrokernelTester().batch_size(8).Test(
        xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
  }

  // Batch sizes 16, 24, ..., 72.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
    for (size_t n = 16; n < 80; n += 8) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
    for (size_t n = 1; n < 8; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
    for (size_t n = 9; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // The remaining tests sweep batch sizes 1, 8, 15, 22, 29, 36.

  // inplace_a(true).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_b(true).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_a(true) together with inplace_b(true).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // a_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_zero_point from -128 to 127 in steps of 51.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // qmin(128).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmin) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // qmax(128).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmax) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan76e78c82021-07-20 21:11:23 -07003364
3365
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, all using
  // xnn_init_qu8_add_minmax_wasmsimd_params to initialise parameters.

  // Single run with batch_size set to 16.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
    VAddMicrokernelTester().batch_size(16)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
  }

  // Multiples of 16 from 32 up to (but excluding) 160.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Every batch size below 16 (1 through 15).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Every batch size strictly between 16 and 32 (17 through 31).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 16, ..., 76 (step 15, bounded by 80).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // a_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 16, ..., 76.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 16, ..., 76.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 16, ..., 76.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // a_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmin) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // qmax(128) for batch sizes 1, 16, ..., 76.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmax) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -07003509
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003510
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, all using
  // xnn_init_qu8_add_minmax_wasmsimd_params to initialise parameters.

  // Single run with batch_size set to 32.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
    VAddMicrokernelTester().batch_size(32)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }

  // Multiples of 32 from 64 up to (but excluding) 320.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
    for (size_t n = 64; n < 320; n += 32) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Every batch size below 32 (1 through 31).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
    for (size_t n = 1; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Every batch size strictly between 32 and 64 (33 through 63).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
    for (size_t n = 33; n < 64; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_a(true) for batch sizes 1, 32, ..., 156 (step 31, bounded by 160).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // inplace_b(true) for batch sizes 1, 32, ..., 156.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // Both inplace_a(true) and inplace_b(true) for batch sizes 1, 32, ..., 156.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // a_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 32, ..., 156.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 32, ..., 156.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_zero_point over {-128, -77, -26, 25, 76, 127} for batch sizes 1, 32, ..., 156.
  // NOTE(review): the range is signed although the kernel is QU8 -- confirm intended.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // a_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // b_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // y_scale grows geometrically (start 0.1f, factor 3.14f, while <= 10.0f).
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      }
    }
  }

  // qmin(128) for batch sizes 1, 32, ..., 156.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, qmin) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }

  // qmax(128) for batch sizes 1, 32, ..., 156.
  TEST(QU8_VADD_MINMAX__WASMSIMD_X32, qmax) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhane20a8732021-12-07 17:11:37 -08003654
3655
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003656TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003657 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003658 .batch_size(1)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003659 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003660}
3661
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003662TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
3663 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003664 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003665 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003666 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003667 }
3668}
3669
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003670TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a) {
3671 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003672 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003673 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003674 .inplace_a(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003675 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003676 }
3677}
3678
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003679TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_b) {
3680 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003681 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003682 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003683 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003684 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003685 }
3686}
3687
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003688TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
3689 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003690 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003691 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003692 .inplace_a(true)
3693 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003694 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003695 }
3696}
3697
Marat Dukhan87bd5112021-08-02 11:43:53 -07003698TEST(QU8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
3699 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3700 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3701 VAddMicrokernelTester()
3702 .batch_size(batch_size)
3703 .a_zero_point(a_zero_point)
3704 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3705 }
3706 }
3707}
3708
3709TEST(QU8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
3710 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3711 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3712 VAddMicrokernelTester()
3713 .batch_size(batch_size)
3714 .b_zero_point(b_zero_point)
3715 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3716 }
3717 }
3718}
3719
3720TEST(QU8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
3721 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3722 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3723 VAddMicrokernelTester()
3724 .batch_size(batch_size)
3725 .y_zero_point(y_zero_point)
3726 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3727 }
3728 }
3729}
3730
3731TEST(QU8_VADD_MINMAX__SCALAR_X1, a_scale) {
3732 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3733 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3734 VAddMicrokernelTester()
3735 .batch_size(batch_size)
3736 .a_scale(a_scale)
3737 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3738 }
3739 }
3740}
3741
3742TEST(QU8_VADD_MINMAX__SCALAR_X1, b_scale) {
3743 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3744 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3745 VAddMicrokernelTester()
3746 .batch_size(batch_size)
3747 .b_scale(b_scale)
3748 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3749 }
3750 }
3751}
3752
3753TEST(QU8_VADD_MINMAX__SCALAR_X1, y_scale) {
3754 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3755 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3756 VAddMicrokernelTester()
3757 .batch_size(batch_size)
3758 .y_scale(y_scale)
3759 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3760 }
3761 }
3762}
3763
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003764TEST(QU8_VADD_MINMAX__SCALAR_X1, qmin) {
3765 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003766 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003767 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003768 .qmin(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003769 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003770 }
3771}
3772
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003773TEST(QU8_VADD_MINMAX__SCALAR_X1, qmax) {
3774 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003775 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003776 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003777 .qmax(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003778 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003779 }
Marat Dukhan76e78c82021-07-20 21:11:23 -07003780}
3781
3782TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
3783 VAddMicrokernelTester()
3784 .batch_size(2)
3785 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3786}
3787
3788TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
3789 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
3790 VAddMicrokernelTester()
3791 .batch_size(batch_size)
3792 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3793 }
3794}
3795
3796TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
3797 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
3798 VAddMicrokernelTester()
3799 .batch_size(batch_size)
3800 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3801 }
3802}
3803
3804TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
3805 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
3806 VAddMicrokernelTester()
3807 .batch_size(batch_size)
3808 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3809 }
3810}
3811
3812TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a) {
3813 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3814 VAddMicrokernelTester()
3815 .batch_size(batch_size)
3816 .inplace_a(true)
3817 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3818 }
3819}
3820
3821TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_b) {
3822 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3823 VAddMicrokernelTester()
3824 .batch_size(batch_size)
3825 .inplace_b(true)
3826 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3827 }
3828}
3829
3830TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
3831 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3832 VAddMicrokernelTester()
3833 .batch_size(batch_size)
3834 .inplace_a(true)
3835 .inplace_b(true)
3836 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3837 }
3838}
3839
Marat Dukhan87bd5112021-08-02 11:43:53 -07003840TEST(QU8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
3841 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3842 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3843 VAddMicrokernelTester()
3844 .batch_size(batch_size)
3845 .a_zero_point(a_zero_point)
3846 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3847 }
3848 }
3849}
3850
3851TEST(QU8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
3852 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3853 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3854 VAddMicrokernelTester()
3855 .batch_size(batch_size)
3856 .b_zero_point(b_zero_point)
3857 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3858 }
3859 }
3860}
3861
3862TEST(QU8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
3863 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3864 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3865 VAddMicrokernelTester()
3866 .batch_size(batch_size)
3867 .y_zero_point(y_zero_point)
3868 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3869 }
3870 }
3871}
3872
3873TEST(QU8_VADD_MINMAX__SCALAR_X2, a_scale) {
3874 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3875 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3876 VAddMicrokernelTester()
3877 .batch_size(batch_size)
3878 .a_scale(a_scale)
3879 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3880 }
3881 }
3882}
3883
3884TEST(QU8_VADD_MINMAX__SCALAR_X2, b_scale) {
3885 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3886 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3887 VAddMicrokernelTester()
3888 .batch_size(batch_size)
3889 .b_scale(b_scale)
3890 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3891 }
3892 }
3893}
3894
3895TEST(QU8_VADD_MINMAX__SCALAR_X2, y_scale) {
3896 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3897 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3898 VAddMicrokernelTester()
3899 .batch_size(batch_size)
3900 .y_scale(y_scale)
3901 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3902 }
3903 }
3904}
3905
Marat Dukhan76e78c82021-07-20 21:11:23 -07003906TEST(QU8_VADD_MINMAX__SCALAR_X2, qmin) {
3907 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3908 VAddMicrokernelTester()
3909 .batch_size(batch_size)
3910 .qmin(128)
3911 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3912 }
3913}
3914
3915TEST(QU8_VADD_MINMAX__SCALAR_X2, qmax) {
3916 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3917 VAddMicrokernelTester()
3918 .batch_size(batch_size)
3919 .qmax(128)
3920 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3921 }
3922}
3923
3924TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
3925 VAddMicrokernelTester()
3926 .batch_size(4)
3927 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3928}
3929
3930TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
3931 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
3932 VAddMicrokernelTester()
3933 .batch_size(batch_size)
3934 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3935 }
3936}
3937
3938TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
3939 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
3940 VAddMicrokernelTester()
3941 .batch_size(batch_size)
3942 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3943 }
3944}
3945
3946TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
3947 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
3948 VAddMicrokernelTester()
3949 .batch_size(batch_size)
3950 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3951 }
3952}
3953
3954TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a) {
3955 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3956 VAddMicrokernelTester()
3957 .batch_size(batch_size)
3958 .inplace_a(true)
3959 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3960 }
3961}
3962
3963TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_b) {
3964 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3965 VAddMicrokernelTester()
3966 .batch_size(batch_size)
3967 .inplace_b(true)
3968 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3969 }
3970}
3971
3972TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
3973 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3974 VAddMicrokernelTester()
3975 .batch_size(batch_size)
3976 .inplace_a(true)
3977 .inplace_b(true)
3978 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3979 }
3980}
3981
Marat Dukhan87bd5112021-08-02 11:43:53 -07003982TEST(QU8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
3983 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3984 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3985 VAddMicrokernelTester()
3986 .batch_size(batch_size)
3987 .a_zero_point(a_zero_point)
3988 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3989 }
3990 }
3991}
3992
3993TEST(QU8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
3994 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3995 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3996 VAddMicrokernelTester()
3997 .batch_size(batch_size)
3998 .b_zero_point(b_zero_point)
3999 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4000 }
4001 }
4002}
4003
4004TEST(QU8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
4005 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4006 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4007 VAddMicrokernelTester()
4008 .batch_size(batch_size)
4009 .y_zero_point(y_zero_point)
4010 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4011 }
4012 }
4013}
4014
4015TEST(QU8_VADD_MINMAX__SCALAR_X4, a_scale) {
4016 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4017 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4018 VAddMicrokernelTester()
4019 .batch_size(batch_size)
4020 .a_scale(a_scale)
4021 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4022 }
4023 }
4024}
4025
4026TEST(QU8_VADD_MINMAX__SCALAR_X4, b_scale) {
4027 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4028 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4029 VAddMicrokernelTester()
4030 .batch_size(batch_size)
4031 .b_scale(b_scale)
4032 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4033 }
4034 }
4035}
4036
4037TEST(QU8_VADD_MINMAX__SCALAR_X4, y_scale) {
4038 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4039 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4040 VAddMicrokernelTester()
4041 .batch_size(batch_size)
4042 .y_scale(y_scale)
4043 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4044 }
4045 }
4046}
4047
Marat Dukhan76e78c82021-07-20 21:11:23 -07004048TEST(QU8_VADD_MINMAX__SCALAR_X4, qmin) {
4049 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4050 VAddMicrokernelTester()
4051 .batch_size(batch_size)
4052 .qmin(128)
4053 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4054 }
4055}
4056
4057TEST(QU8_VADD_MINMAX__SCALAR_X4, qmax) {
4058 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
4059 VAddMicrokernelTester()
4060 .batch_size(batch_size)
4061 .qmax(128)
4062 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
4063 }
Marat Dukhan6e0fc392021-07-19 18:38:24 -07004064}