blob: 6bcd3d6728ff189075d54c7bb17cdc7188ad4718 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-vadd-minmax.yaml
//   Generator: tools/generate-vbinary-test.py
9
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015
Marat Dukhan87bd5112021-08-02 11:43:53 -070016#include <xnnpack/params-init.h>
Marat Dukhan64287252021-09-07 16:20:03 -070017#include <xnnpack/vaddsub.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070018#include "vadd-microkernel-tester.h"
19
20
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, driven through
  // VAddMicrokernelTester with xnn_init_qu8_add_minmax_neon_params.
  // Every test starts with the TEST_REQUIRES_ARM_NEON guard (defined outside
  // this file; presumably skips the test when NEON is unavailable).
  // Parameter-sweep tests use batch sizes 1, 8, 15, 22, 29, 36.

  // Single run with a batch size of exactly 8.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 16; n < 80; n += 8) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 8; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 9; n < 16; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): the sweep includes negative values although the kernel is
  // named QU8; confirm how the tester's a_zero_point() setter interprets them.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of a_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of b_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of y_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, driven through
  // VAddMicrokernelTester with xnn_init_qu8_add_minmax_neon_params.
  // Every test starts with the TEST_REQUIRES_ARM_NEON guard (defined outside
  // this file; presumably skips the test when NEON is unavailable).
  // Parameter-sweep tests use batch sizes 1, 16, 31, 46, 61, 76.

  // Single run with a batch size of exactly 16.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): the sweep includes negative values although the kernel is
  // named QU8; confirm how the tester's a_zero_point() setter interprets them.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of a_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of b_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of y_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite for xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, driven through
  // VAddMicrokernelTester with xnn_init_qu8_add_minmax_neon_params.
  // Every test starts with the TEST_REQUIRES_ARM_NEON guard (defined outside
  // this file; presumably skips the test when NEON is unavailable).
  // Parameter-sweep tests use batch sizes 1, 32, 63, 94, 125, 156.

  // Single run with a batch size of exactly 32.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 64; n < 320; n += 32) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 31.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 32; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 33 through 63.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 33; n < 64; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): the sweep includes negative values although the kernel is
  // named QU8; confirm how the tester's a_zero_point() setter interprets them.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of a_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of b_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of y_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite for xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, driven through
  // VAddMicrokernelTester with xnn_init_qu8_add_minmax_neon_params.
  // Every test starts with the TEST_REQUIRES_ARM_NEON guard (defined outside
  // this file; presumably skips the test when NEON is unavailable).
  // Parameter-sweep tests use batch sizes 1, 16, 31, 46, 61, 76.

  // Single run with a batch size of exactly 16.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_a(true).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): the sweep includes negative values although the kernel is
  // named QU8; confirm how the tester's a_zero_point() setter interprets them.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of a_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of b_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Geometric sweep of y_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Suite for xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, driven through
  // VAddMicrokernelTester with xnn_init_qu8_add_minmax_sse2_params.
  // Every test starts with the TEST_REQUIRES_X86_SSE2 guard (defined outside
  // this file; presumably skips the test when SSE2 is unavailable).
  // Parameter-sweep tests use batch sizes 1, 8, 15, 22, 29, 36.

  // Single run with a batch size of exactly 8.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 16; n < 80; n += 8) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 8; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 9; n < 16; ++n) {
      VAddMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with inplace_a(true).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with inplace_b(true).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with both inplace_a(true) and inplace_b(true).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): the sweep includes negative values although the kernel is
  // named QU8; confirm how the tester's a_zero_point() setter interprets them.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Geometric sweep of a_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Geometric sweep of b_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Geometric sweep of y_scale: starts at 0.1 and multiplies by 3.14 while <= 10.
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
      }
    }
  }

  // Tester configured with qmin(128).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }

  // Tester configured with qmax(128).
  TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700819
Marat Dukhan6e0fc392021-07-19 18:38:24 -0700820
Marat Dukhan76e78c82021-07-20 21:11:23 -0700821#if XNN_ARCH_X86 || XNN_ARCH_X86_64
822 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
823 TEST_REQUIRES_X86_SSE2;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700824 VAddMicrokernelTester()
Marat Dukhan76e78c82021-07-20 21:11:23 -0700825 .batch_size(16)
826 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700827 }
828
Marat Dukhan76e78c82021-07-20 21:11:23 -0700829 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
830 TEST_REQUIRES_X86_SSE2;
831 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700832 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700833 .batch_size(batch_size)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700834 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700835 }
836 }
837
Marat Dukhan76e78c82021-07-20 21:11:23 -0700838 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
839 TEST_REQUIRES_X86_SSE2;
840 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700841 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700842 .batch_size(batch_size)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700843 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700844 }
845 }
846
Marat Dukhan76e78c82021-07-20 21:11:23 -0700847 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
848 TEST_REQUIRES_X86_SSE2;
849 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700850 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700851 .batch_size(batch_size)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700852 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700853 }
854 }
855
Marat Dukhan76e78c82021-07-20 21:11:23 -0700856 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
857 TEST_REQUIRES_X86_SSE2;
858 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700859 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700860 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700861 .inplace_a(true)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700862 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700863 }
864 }
865
Marat Dukhan76e78c82021-07-20 21:11:23 -0700866 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
867 TEST_REQUIRES_X86_SSE2;
868 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700869 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700870 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700871 .inplace_b(true)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700872 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700873 }
874 }
875
Marat Dukhan76e78c82021-07-20 21:11:23 -0700876 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
877 TEST_REQUIRES_X86_SSE2;
878 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700879 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700880 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700881 .inplace_a(true)
882 .inplace_b(true)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700883 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700884 }
885 }
886
Marat Dukhan87bd5112021-08-02 11:43:53 -0700887 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
888 TEST_REQUIRES_X86_SSE2;
889 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
890 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
891 VAddMicrokernelTester()
892 .batch_size(batch_size)
893 .a_zero_point(a_zero_point)
894 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
895 }
896 }
897 }
898
899 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
900 TEST_REQUIRES_X86_SSE2;
901 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
902 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
903 VAddMicrokernelTester()
904 .batch_size(batch_size)
905 .b_zero_point(b_zero_point)
906 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
907 }
908 }
909 }
910
911 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
912 TEST_REQUIRES_X86_SSE2;
913 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
914 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
915 VAddMicrokernelTester()
916 .batch_size(batch_size)
917 .y_zero_point(y_zero_point)
918 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
919 }
920 }
921 }
922
923 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
924 TEST_REQUIRES_X86_SSE2;
925 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
926 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
927 VAddMicrokernelTester()
928 .batch_size(batch_size)
929 .a_scale(a_scale)
930 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
931 }
932 }
933 }
934
935 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
936 TEST_REQUIRES_X86_SSE2;
937 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
938 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
939 VAddMicrokernelTester()
940 .batch_size(batch_size)
941 .b_scale(b_scale)
942 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
943 }
944 }
945 }
946
947 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
948 TEST_REQUIRES_X86_SSE2;
949 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
950 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
951 VAddMicrokernelTester()
952 .batch_size(batch_size)
953 .y_scale(y_scale)
954 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
955 }
956 }
957 }
958
Marat Dukhan76e78c82021-07-20 21:11:23 -0700959 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
960 TEST_REQUIRES_X86_SSE2;
961 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700962 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700963 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964 .qmin(128)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700965 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700966 }
967 }
968
Marat Dukhan76e78c82021-07-20 21:11:23 -0700969 TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
970 TEST_REQUIRES_X86_SSE2;
971 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700972 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700973 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700974 .qmax(128)
Marat Dukhan76e78c82021-07-20 21:11:23 -0700975 .Test(xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700976 }
977 }
Marat Dukhan76e78c82021-07-20 21:11:23 -0700978#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
979
980
Marat Dukhan3eac69c2021-07-21 01:42:29 -0700981#if XNN_ARCH_X86 || XNN_ARCH_X86_64
982 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
983 TEST_REQUIRES_X86_SSE41;
984 VAddMicrokernelTester()
985 .batch_size(8)
986 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
987 }
988
989 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
990 TEST_REQUIRES_X86_SSE41;
991 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
992 VAddMicrokernelTester()
993 .batch_size(batch_size)
994 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
995 }
996 }
997
998 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
999 TEST_REQUIRES_X86_SSE41;
1000 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1001 VAddMicrokernelTester()
1002 .batch_size(batch_size)
1003 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1004 }
1005 }
1006
1007 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
1008 TEST_REQUIRES_X86_SSE41;
1009 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1010 VAddMicrokernelTester()
1011 .batch_size(batch_size)
1012 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1013 }
1014 }
1015
1016 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
1017 TEST_REQUIRES_X86_SSE41;
1018 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1019 VAddMicrokernelTester()
1020 .batch_size(batch_size)
1021 .inplace_a(true)
1022 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1023 }
1024 }
1025
1026 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
1027 TEST_REQUIRES_X86_SSE41;
1028 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1029 VAddMicrokernelTester()
1030 .batch_size(batch_size)
1031 .inplace_b(true)
1032 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1033 }
1034 }
1035
1036 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
1037 TEST_REQUIRES_X86_SSE41;
1038 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1039 VAddMicrokernelTester()
1040 .batch_size(batch_size)
1041 .inplace_a(true)
1042 .inplace_b(true)
1043 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1044 }
1045 }
1046
Marat Dukhan87bd5112021-08-02 11:43:53 -07001047 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
1048 TEST_REQUIRES_X86_SSE41;
1049 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1050 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1051 VAddMicrokernelTester()
1052 .batch_size(batch_size)
1053 .a_zero_point(a_zero_point)
1054 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1055 }
1056 }
1057 }
1058
1059 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
1060 TEST_REQUIRES_X86_SSE41;
1061 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1062 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1063 VAddMicrokernelTester()
1064 .batch_size(batch_size)
1065 .b_zero_point(b_zero_point)
1066 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1067 }
1068 }
1069 }
1070
1071 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
1072 TEST_REQUIRES_X86_SSE41;
1073 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1074 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1075 VAddMicrokernelTester()
1076 .batch_size(batch_size)
1077 .y_zero_point(y_zero_point)
1078 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1079 }
1080 }
1081 }
1082
1083 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
1084 TEST_REQUIRES_X86_SSE41;
1085 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1086 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1087 VAddMicrokernelTester()
1088 .batch_size(batch_size)
1089 .a_scale(a_scale)
1090 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1091 }
1092 }
1093 }
1094
1095 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
1096 TEST_REQUIRES_X86_SSE41;
1097 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1098 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1099 VAddMicrokernelTester()
1100 .batch_size(batch_size)
1101 .b_scale(b_scale)
1102 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1103 }
1104 }
1105 }
1106
1107 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
1108 TEST_REQUIRES_X86_SSE41;
1109 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1110 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1111 VAddMicrokernelTester()
1112 .batch_size(batch_size)
1113 .y_scale(y_scale)
1114 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1115 }
1116 }
1117 }
1118
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001119 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
1120 TEST_REQUIRES_X86_SSE41;
1121 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1122 VAddMicrokernelTester()
1123 .batch_size(batch_size)
1124 .qmin(128)
1125 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1126 }
1127 }
1128
1129 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
1130 TEST_REQUIRES_X86_SSE41;
1131 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1132 VAddMicrokernelTester()
1133 .batch_size(batch_size)
1134 .qmax(128)
1135 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1136 }
1137 }
1138#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
1141#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1142 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
1143 TEST_REQUIRES_X86_SSE41;
1144 VAddMicrokernelTester()
1145 .batch_size(16)
1146 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1147 }
1148
1149 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
1150 TEST_REQUIRES_X86_SSE41;
1151 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1152 VAddMicrokernelTester()
1153 .batch_size(batch_size)
1154 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1155 }
1156 }
1157
1158 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
1159 TEST_REQUIRES_X86_SSE41;
1160 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1161 VAddMicrokernelTester()
1162 .batch_size(batch_size)
1163 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1164 }
1165 }
1166
1167 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
1168 TEST_REQUIRES_X86_SSE41;
1169 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1170 VAddMicrokernelTester()
1171 .batch_size(batch_size)
1172 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1173 }
1174 }
1175
1176 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
1177 TEST_REQUIRES_X86_SSE41;
1178 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1179 VAddMicrokernelTester()
1180 .batch_size(batch_size)
1181 .inplace_a(true)
1182 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1183 }
1184 }
1185
1186 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
1187 TEST_REQUIRES_X86_SSE41;
1188 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1189 VAddMicrokernelTester()
1190 .batch_size(batch_size)
1191 .inplace_b(true)
1192 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1193 }
1194 }
1195
1196 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
1197 TEST_REQUIRES_X86_SSE41;
1198 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1199 VAddMicrokernelTester()
1200 .batch_size(batch_size)
1201 .inplace_a(true)
1202 .inplace_b(true)
1203 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1204 }
1205 }
1206
Marat Dukhan87bd5112021-08-02 11:43:53 -07001207 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
1208 TEST_REQUIRES_X86_SSE41;
1209 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1210 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1211 VAddMicrokernelTester()
1212 .batch_size(batch_size)
1213 .a_zero_point(a_zero_point)
1214 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1215 }
1216 }
1217 }
1218
1219 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
1220 TEST_REQUIRES_X86_SSE41;
1221 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1222 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1223 VAddMicrokernelTester()
1224 .batch_size(batch_size)
1225 .b_zero_point(b_zero_point)
1226 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1227 }
1228 }
1229 }
1230
1231 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
1232 TEST_REQUIRES_X86_SSE41;
1233 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1234 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1235 VAddMicrokernelTester()
1236 .batch_size(batch_size)
1237 .y_zero_point(y_zero_point)
1238 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1239 }
1240 }
1241 }
1242
1243 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
1244 TEST_REQUIRES_X86_SSE41;
1245 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1246 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1247 VAddMicrokernelTester()
1248 .batch_size(batch_size)
1249 .a_scale(a_scale)
1250 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1251 }
1252 }
1253 }
1254
1255 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
1256 TEST_REQUIRES_X86_SSE41;
1257 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1258 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1259 VAddMicrokernelTester()
1260 .batch_size(batch_size)
1261 .b_scale(b_scale)
1262 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1263 }
1264 }
1265 }
1266
1267 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
1268 TEST_REQUIRES_X86_SSE41;
1269 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1270 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1271 VAddMicrokernelTester()
1272 .batch_size(batch_size)
1273 .y_scale(y_scale)
1274 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1275 }
1276 }
1277 }
1278
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001279 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
1280 TEST_REQUIRES_X86_SSE41;
1281 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1282 VAddMicrokernelTester()
1283 .batch_size(batch_size)
1284 .qmin(128)
1285 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1286 }
1287 }
1288
1289 TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
1290 TEST_REQUIRES_X86_SSE41;
1291 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1292 VAddMicrokernelTester()
1293 .batch_size(batch_size)
1294 .qmax(128)
1295 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1296 }
1297 }
1298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
1301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1302 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
1303 TEST_REQUIRES_X86_AVX;
1304 VAddMicrokernelTester()
1305 .batch_size(8)
1306 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1307 }
1308
1309 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
1310 TEST_REQUIRES_X86_AVX;
1311 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1312 VAddMicrokernelTester()
1313 .batch_size(batch_size)
1314 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1315 }
1316 }
1317
1318 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
1319 TEST_REQUIRES_X86_AVX;
1320 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1321 VAddMicrokernelTester()
1322 .batch_size(batch_size)
1323 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1324 }
1325 }
1326
1327 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
1328 TEST_REQUIRES_X86_AVX;
1329 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1330 VAddMicrokernelTester()
1331 .batch_size(batch_size)
1332 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1333 }
1334 }
1335
1336 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
1337 TEST_REQUIRES_X86_AVX;
1338 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1339 VAddMicrokernelTester()
1340 .batch_size(batch_size)
1341 .inplace_a(true)
1342 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1343 }
1344 }
1345
1346 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
1347 TEST_REQUIRES_X86_AVX;
1348 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1349 VAddMicrokernelTester()
1350 .batch_size(batch_size)
1351 .inplace_b(true)
1352 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1353 }
1354 }
1355
1356 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
1357 TEST_REQUIRES_X86_AVX;
1358 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1359 VAddMicrokernelTester()
1360 .batch_size(batch_size)
1361 .inplace_a(true)
1362 .inplace_b(true)
1363 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1364 }
1365 }
1366
Marat Dukhan87bd5112021-08-02 11:43:53 -07001367 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
1368 TEST_REQUIRES_X86_AVX;
1369 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1370 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1371 VAddMicrokernelTester()
1372 .batch_size(batch_size)
1373 .a_zero_point(a_zero_point)
1374 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1375 }
1376 }
1377 }
1378
1379 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
1380 TEST_REQUIRES_X86_AVX;
1381 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1382 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1383 VAddMicrokernelTester()
1384 .batch_size(batch_size)
1385 .b_zero_point(b_zero_point)
1386 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1387 }
1388 }
1389 }
1390
1391 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
1392 TEST_REQUIRES_X86_AVX;
1393 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1394 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1395 VAddMicrokernelTester()
1396 .batch_size(batch_size)
1397 .y_zero_point(y_zero_point)
1398 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1399 }
1400 }
1401 }
1402
1403 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
1404 TEST_REQUIRES_X86_AVX;
1405 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1406 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1407 VAddMicrokernelTester()
1408 .batch_size(batch_size)
1409 .a_scale(a_scale)
1410 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1411 }
1412 }
1413 }
1414
1415 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
1416 TEST_REQUIRES_X86_AVX;
1417 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1418 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1419 VAddMicrokernelTester()
1420 .batch_size(batch_size)
1421 .b_scale(b_scale)
1422 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1423 }
1424 }
1425 }
1426
1427 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
1428 TEST_REQUIRES_X86_AVX;
1429 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1430 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1431 VAddMicrokernelTester()
1432 .batch_size(batch_size)
1433 .y_scale(y_scale)
1434 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1435 }
1436 }
1437 }
1438
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001439 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
1440 TEST_REQUIRES_X86_AVX;
1441 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1442 VAddMicrokernelTester()
1443 .batch_size(batch_size)
1444 .qmin(128)
1445 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1446 }
1447 }
1448
1449 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
1450 TEST_REQUIRES_X86_AVX;
1451 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1452 VAddMicrokernelTester()
1453 .batch_size(batch_size)
1454 .qmax(128)
1455 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
1456 }
1457 }
1458#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
1461#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1462 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
1463 TEST_REQUIRES_X86_AVX;
1464 VAddMicrokernelTester()
1465 .batch_size(16)
1466 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1467 }
1468
1469 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
1470 TEST_REQUIRES_X86_AVX;
1471 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1472 VAddMicrokernelTester()
1473 .batch_size(batch_size)
1474 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1475 }
1476 }
1477
1478 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
1479 TEST_REQUIRES_X86_AVX;
1480 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1481 VAddMicrokernelTester()
1482 .batch_size(batch_size)
1483 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1484 }
1485 }
1486
1487 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
1488 TEST_REQUIRES_X86_AVX;
1489 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1490 VAddMicrokernelTester()
1491 .batch_size(batch_size)
1492 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1493 }
1494 }
1495
1496 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
1497 TEST_REQUIRES_X86_AVX;
1498 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1499 VAddMicrokernelTester()
1500 .batch_size(batch_size)
1501 .inplace_a(true)
1502 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1503 }
1504 }
1505
1506 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
1507 TEST_REQUIRES_X86_AVX;
1508 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1509 VAddMicrokernelTester()
1510 .batch_size(batch_size)
1511 .inplace_b(true)
1512 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1513 }
1514 }
1515
1516 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
1517 TEST_REQUIRES_X86_AVX;
1518 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1519 VAddMicrokernelTester()
1520 .batch_size(batch_size)
1521 .inplace_a(true)
1522 .inplace_b(true)
1523 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1524 }
1525 }
1526
Marat Dukhan87bd5112021-08-02 11:43:53 -07001527 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
1528 TEST_REQUIRES_X86_AVX;
1529 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1530 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1531 VAddMicrokernelTester()
1532 .batch_size(batch_size)
1533 .a_zero_point(a_zero_point)
1534 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1535 }
1536 }
1537 }
1538
1539 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
1540 TEST_REQUIRES_X86_AVX;
1541 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1542 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1543 VAddMicrokernelTester()
1544 .batch_size(batch_size)
1545 .b_zero_point(b_zero_point)
1546 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1547 }
1548 }
1549 }
1550
1551 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
1552 TEST_REQUIRES_X86_AVX;
1553 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1554 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1555 VAddMicrokernelTester()
1556 .batch_size(batch_size)
1557 .y_zero_point(y_zero_point)
1558 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1559 }
1560 }
1561 }
1562
1563 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
1564 TEST_REQUIRES_X86_AVX;
1565 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1566 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1567 VAddMicrokernelTester()
1568 .batch_size(batch_size)
1569 .a_scale(a_scale)
1570 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1571 }
1572 }
1573 }
1574
1575 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
1576 TEST_REQUIRES_X86_AVX;
1577 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1578 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1579 VAddMicrokernelTester()
1580 .batch_size(batch_size)
1581 .b_scale(b_scale)
1582 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1583 }
1584 }
1585 }
1586
1587 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
1588 TEST_REQUIRES_X86_AVX;
1589 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1590 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1591 VAddMicrokernelTester()
1592 .batch_size(batch_size)
1593 .y_scale(y_scale)
1594 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1595 }
1596 }
1597 }
1598
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001599 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
1600 TEST_REQUIRES_X86_AVX;
1601 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1602 VAddMicrokernelTester()
1603 .batch_size(batch_size)
1604 .qmin(128)
1605 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1606 }
1607 }
1608
1609 TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
1610 TEST_REQUIRES_X86_AVX;
1611 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1612 VAddMicrokernelTester()
1613 .batch_size(batch_size)
1614 .qmax(128)
1615 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
1616 }
1617 }
1618#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1622 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
1623 TEST_REQUIRES_X86_SSE41;
1624 VAddMicrokernelTester()
1625 .batch_size(8)
1626 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1627 }
1628
1629 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
1630 TEST_REQUIRES_X86_SSE41;
1631 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1632 VAddMicrokernelTester()
1633 .batch_size(batch_size)
1634 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1635 }
1636 }
1637
1638 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
1639 TEST_REQUIRES_X86_SSE41;
1640 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1641 VAddMicrokernelTester()
1642 .batch_size(batch_size)
1643 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1644 }
1645 }
1646
1647 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
1648 TEST_REQUIRES_X86_SSE41;
1649 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1650 VAddMicrokernelTester()
1651 .batch_size(batch_size)
1652 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1653 }
1654 }
1655
1656 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
1657 TEST_REQUIRES_X86_SSE41;
1658 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1659 VAddMicrokernelTester()
1660 .batch_size(batch_size)
1661 .inplace_a(true)
1662 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1663 }
1664 }
1665
1666 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
1667 TEST_REQUIRES_X86_SSE41;
1668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1669 VAddMicrokernelTester()
1670 .batch_size(batch_size)
1671 .inplace_b(true)
1672 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1673 }
1674 }
1675
1676 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
1677 TEST_REQUIRES_X86_SSE41;
1678 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1679 VAddMicrokernelTester()
1680 .batch_size(batch_size)
1681 .inplace_a(true)
1682 .inplace_b(true)
1683 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1684 }
1685 }
1686
Marat Dukhan87bd5112021-08-02 11:43:53 -07001687 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
1688 TEST_REQUIRES_X86_SSE41;
1689 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1690 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1691 VAddMicrokernelTester()
1692 .batch_size(batch_size)
1693 .a_zero_point(a_zero_point)
1694 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1695 }
1696 }
1697 }
1698
1699 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
1700 TEST_REQUIRES_X86_SSE41;
1701 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1702 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1703 VAddMicrokernelTester()
1704 .batch_size(batch_size)
1705 .b_zero_point(b_zero_point)
1706 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1707 }
1708 }
1709 }
1710
1711 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
1712 TEST_REQUIRES_X86_SSE41;
1713 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1714 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1715 VAddMicrokernelTester()
1716 .batch_size(batch_size)
1717 .y_zero_point(y_zero_point)
1718 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1719 }
1720 }
1721 }
1722
1723 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
1724 TEST_REQUIRES_X86_SSE41;
1725 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1727 VAddMicrokernelTester()
1728 .batch_size(batch_size)
1729 .a_scale(a_scale)
1730 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1731 }
1732 }
1733 }
1734
1735 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
1736 TEST_REQUIRES_X86_SSE41;
1737 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1738 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1739 VAddMicrokernelTester()
1740 .batch_size(batch_size)
1741 .b_scale(b_scale)
1742 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1743 }
1744 }
1745 }
1746
1747 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
1748 TEST_REQUIRES_X86_SSE41;
1749 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1750 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1751 VAddMicrokernelTester()
1752 .batch_size(batch_size)
1753 .y_scale(y_scale)
1754 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1755 }
1756 }
1757 }
1758
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001759 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
1760 TEST_REQUIRES_X86_SSE41;
1761 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1762 VAddMicrokernelTester()
1763 .batch_size(batch_size)
1764 .qmin(128)
1765 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1766 }
1767 }
1768
1769 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
1770 TEST_REQUIRES_X86_SSE41;
1771 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772 VAddMicrokernelTester()
1773 .batch_size(batch_size)
1774 .qmax(128)
1775 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1776 }
1777 }
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1782 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
1783 TEST_REQUIRES_X86_SSE41;
1784 VAddMicrokernelTester()
1785 .batch_size(16)
1786 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1787 }
1788
1789 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
1790 TEST_REQUIRES_X86_SSE41;
1791 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1792 VAddMicrokernelTester()
1793 .batch_size(batch_size)
1794 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1795 }
1796 }
1797
1798 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
1799 TEST_REQUIRES_X86_SSE41;
1800 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1801 VAddMicrokernelTester()
1802 .batch_size(batch_size)
1803 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1804 }
1805 }
1806
1807 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
1808 TEST_REQUIRES_X86_SSE41;
1809 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1810 VAddMicrokernelTester()
1811 .batch_size(batch_size)
1812 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1813 }
1814 }
1815
1816 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
1817 TEST_REQUIRES_X86_SSE41;
1818 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1819 VAddMicrokernelTester()
1820 .batch_size(batch_size)
1821 .inplace_a(true)
1822 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1823 }
1824 }
1825
1826 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
1827 TEST_REQUIRES_X86_SSE41;
1828 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1829 VAddMicrokernelTester()
1830 .batch_size(batch_size)
1831 .inplace_b(true)
1832 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1833 }
1834 }
1835
1836 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
1837 TEST_REQUIRES_X86_SSE41;
1838 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1839 VAddMicrokernelTester()
1840 .batch_size(batch_size)
1841 .inplace_a(true)
1842 .inplace_b(true)
1843 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1844 }
1845 }
1846
Marat Dukhan87bd5112021-08-02 11:43:53 -07001847 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
1848 TEST_REQUIRES_X86_SSE41;
1849 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1850 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1851 VAddMicrokernelTester()
1852 .batch_size(batch_size)
1853 .a_zero_point(a_zero_point)
1854 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1855 }
1856 }
1857 }
1858
1859 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
1860 TEST_REQUIRES_X86_SSE41;
1861 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1862 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1863 VAddMicrokernelTester()
1864 .batch_size(batch_size)
1865 .b_zero_point(b_zero_point)
1866 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1867 }
1868 }
1869 }
1870
1871 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
1872 TEST_REQUIRES_X86_SSE41;
1873 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1874 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1875 VAddMicrokernelTester()
1876 .batch_size(batch_size)
1877 .y_zero_point(y_zero_point)
1878 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1879 }
1880 }
1881 }
1882
1883 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
1884 TEST_REQUIRES_X86_SSE41;
1885 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1886 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1887 VAddMicrokernelTester()
1888 .batch_size(batch_size)
1889 .a_scale(a_scale)
1890 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1891 }
1892 }
1893 }
1894
1895 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
1896 TEST_REQUIRES_X86_SSE41;
1897 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1898 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1899 VAddMicrokernelTester()
1900 .batch_size(batch_size)
1901 .b_scale(b_scale)
1902 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1903 }
1904 }
1905 }
1906
1907 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
1908 TEST_REQUIRES_X86_SSE41;
1909 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1910 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1911 VAddMicrokernelTester()
1912 .batch_size(batch_size)
1913 .y_scale(y_scale)
1914 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1915 }
1916 }
1917 }
1918
Marat Dukhan3eac69c2021-07-21 01:42:29 -07001919 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
1920 TEST_REQUIRES_X86_SSE41;
1921 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1922 VAddMicrokernelTester()
1923 .batch_size(batch_size)
1924 .qmin(128)
1925 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1926 }
1927 }
1928
1929 TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
1930 TEST_REQUIRES_X86_SSE41;
1931 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1932 VAddMicrokernelTester()
1933 .batch_size(batch_size)
1934 .qmax(128)
1935 .Test(xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
1936 }
1937 }
1938#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939
1940
1941#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1942 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
1943 TEST_REQUIRES_X86_AVX;
1944 VAddMicrokernelTester()
1945 .batch_size(8)
1946 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1947 }
1948
1949 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
1950 TEST_REQUIRES_X86_AVX;
1951 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1952 VAddMicrokernelTester()
1953 .batch_size(batch_size)
1954 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1955 }
1956 }
1957
1958 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
1959 TEST_REQUIRES_X86_AVX;
1960 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1961 VAddMicrokernelTester()
1962 .batch_size(batch_size)
1963 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1964 }
1965 }
1966
1967 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
1968 TEST_REQUIRES_X86_AVX;
1969 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1970 VAddMicrokernelTester()
1971 .batch_size(batch_size)
1972 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1973 }
1974 }
1975
1976 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
1977 TEST_REQUIRES_X86_AVX;
1978 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1979 VAddMicrokernelTester()
1980 .batch_size(batch_size)
1981 .inplace_a(true)
1982 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1983 }
1984 }
1985
1986 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
1987 TEST_REQUIRES_X86_AVX;
1988 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1989 VAddMicrokernelTester()
1990 .batch_size(batch_size)
1991 .inplace_b(true)
1992 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
1993 }
1994 }
1995
1996 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
1997 TEST_REQUIRES_X86_AVX;
1998 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1999 VAddMicrokernelTester()
2000 .batch_size(batch_size)
2001 .inplace_a(true)
2002 .inplace_b(true)
2003 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2004 }
2005 }
2006
Marat Dukhan87bd5112021-08-02 11:43:53 -07002007 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
2008 TEST_REQUIRES_X86_AVX;
2009 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2010 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2011 VAddMicrokernelTester()
2012 .batch_size(batch_size)
2013 .a_zero_point(a_zero_point)
2014 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2015 }
2016 }
2017 }
2018
2019 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
2020 TEST_REQUIRES_X86_AVX;
2021 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2022 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2023 VAddMicrokernelTester()
2024 .batch_size(batch_size)
2025 .b_zero_point(b_zero_point)
2026 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2027 }
2028 }
2029 }
2030
2031 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
2032 TEST_REQUIRES_X86_AVX;
2033 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2034 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2035 VAddMicrokernelTester()
2036 .batch_size(batch_size)
2037 .y_zero_point(y_zero_point)
2038 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2039 }
2040 }
2041 }
2042
2043 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
2044 TEST_REQUIRES_X86_AVX;
2045 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2046 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2047 VAddMicrokernelTester()
2048 .batch_size(batch_size)
2049 .a_scale(a_scale)
2050 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2051 }
2052 }
2053 }
2054
2055 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
2056 TEST_REQUIRES_X86_AVX;
2057 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2058 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2059 VAddMicrokernelTester()
2060 .batch_size(batch_size)
2061 .b_scale(b_scale)
2062 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2063 }
2064 }
2065 }
2066
2067 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
2068 TEST_REQUIRES_X86_AVX;
2069 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2070 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2071 VAddMicrokernelTester()
2072 .batch_size(batch_size)
2073 .y_scale(y_scale)
2074 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2075 }
2076 }
2077 }
2078
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002079 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
2080 TEST_REQUIRES_X86_AVX;
2081 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2082 VAddMicrokernelTester()
2083 .batch_size(batch_size)
2084 .qmin(128)
2085 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2086 }
2087 }
2088
2089 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
2090 TEST_REQUIRES_X86_AVX;
2091 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2092 VAddMicrokernelTester()
2093 .batch_size(batch_size)
2094 .qmax(128)
2095 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2096 }
2097 }
2098#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099
2100
2101#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2102 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
2103 TEST_REQUIRES_X86_AVX;
2104 VAddMicrokernelTester()
2105 .batch_size(16)
2106 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2107 }
2108
2109 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
2110 TEST_REQUIRES_X86_AVX;
2111 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2112 VAddMicrokernelTester()
2113 .batch_size(batch_size)
2114 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2115 }
2116 }
2117
2118 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
2119 TEST_REQUIRES_X86_AVX;
2120 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2121 VAddMicrokernelTester()
2122 .batch_size(batch_size)
2123 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2124 }
2125 }
2126
2127 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
2128 TEST_REQUIRES_X86_AVX;
2129 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2130 VAddMicrokernelTester()
2131 .batch_size(batch_size)
2132 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2133 }
2134 }
2135
2136 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
2137 TEST_REQUIRES_X86_AVX;
2138 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2139 VAddMicrokernelTester()
2140 .batch_size(batch_size)
2141 .inplace_a(true)
2142 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2143 }
2144 }
2145
2146 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
2147 TEST_REQUIRES_X86_AVX;
2148 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2149 VAddMicrokernelTester()
2150 .batch_size(batch_size)
2151 .inplace_b(true)
2152 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2153 }
2154 }
2155
2156 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
2157 TEST_REQUIRES_X86_AVX;
2158 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2159 VAddMicrokernelTester()
2160 .batch_size(batch_size)
2161 .inplace_a(true)
2162 .inplace_b(true)
2163 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2164 }
2165 }
2166
Marat Dukhan87bd5112021-08-02 11:43:53 -07002167 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
2168 TEST_REQUIRES_X86_AVX;
2169 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2170 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2171 VAddMicrokernelTester()
2172 .batch_size(batch_size)
2173 .a_zero_point(a_zero_point)
2174 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2175 }
2176 }
2177 }
2178
2179 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
2180 TEST_REQUIRES_X86_AVX;
2181 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2182 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2183 VAddMicrokernelTester()
2184 .batch_size(batch_size)
2185 .b_zero_point(b_zero_point)
2186 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2187 }
2188 }
2189 }
2190
2191 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
2192 TEST_REQUIRES_X86_AVX;
2193 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2194 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2195 VAddMicrokernelTester()
2196 .batch_size(batch_size)
2197 .y_zero_point(y_zero_point)
2198 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2199 }
2200 }
2201 }
2202
2203 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
2204 TEST_REQUIRES_X86_AVX;
2205 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2206 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2207 VAddMicrokernelTester()
2208 .batch_size(batch_size)
2209 .a_scale(a_scale)
2210 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2211 }
2212 }
2213 }
2214
2215 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
2216 TEST_REQUIRES_X86_AVX;
2217 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2218 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2219 VAddMicrokernelTester()
2220 .batch_size(batch_size)
2221 .b_scale(b_scale)
2222 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2223 }
2224 }
2225 }
2226
2227 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
2228 TEST_REQUIRES_X86_AVX;
2229 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2230 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2231 VAddMicrokernelTester()
2232 .batch_size(batch_size)
2233 .y_scale(y_scale)
2234 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2235 }
2236 }
2237 }
2238
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002239 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
2240 TEST_REQUIRES_X86_AVX;
2241 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2242 VAddMicrokernelTester()
2243 .batch_size(batch_size)
2244 .qmin(128)
2245 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2246 }
2247 }
2248
2249 TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
2250 TEST_REQUIRES_X86_AVX;
2251 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2252 VAddMicrokernelTester()
2253 .batch_size(batch_size)
2254 .qmax(128)
2255 .Test(xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2256 }
2257 }
2258#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260
2261#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2262 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
2263 TEST_REQUIRES_X86_XOP;
2264 VAddMicrokernelTester()
2265 .batch_size(8)
2266 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2267 }
2268
2269 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
2270 TEST_REQUIRES_X86_XOP;
2271 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2272 VAddMicrokernelTester()
2273 .batch_size(batch_size)
2274 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2275 }
2276 }
2277
2278 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
2279 TEST_REQUIRES_X86_XOP;
2280 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2281 VAddMicrokernelTester()
2282 .batch_size(batch_size)
2283 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2284 }
2285 }
2286
2287 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
2288 TEST_REQUIRES_X86_XOP;
2289 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2290 VAddMicrokernelTester()
2291 .batch_size(batch_size)
2292 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2293 }
2294 }
2295
2296 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
2297 TEST_REQUIRES_X86_XOP;
2298 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2299 VAddMicrokernelTester()
2300 .batch_size(batch_size)
2301 .inplace_a(true)
2302 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2303 }
2304 }
2305
2306 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
2307 TEST_REQUIRES_X86_XOP;
2308 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2309 VAddMicrokernelTester()
2310 .batch_size(batch_size)
2311 .inplace_b(true)
2312 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2313 }
2314 }
2315
2316 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
2317 TEST_REQUIRES_X86_XOP;
2318 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2319 VAddMicrokernelTester()
2320 .batch_size(batch_size)
2321 .inplace_a(true)
2322 .inplace_b(true)
2323 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2324 }
2325 }
2326
Marat Dukhan87bd5112021-08-02 11:43:53 -07002327 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
2328 TEST_REQUIRES_X86_XOP;
2329 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2330 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2331 VAddMicrokernelTester()
2332 .batch_size(batch_size)
2333 .a_zero_point(a_zero_point)
2334 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2335 }
2336 }
2337 }
2338
2339 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
2340 TEST_REQUIRES_X86_XOP;
2341 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2342 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2343 VAddMicrokernelTester()
2344 .batch_size(batch_size)
2345 .b_zero_point(b_zero_point)
2346 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2347 }
2348 }
2349 }
2350
2351 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
2352 TEST_REQUIRES_X86_XOP;
2353 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2354 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2355 VAddMicrokernelTester()
2356 .batch_size(batch_size)
2357 .y_zero_point(y_zero_point)
2358 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2359 }
2360 }
2361 }
2362
2363 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
2364 TEST_REQUIRES_X86_XOP;
2365 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2366 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2367 VAddMicrokernelTester()
2368 .batch_size(batch_size)
2369 .a_scale(a_scale)
2370 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2371 }
2372 }
2373 }
2374
2375 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
2376 TEST_REQUIRES_X86_XOP;
2377 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2378 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2379 VAddMicrokernelTester()
2380 .batch_size(batch_size)
2381 .b_scale(b_scale)
2382 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2383 }
2384 }
2385 }
2386
2387 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
2388 TEST_REQUIRES_X86_XOP;
2389 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2390 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2391 VAddMicrokernelTester()
2392 .batch_size(batch_size)
2393 .y_scale(y_scale)
2394 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2395 }
2396 }
2397 }
2398
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002399 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
2400 TEST_REQUIRES_X86_XOP;
2401 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2402 VAddMicrokernelTester()
2403 .batch_size(batch_size)
2404 .qmin(128)
2405 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2406 }
2407 }
2408
2409 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
2410 TEST_REQUIRES_X86_XOP;
2411 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2412 VAddMicrokernelTester()
2413 .batch_size(batch_size)
2414 .qmax(128)
2415 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
2416 }
2417 }
2418#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419
2420
2421#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2422 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
2423 TEST_REQUIRES_X86_XOP;
2424 VAddMicrokernelTester()
2425 .batch_size(16)
2426 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2427 }
2428
2429 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
2430 TEST_REQUIRES_X86_XOP;
2431 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2432 VAddMicrokernelTester()
2433 .batch_size(batch_size)
2434 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2435 }
2436 }
2437
2438 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
2439 TEST_REQUIRES_X86_XOP;
2440 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2441 VAddMicrokernelTester()
2442 .batch_size(batch_size)
2443 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2444 }
2445 }
2446
2447 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
2448 TEST_REQUIRES_X86_XOP;
2449 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2450 VAddMicrokernelTester()
2451 .batch_size(batch_size)
2452 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2453 }
2454 }
2455
2456 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
2457 TEST_REQUIRES_X86_XOP;
2458 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2459 VAddMicrokernelTester()
2460 .batch_size(batch_size)
2461 .inplace_a(true)
2462 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2463 }
2464 }
2465
2466 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
2467 TEST_REQUIRES_X86_XOP;
2468 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2469 VAddMicrokernelTester()
2470 .batch_size(batch_size)
2471 .inplace_b(true)
2472 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2473 }
2474 }
2475
2476 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
2477 TEST_REQUIRES_X86_XOP;
2478 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2479 VAddMicrokernelTester()
2480 .batch_size(batch_size)
2481 .inplace_a(true)
2482 .inplace_b(true)
2483 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2484 }
2485 }
2486
Marat Dukhan87bd5112021-08-02 11:43:53 -07002487 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
2488 TEST_REQUIRES_X86_XOP;
2489 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2490 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2491 VAddMicrokernelTester()
2492 .batch_size(batch_size)
2493 .a_zero_point(a_zero_point)
2494 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2495 }
2496 }
2497 }
2498
2499 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
2500 TEST_REQUIRES_X86_XOP;
2501 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2502 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2503 VAddMicrokernelTester()
2504 .batch_size(batch_size)
2505 .b_zero_point(b_zero_point)
2506 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2507 }
2508 }
2509 }
2510
2511 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
2512 TEST_REQUIRES_X86_XOP;
2513 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2514 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2515 VAddMicrokernelTester()
2516 .batch_size(batch_size)
2517 .y_zero_point(y_zero_point)
2518 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2519 }
2520 }
2521 }
2522
2523 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
2524 TEST_REQUIRES_X86_XOP;
2525 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2526 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2527 VAddMicrokernelTester()
2528 .batch_size(batch_size)
2529 .a_scale(a_scale)
2530 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2531 }
2532 }
2533 }
2534
2535 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
2536 TEST_REQUIRES_X86_XOP;
2537 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2538 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2539 VAddMicrokernelTester()
2540 .batch_size(batch_size)
2541 .b_scale(b_scale)
2542 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2543 }
2544 }
2545 }
2546
2547 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
2548 TEST_REQUIRES_X86_XOP;
2549 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2550 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2551 VAddMicrokernelTester()
2552 .batch_size(batch_size)
2553 .y_scale(y_scale)
2554 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2555 }
2556 }
2557 }
2558
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002559 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
2560 TEST_REQUIRES_X86_XOP;
2561 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2562 VAddMicrokernelTester()
2563 .batch_size(batch_size)
2564 .qmin(128)
2565 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2566 }
2567 }
2568
2569 TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
2570 TEST_REQUIRES_X86_XOP;
2571 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2572 VAddMicrokernelTester()
2573 .batch_size(batch_size)
2574 .qmax(128)
2575 .Test(xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
2576 }
2577 }
2578#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579
2580
2581#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2582 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
2583 TEST_REQUIRES_X86_AVX2;
2584 VAddMicrokernelTester()
2585 .batch_size(8)
2586 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2587 }
2588
2589 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
2590 TEST_REQUIRES_X86_AVX2;
2591 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2592 VAddMicrokernelTester()
2593 .batch_size(batch_size)
2594 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2595 }
2596 }
2597
2598 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
2599 TEST_REQUIRES_X86_AVX2;
2600 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2601 VAddMicrokernelTester()
2602 .batch_size(batch_size)
2603 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2604 }
2605 }
2606
2607 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
2608 TEST_REQUIRES_X86_AVX2;
2609 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2610 VAddMicrokernelTester()
2611 .batch_size(batch_size)
2612 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2613 }
2614 }
2615
2616 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
2617 TEST_REQUIRES_X86_AVX2;
2618 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2619 VAddMicrokernelTester()
2620 .batch_size(batch_size)
2621 .inplace_a(true)
2622 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2623 }
2624 }
2625
2626 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
2627 TEST_REQUIRES_X86_AVX2;
2628 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2629 VAddMicrokernelTester()
2630 .batch_size(batch_size)
2631 .inplace_b(true)
2632 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2633 }
2634 }
2635
2636 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
2637 TEST_REQUIRES_X86_AVX2;
2638 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2639 VAddMicrokernelTester()
2640 .batch_size(batch_size)
2641 .inplace_a(true)
2642 .inplace_b(true)
2643 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2644 }
2645 }
2646
Marat Dukhan87bd5112021-08-02 11:43:53 -07002647 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
2648 TEST_REQUIRES_X86_AVX2;
2649 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2650 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2651 VAddMicrokernelTester()
2652 .batch_size(batch_size)
2653 .a_zero_point(a_zero_point)
2654 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2655 }
2656 }
2657 }
2658
2659 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
2660 TEST_REQUIRES_X86_AVX2;
2661 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2662 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2663 VAddMicrokernelTester()
2664 .batch_size(batch_size)
2665 .b_zero_point(b_zero_point)
2666 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2667 }
2668 }
2669 }
2670
2671 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
2672 TEST_REQUIRES_X86_AVX2;
2673 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2674 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2675 VAddMicrokernelTester()
2676 .batch_size(batch_size)
2677 .y_zero_point(y_zero_point)
2678 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2679 }
2680 }
2681 }
2682
2683 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
2684 TEST_REQUIRES_X86_AVX2;
2685 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2686 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2687 VAddMicrokernelTester()
2688 .batch_size(batch_size)
2689 .a_scale(a_scale)
2690 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2691 }
2692 }
2693 }
2694
2695 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
2696 TEST_REQUIRES_X86_AVX2;
2697 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2698 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2699 VAddMicrokernelTester()
2700 .batch_size(batch_size)
2701 .b_scale(b_scale)
2702 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2703 }
2704 }
2705 }
2706
2707 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
2708 TEST_REQUIRES_X86_AVX2;
2709 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2710 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2711 VAddMicrokernelTester()
2712 .batch_size(batch_size)
2713 .y_scale(y_scale)
2714 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2715 }
2716 }
2717 }
2718
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002719 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
2720 TEST_REQUIRES_X86_AVX2;
2721 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2722 VAddMicrokernelTester()
2723 .batch_size(batch_size)
2724 .qmin(128)
2725 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2726 }
2727 }
2728
2729 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
2730 TEST_REQUIRES_X86_AVX2;
2731 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2732 VAddMicrokernelTester()
2733 .batch_size(batch_size)
2734 .qmax(128)
2735 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
2736 }
2737 }
2738#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739
2740
2741#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2742 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
2743 TEST_REQUIRES_X86_AVX2;
2744 VAddMicrokernelTester()
2745 .batch_size(16)
2746 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2747 }
2748
2749 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
2750 TEST_REQUIRES_X86_AVX2;
2751 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2752 VAddMicrokernelTester()
2753 .batch_size(batch_size)
2754 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2755 }
2756 }
2757
2758 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
2759 TEST_REQUIRES_X86_AVX2;
2760 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2761 VAddMicrokernelTester()
2762 .batch_size(batch_size)
2763 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2764 }
2765 }
2766
2767 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
2768 TEST_REQUIRES_X86_AVX2;
2769 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2770 VAddMicrokernelTester()
2771 .batch_size(batch_size)
2772 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2773 }
2774 }
2775
2776 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
2777 TEST_REQUIRES_X86_AVX2;
2778 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2779 VAddMicrokernelTester()
2780 .batch_size(batch_size)
2781 .inplace_a(true)
2782 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2783 }
2784 }
2785
2786 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
2787 TEST_REQUIRES_X86_AVX2;
2788 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2789 VAddMicrokernelTester()
2790 .batch_size(batch_size)
2791 .inplace_b(true)
2792 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2793 }
2794 }
2795
2796 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
2797 TEST_REQUIRES_X86_AVX2;
2798 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2799 VAddMicrokernelTester()
2800 .batch_size(batch_size)
2801 .inplace_a(true)
2802 .inplace_b(true)
2803 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2804 }
2805 }
2806
Marat Dukhan87bd5112021-08-02 11:43:53 -07002807 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
2808 TEST_REQUIRES_X86_AVX2;
2809 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2810 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2811 VAddMicrokernelTester()
2812 .batch_size(batch_size)
2813 .a_zero_point(a_zero_point)
2814 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2815 }
2816 }
2817 }
2818
2819 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
2820 TEST_REQUIRES_X86_AVX2;
2821 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2822 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2823 VAddMicrokernelTester()
2824 .batch_size(batch_size)
2825 .b_zero_point(b_zero_point)
2826 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2827 }
2828 }
2829 }
2830
2831 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
2832 TEST_REQUIRES_X86_AVX2;
2833 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2834 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2835 VAddMicrokernelTester()
2836 .batch_size(batch_size)
2837 .y_zero_point(y_zero_point)
2838 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2839 }
2840 }
2841 }
2842
2843 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
2844 TEST_REQUIRES_X86_AVX2;
2845 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2846 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2847 VAddMicrokernelTester()
2848 .batch_size(batch_size)
2849 .a_scale(a_scale)
2850 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2851 }
2852 }
2853 }
2854
2855 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
2856 TEST_REQUIRES_X86_AVX2;
2857 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2858 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2859 VAddMicrokernelTester()
2860 .batch_size(batch_size)
2861 .b_scale(b_scale)
2862 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2863 }
2864 }
2865 }
2866
2867 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
2868 TEST_REQUIRES_X86_AVX2;
2869 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2870 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2871 VAddMicrokernelTester()
2872 .batch_size(batch_size)
2873 .y_scale(y_scale)
2874 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2875 }
2876 }
2877 }
2878
Marat Dukhan3eac69c2021-07-21 01:42:29 -07002879 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
2880 TEST_REQUIRES_X86_AVX2;
2881 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2882 VAddMicrokernelTester()
2883 .batch_size(batch_size)
2884 .qmin(128)
2885 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2886 }
2887 }
2888
2889 TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
2890 TEST_REQUIRES_X86_AVX2;
2891 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2892 VAddMicrokernelTester()
2893 .batch_size(batch_size)
2894 .qmax(128)
2895 .Test(xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qu8_add_minmax_avx2_params);
2896 }
2897 }
2898#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899
2900
Marat Dukhane76049a2021-07-22 14:48:59 -07002901#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2902 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
2903 TEST_REQUIRES_X86_AVX512SKX;
2904 VAddMicrokernelTester()
2905 .batch_size(16)
2906 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2907 }
2908
2909 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
2910 TEST_REQUIRES_X86_AVX512SKX;
2911 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2912 VAddMicrokernelTester()
2913 .batch_size(batch_size)
2914 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2915 }
2916 }
2917
2918 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
2919 TEST_REQUIRES_X86_AVX512SKX;
2920 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2921 VAddMicrokernelTester()
2922 .batch_size(batch_size)
2923 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2924 }
2925 }
2926
2927 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
2928 TEST_REQUIRES_X86_AVX512SKX;
2929 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2930 VAddMicrokernelTester()
2931 .batch_size(batch_size)
2932 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2933 }
2934 }
2935
2936 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
2937 TEST_REQUIRES_X86_AVX512SKX;
2938 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2939 VAddMicrokernelTester()
2940 .batch_size(batch_size)
2941 .inplace_a(true)
2942 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2943 }
2944 }
2945
2946 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
2947 TEST_REQUIRES_X86_AVX512SKX;
2948 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2949 VAddMicrokernelTester()
2950 .batch_size(batch_size)
2951 .inplace_b(true)
2952 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2953 }
2954 }
2955
2956 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
2957 TEST_REQUIRES_X86_AVX512SKX;
2958 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2959 VAddMicrokernelTester()
2960 .batch_size(batch_size)
2961 .inplace_a(true)
2962 .inplace_b(true)
2963 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2964 }
2965 }
2966
Marat Dukhan87bd5112021-08-02 11:43:53 -07002967 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
2968 TEST_REQUIRES_X86_AVX512SKX;
2969 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2970 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2971 VAddMicrokernelTester()
2972 .batch_size(batch_size)
2973 .a_zero_point(a_zero_point)
2974 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2975 }
2976 }
2977 }
2978
2979 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
2980 TEST_REQUIRES_X86_AVX512SKX;
2981 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2982 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2983 VAddMicrokernelTester()
2984 .batch_size(batch_size)
2985 .b_zero_point(b_zero_point)
2986 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2987 }
2988 }
2989 }
2990
2991 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
2992 TEST_REQUIRES_X86_AVX512SKX;
2993 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2994 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2995 VAddMicrokernelTester()
2996 .batch_size(batch_size)
2997 .y_zero_point(y_zero_point)
2998 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
2999 }
3000 }
3001 }
3002
3003 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
3004 TEST_REQUIRES_X86_AVX512SKX;
3005 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3006 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3007 VAddMicrokernelTester()
3008 .batch_size(batch_size)
3009 .a_scale(a_scale)
3010 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
3011 }
3012 }
3013 }
3014
3015 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
3016 TEST_REQUIRES_X86_AVX512SKX;
3017 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3018 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3019 VAddMicrokernelTester()
3020 .batch_size(batch_size)
3021 .b_scale(b_scale)
3022 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
3023 }
3024 }
3025 }
3026
3027 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
3028 TEST_REQUIRES_X86_AVX512SKX;
3029 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3030 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3031 VAddMicrokernelTester()
3032 .batch_size(batch_size)
3033 .y_scale(y_scale)
3034 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
3035 }
3036 }
3037 }
3038
Marat Dukhane76049a2021-07-22 14:48:59 -07003039 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
3040 TEST_REQUIRES_X86_AVX512SKX;
3041 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3042 VAddMicrokernelTester()
3043 .batch_size(batch_size)
3044 .qmin(128)
3045 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
3046 }
3047 }
3048
3049 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
3050 TEST_REQUIRES_X86_AVX512SKX;
3051 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3052 VAddMicrokernelTester()
3053 .batch_size(batch_size)
3054 .qmax(128)
3055 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qu8_add_minmax_avx512_params);
3056 }
3057 }
3058#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059
3060
3061#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3062 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
3063 TEST_REQUIRES_X86_AVX512SKX;
3064 VAddMicrokernelTester()
3065 .batch_size(32)
3066 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3067 }
3068
3069 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
3070 TEST_REQUIRES_X86_AVX512SKX;
3071 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
3072 VAddMicrokernelTester()
3073 .batch_size(batch_size)
3074 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3075 }
3076 }
3077
3078 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
3079 TEST_REQUIRES_X86_AVX512SKX;
3080 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
3081 VAddMicrokernelTester()
3082 .batch_size(batch_size)
3083 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3084 }
3085 }
3086
3087 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
3088 TEST_REQUIRES_X86_AVX512SKX;
3089 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
3090 VAddMicrokernelTester()
3091 .batch_size(batch_size)
3092 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3093 }
3094 }
3095
3096 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
3097 TEST_REQUIRES_X86_AVX512SKX;
3098 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3099 VAddMicrokernelTester()
3100 .batch_size(batch_size)
3101 .inplace_a(true)
3102 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3103 }
3104 }
3105
3106 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
3107 TEST_REQUIRES_X86_AVX512SKX;
3108 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3109 VAddMicrokernelTester()
3110 .batch_size(batch_size)
3111 .inplace_b(true)
3112 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3113 }
3114 }
3115
3116 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
3117 TEST_REQUIRES_X86_AVX512SKX;
3118 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3119 VAddMicrokernelTester()
3120 .batch_size(batch_size)
3121 .inplace_a(true)
3122 .inplace_b(true)
3123 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3124 }
3125 }
3126
Marat Dukhan87bd5112021-08-02 11:43:53 -07003127 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
3128 TEST_REQUIRES_X86_AVX512SKX;
3129 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3130 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3131 VAddMicrokernelTester()
3132 .batch_size(batch_size)
3133 .a_zero_point(a_zero_point)
3134 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3135 }
3136 }
3137 }
3138
3139 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
3140 TEST_REQUIRES_X86_AVX512SKX;
3141 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3142 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3143 VAddMicrokernelTester()
3144 .batch_size(batch_size)
3145 .b_zero_point(b_zero_point)
3146 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3147 }
3148 }
3149 }
3150
3151 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
3152 TEST_REQUIRES_X86_AVX512SKX;
3153 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3154 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3155 VAddMicrokernelTester()
3156 .batch_size(batch_size)
3157 .y_zero_point(y_zero_point)
3158 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3159 }
3160 }
3161 }
3162
3163 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
3164 TEST_REQUIRES_X86_AVX512SKX;
3165 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3166 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3167 VAddMicrokernelTester()
3168 .batch_size(batch_size)
3169 .a_scale(a_scale)
3170 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3171 }
3172 }
3173 }
3174
3175 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
3176 TEST_REQUIRES_X86_AVX512SKX;
3177 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3178 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3179 VAddMicrokernelTester()
3180 .batch_size(batch_size)
3181 .b_scale(b_scale)
3182 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3183 }
3184 }
3185 }
3186
3187 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
3188 TEST_REQUIRES_X86_AVX512SKX;
3189 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3190 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3191 VAddMicrokernelTester()
3192 .batch_size(batch_size)
3193 .y_scale(y_scale)
3194 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3195 }
3196 }
3197 }
3198
Marat Dukhane76049a2021-07-22 14:48:59 -07003199 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
3200 TEST_REQUIRES_X86_AVX512SKX;
3201 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3202 VAddMicrokernelTester()
3203 .batch_size(batch_size)
3204 .qmin(128)
3205 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3206 }
3207 }
3208
3209 TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
3210 TEST_REQUIRES_X86_AVX512SKX;
3211 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3212 VAddMicrokernelTester()
3213 .batch_size(batch_size)
3214 .qmax(128)
3215 .Test(xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qu8_add_minmax_avx512_params);
3216 }
3217 }
3218#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219
3220
Marat Dukhan76e78c82021-07-20 21:11:23 -07003221#if XNN_ARCH_WASMSIMD
3222 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
3223 VAddMicrokernelTester()
3224 .batch_size(8)
3225 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3226 }
3227
3228 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
3229 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
3230 VAddMicrokernelTester()
3231 .batch_size(batch_size)
3232 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3233 }
3234 }
3235
3236 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
3237 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
3238 VAddMicrokernelTester()
3239 .batch_size(batch_size)
3240 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3241 }
3242 }
3243
3244 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
3245 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
3246 VAddMicrokernelTester()
3247 .batch_size(batch_size)
3248 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3249 }
3250 }
3251
3252 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
3253 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3254 VAddMicrokernelTester()
3255 .batch_size(batch_size)
3256 .inplace_a(true)
3257 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3258 }
3259 }
3260
3261 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
3262 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3263 VAddMicrokernelTester()
3264 .batch_size(batch_size)
3265 .inplace_b(true)
3266 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3267 }
3268 }
3269
3270 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
3271 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3272 VAddMicrokernelTester()
3273 .batch_size(batch_size)
3274 .inplace_a(true)
3275 .inplace_b(true)
3276 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3277 }
3278 }
3279
Marat Dukhan87bd5112021-08-02 11:43:53 -07003280 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
3281 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3282 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3283 VAddMicrokernelTester()
3284 .batch_size(batch_size)
3285 .a_zero_point(a_zero_point)
3286 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3287 }
3288 }
3289 }
3290
3291 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
3292 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3293 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3294 VAddMicrokernelTester()
3295 .batch_size(batch_size)
3296 .b_zero_point(b_zero_point)
3297 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3298 }
3299 }
3300 }
3301
3302 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
3303 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3304 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3305 VAddMicrokernelTester()
3306 .batch_size(batch_size)
3307 .y_zero_point(y_zero_point)
3308 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3309 }
3310 }
3311 }
3312
3313 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
3314 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3315 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3316 VAddMicrokernelTester()
3317 .batch_size(batch_size)
3318 .a_scale(a_scale)
3319 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3320 }
3321 }
3322 }
3323
3324 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
3325 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3326 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3327 VAddMicrokernelTester()
3328 .batch_size(batch_size)
3329 .b_scale(b_scale)
3330 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3331 }
3332 }
3333 }
3334
3335 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
3336 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3337 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3338 VAddMicrokernelTester()
3339 .batch_size(batch_size)
3340 .y_scale(y_scale)
3341 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3342 }
3343 }
3344 }
3345
Marat Dukhan76e78c82021-07-20 21:11:23 -07003346 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmin) {
3347 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3348 VAddMicrokernelTester()
3349 .batch_size(batch_size)
3350 .qmin(128)
3351 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3352 }
3353 }
3354
3355 TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmax) {
3356 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3357 VAddMicrokernelTester()
3358 .batch_size(batch_size)
3359 .qmax(128)
3360 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qu8_add_minmax_wasmsimd_params);
3361 }
3362 }
3363#endif // XNN_ARCH_WASMSIMD
3364
3365
3366#if XNN_ARCH_WASMSIMD
3367 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
3368 VAddMicrokernelTester()
3369 .batch_size(16)
3370 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3371 }
3372
3373 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
3374 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
3375 VAddMicrokernelTester()
3376 .batch_size(batch_size)
3377 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3378 }
3379 }
3380
3381 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
3382 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
3383 VAddMicrokernelTester()
3384 .batch_size(batch_size)
3385 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3386 }
3387 }
3388
3389 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
3390 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
3391 VAddMicrokernelTester()
3392 .batch_size(batch_size)
3393 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3394 }
3395 }
3396
3397 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
3398 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3399 VAddMicrokernelTester()
3400 .batch_size(batch_size)
3401 .inplace_a(true)
3402 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3403 }
3404 }
3405
3406 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
3407 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3408 VAddMicrokernelTester()
3409 .batch_size(batch_size)
3410 .inplace_b(true)
3411 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3412 }
3413 }
3414
3415 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
3416 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3417 VAddMicrokernelTester()
3418 .batch_size(batch_size)
3419 .inplace_a(true)
3420 .inplace_b(true)
3421 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3422 }
3423 }
3424
Marat Dukhan87bd5112021-08-02 11:43:53 -07003425 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
3426 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3427 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3428 VAddMicrokernelTester()
3429 .batch_size(batch_size)
3430 .a_zero_point(a_zero_point)
3431 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3432 }
3433 }
3434 }
3435
3436 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
3437 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3438 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3439 VAddMicrokernelTester()
3440 .batch_size(batch_size)
3441 .b_zero_point(b_zero_point)
3442 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3443 }
3444 }
3445 }
3446
3447 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
3448 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3449 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3450 VAddMicrokernelTester()
3451 .batch_size(batch_size)
3452 .y_zero_point(y_zero_point)
3453 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3454 }
3455 }
3456 }
3457
3458 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
3459 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3460 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3461 VAddMicrokernelTester()
3462 .batch_size(batch_size)
3463 .a_scale(a_scale)
3464 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3465 }
3466 }
3467 }
3468
3469 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
3470 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3471 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3472 VAddMicrokernelTester()
3473 .batch_size(batch_size)
3474 .b_scale(b_scale)
3475 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3476 }
3477 }
3478 }
3479
3480 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
3481 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3482 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3483 VAddMicrokernelTester()
3484 .batch_size(batch_size)
3485 .y_scale(y_scale)
3486 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3487 }
3488 }
3489 }
3490
Marat Dukhan76e78c82021-07-20 21:11:23 -07003491 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmin) {
3492 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3493 VAddMicrokernelTester()
3494 .batch_size(batch_size)
3495 .qmin(128)
3496 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3497 }
3498 }
3499
3500 TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmax) {
3501 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3502 VAddMicrokernelTester()
3503 .batch_size(batch_size)
3504 .qmax(128)
3505 .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qu8_add_minmax_wasmsimd_params);
3506 }
3507 }
3508#endif // XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -07003509
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003510
3511TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003512 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003513 .batch_size(1)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003514 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003515}
3516
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003517TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
3518 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003519 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003520 .batch_size(batch_size)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003521 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003522 }
3523}
3524
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003525TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a) {
3526 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003527 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003528 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003529 .inplace_a(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003530 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003531 }
3532}
3533
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003534TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_b) {
3535 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003536 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003537 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003538 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003539 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003540 }
3541}
3542
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003543TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
3544 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003545 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003546 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003547 .inplace_a(true)
3548 .inplace_b(true)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003549 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003550 }
3551}
3552
Marat Dukhan87bd5112021-08-02 11:43:53 -07003553TEST(QU8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
3554 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3555 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3556 VAddMicrokernelTester()
3557 .batch_size(batch_size)
3558 .a_zero_point(a_zero_point)
3559 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3560 }
3561 }
3562}
3563
3564TEST(QU8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
3565 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3566 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3567 VAddMicrokernelTester()
3568 .batch_size(batch_size)
3569 .b_zero_point(b_zero_point)
3570 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3571 }
3572 }
3573}
3574
3575TEST(QU8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
3576 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3577 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3578 VAddMicrokernelTester()
3579 .batch_size(batch_size)
3580 .y_zero_point(y_zero_point)
3581 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3582 }
3583 }
3584}
3585
3586TEST(QU8_VADD_MINMAX__SCALAR_X1, a_scale) {
3587 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3588 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3589 VAddMicrokernelTester()
3590 .batch_size(batch_size)
3591 .a_scale(a_scale)
3592 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3593 }
3594 }
3595}
3596
3597TEST(QU8_VADD_MINMAX__SCALAR_X1, b_scale) {
3598 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3599 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3600 VAddMicrokernelTester()
3601 .batch_size(batch_size)
3602 .b_scale(b_scale)
3603 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3604 }
3605 }
3606}
3607
3608TEST(QU8_VADD_MINMAX__SCALAR_X1, y_scale) {
3609 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
3610 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3611 VAddMicrokernelTester()
3612 .batch_size(batch_size)
3613 .y_scale(y_scale)
3614 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
3615 }
3616 }
3617}
3618
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003619TEST(QU8_VADD_MINMAX__SCALAR_X1, qmin) {
3620 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003621 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003622 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003623 .qmin(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003624 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003625 }
3626}
3627
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003628TEST(QU8_VADD_MINMAX__SCALAR_X1, qmax) {
3629 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07003630 VAddMicrokernelTester()
Marat Dukhand9f3ad42020-08-10 12:30:58 -07003631 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -07003632 .qmax(128)
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003633 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07003634 }
Marat Dukhan76e78c82021-07-20 21:11:23 -07003635}
3636
3637TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
3638 VAddMicrokernelTester()
3639 .batch_size(2)
3640 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3641}
3642
3643TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
3644 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
3645 VAddMicrokernelTester()
3646 .batch_size(batch_size)
3647 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3648 }
3649}
3650
3651TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
3652 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
3653 VAddMicrokernelTester()
3654 .batch_size(batch_size)
3655 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3656 }
3657}
3658
3659TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
3660 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
3661 VAddMicrokernelTester()
3662 .batch_size(batch_size)
3663 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3664 }
3665}
3666
3667TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a) {
3668 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3669 VAddMicrokernelTester()
3670 .batch_size(batch_size)
3671 .inplace_a(true)
3672 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3673 }
3674}
3675
3676TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_b) {
3677 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3678 VAddMicrokernelTester()
3679 .batch_size(batch_size)
3680 .inplace_b(true)
3681 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3682 }
3683}
3684
3685TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
3686 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3687 VAddMicrokernelTester()
3688 .batch_size(batch_size)
3689 .inplace_a(true)
3690 .inplace_b(true)
3691 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3692 }
3693}
3694
Marat Dukhan87bd5112021-08-02 11:43:53 -07003695TEST(QU8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
3696 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3697 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3698 VAddMicrokernelTester()
3699 .batch_size(batch_size)
3700 .a_zero_point(a_zero_point)
3701 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3702 }
3703 }
3704}
3705
3706TEST(QU8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
3707 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3708 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3709 VAddMicrokernelTester()
3710 .batch_size(batch_size)
3711 .b_zero_point(b_zero_point)
3712 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3713 }
3714 }
3715}
3716
3717TEST(QU8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
3718 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3719 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3720 VAddMicrokernelTester()
3721 .batch_size(batch_size)
3722 .y_zero_point(y_zero_point)
3723 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3724 }
3725 }
3726}
3727
3728TEST(QU8_VADD_MINMAX__SCALAR_X2, a_scale) {
3729 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3730 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3731 VAddMicrokernelTester()
3732 .batch_size(batch_size)
3733 .a_scale(a_scale)
3734 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3735 }
3736 }
3737}
3738
3739TEST(QU8_VADD_MINMAX__SCALAR_X2, b_scale) {
3740 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3741 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3742 VAddMicrokernelTester()
3743 .batch_size(batch_size)
3744 .b_scale(b_scale)
3745 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3746 }
3747 }
3748}
3749
3750TEST(QU8_VADD_MINMAX__SCALAR_X2, y_scale) {
3751 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3752 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3753 VAddMicrokernelTester()
3754 .batch_size(batch_size)
3755 .y_scale(y_scale)
3756 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3757 }
3758 }
3759}
3760
Marat Dukhan76e78c82021-07-20 21:11:23 -07003761TEST(QU8_VADD_MINMAX__SCALAR_X2, qmin) {
3762 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3763 VAddMicrokernelTester()
3764 .batch_size(batch_size)
3765 .qmin(128)
3766 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3767 }
3768}
3769
3770TEST(QU8_VADD_MINMAX__SCALAR_X2, qmax) {
3771 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
3772 VAddMicrokernelTester()
3773 .batch_size(batch_size)
3774 .qmax(128)
3775 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
3776 }
3777}
3778
3779TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
3780 VAddMicrokernelTester()
3781 .batch_size(4)
3782 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3783}
3784
3785TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
3786 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
3787 VAddMicrokernelTester()
3788 .batch_size(batch_size)
3789 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3790 }
3791}
3792
3793TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
3794 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
3795 VAddMicrokernelTester()
3796 .batch_size(batch_size)
3797 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3798 }
3799}
3800
3801TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
3802 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
3803 VAddMicrokernelTester()
3804 .batch_size(batch_size)
3805 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3806 }
3807}
3808
3809TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a) {
3810 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3811 VAddMicrokernelTester()
3812 .batch_size(batch_size)
3813 .inplace_a(true)
3814 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3815 }
3816}
3817
3818TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_b) {
3819 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3820 VAddMicrokernelTester()
3821 .batch_size(batch_size)
3822 .inplace_b(true)
3823 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3824 }
3825}
3826
3827TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
3828 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3829 VAddMicrokernelTester()
3830 .batch_size(batch_size)
3831 .inplace_a(true)
3832 .inplace_b(true)
3833 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3834 }
3835}
3836
Marat Dukhan87bd5112021-08-02 11:43:53 -07003837TEST(QU8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
3838 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3839 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3840 VAddMicrokernelTester()
3841 .batch_size(batch_size)
3842 .a_zero_point(a_zero_point)
3843 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3844 }
3845 }
3846}
3847
3848TEST(QU8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
3849 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3850 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3851 VAddMicrokernelTester()
3852 .batch_size(batch_size)
3853 .b_zero_point(b_zero_point)
3854 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3855 }
3856 }
3857}
3858
3859TEST(QU8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
3860 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3861 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3862 VAddMicrokernelTester()
3863 .batch_size(batch_size)
3864 .y_zero_point(y_zero_point)
3865 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3866 }
3867 }
3868}
3869
3870TEST(QU8_VADD_MINMAX__SCALAR_X4, a_scale) {
3871 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3872 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3873 VAddMicrokernelTester()
3874 .batch_size(batch_size)
3875 .a_scale(a_scale)
3876 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3877 }
3878 }
3879}
3880
3881TEST(QU8_VADD_MINMAX__SCALAR_X4, b_scale) {
3882 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3883 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3884 VAddMicrokernelTester()
3885 .batch_size(batch_size)
3886 .b_scale(b_scale)
3887 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3888 }
3889 }
3890}
3891
3892TEST(QU8_VADD_MINMAX__SCALAR_X4, y_scale) {
3893 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3894 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3895 VAddMicrokernelTester()
3896 .batch_size(batch_size)
3897 .y_scale(y_scale)
3898 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3899 }
3900 }
3901}
3902
Marat Dukhan76e78c82021-07-20 21:11:23 -07003903TEST(QU8_VADD_MINMAX__SCALAR_X4, qmin) {
3904 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3905 VAddMicrokernelTester()
3906 .batch_size(batch_size)
3907 .qmin(128)
3908 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3909 }
3910}
3911
3912TEST(QU8_VADD_MINMAX__SCALAR_X4, qmax) {
3913 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
3914 VAddMicrokernelTester()
3915 .batch_size(batch_size)
3916 .qmax(128)
3917 .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
3918 }
Marat Dukhan6e0fc392021-07-19 18:38:24 -07003919}