blob: 841e723ba66ae039d3c8ac31335f281fb5020527 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-vadd-minmax.yaml
// Generator: tools/generate-vbinary-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/vadd.h>
17#include "vadd-microkernel-tester.h"
18
19
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON (from xnnpack/isa-checks.h; presumably skips the case
  // when NEON is unavailable -- confirm against that header).

  // Batch size of exactly 8.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // inplace_a enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
178
179
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON (from xnnpack/isa-checks.h; presumably skips the case
  // when NEON is unavailable -- confirm against that header).

  // Batch size of exactly 16.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // inplace_a enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
338
339
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON (from xnnpack/isa-checks.h; presumably skips the case
  // when NEON is unavailable -- confirm against that header).

  // Batch size of exactly 24.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24 below 240).
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Batch sizes 1 through 23.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Batch sizes 25 through 47.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // inplace_a enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
498
499
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every case begins with
  // TEST_REQUIRES_ARM_NEON (from xnnpack/isa-checks.h; presumably skips the case
  // when NEON is unavailable -- confirm against that header).

  // Batch size of exactly 32.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32 below 320).
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Batch sizes 1 through 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Batch sizes 33 through 63.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // inplace_a enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
658
659
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, compiled only
  // when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every case begins with
  // TEST_REQUIRES_X86_SSE2 (from xnnpack/isa-checks.h; presumably skips the case
  // when SSE2 is unavailable -- confirm against that header).

  // Batch size of exactly 8.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // inplace_a enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
818
819
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, compiled only
  // when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every case begins with
  // TEST_REQUIRES_X86_SSE2 (from xnnpack/isa-checks.h; presumably skips the case
  // when SSE2 is unavailable -- confirm against that header).

  // Batch size of exactly 16.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // inplace_a enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // a_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // b_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // y_zero_point sweeps -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // Overrides the tester's qmin setting with 128.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Overrides the tester's qmax setting with 128.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
978
979
// Tests for xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 24 elements.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester().batch_size(24)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 48; n < 240; n += 24) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // Every batch size from 1 to 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 24; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // Every batch size from 25 to 47.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 25; n < 48; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // inplace_a enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // qmin set to 128; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // qmax set to 128; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1138
1139
// Tests for xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 32 elements.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester().batch_size(32)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32).
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 64; n < 320; n += 32) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // Every batch size from 1 to 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // Every batch size from 33 to 63.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 33; n < 64; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // inplace_a enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
      }
    }
  }

  // qmin set to 128; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }

  // qmax set to 128; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1298
1299
// Tests for xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE41;
    VAddMicrokernelTester().batch_size(8)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 16; n < 80; n += 8) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // Every batch size from 1 to 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n < 8; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // Every batch size from 9 to 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 9; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // inplace_a enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
      }
    }
  }

  // qmin set to 128; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }

  // qmax set to 128; batch sizes 1..40 in steps of 7.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1458
1459
// Tests for xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE41;
    VAddMicrokernelTester().batch_size(16)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 32; n < 160; n += 16) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // Every batch size from 1 to 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n < 16; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // Every batch size from 17 to 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 17; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // inplace_a enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
      }
    }
  }

  // qmin set to 128; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }

  // qmax set to 128; batch sizes 1..80 in steps of 15.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1618
1619
// Tests for xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 24 elements.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_X86_SSE41;
    VAddMicrokernelTester().batch_size(24)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 48; n < 240; n += 24) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // Every batch size from 1 to 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n < 24; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // Every batch size from 25 to 47.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 25; n < 48; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // inplace_a enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
      }
    }
  }

  // qmin set to 128; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }

  // qmax set to 128; batch sizes 1..120 in steps of 23.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1778
1779
// Tests for xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32. Each case builds a
// VAddMicrokernelTester, sets the parameters under test, and hands the kernel to Test().
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 32 elements.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_X86_SSE41;
    VAddMicrokernelTester().batch_size(32)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32).
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 64; n < 320; n += 32) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // Every batch size from 1 to 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n < 32; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // Every batch size from 33 to 63.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 33; n < 64; ++n) {
      VAddMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // inplace_a enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // a_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // b_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // y_zero_point swept over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
      }
    }
  }

  // qmin set to 128; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }

  // qmax set to 128; batch sizes 1..160 in steps of 31.
  TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan5df27f82020-09-02 23:59:21 -07001938
1939
Marat Dukhanbb9225e2020-09-06 22:40:56 -07001940#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1941 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
1942 TEST_REQUIRES_X86_SSE41;
1943 VAddMicrokernelTester()
1944 .batch_size(8)
1945 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1946 }
1947
1948 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
1949 TEST_REQUIRES_X86_SSE41;
1950 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1951 VAddMicrokernelTester()
1952 .batch_size(batch_size)
1953 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1954 }
1955 }
1956
1957 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
1958 TEST_REQUIRES_X86_SSE41;
1959 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1960 VAddMicrokernelTester()
1961 .batch_size(batch_size)
1962 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1963 }
1964 }
1965
1966 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
1967 TEST_REQUIRES_X86_SSE41;
1968 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1969 VAddMicrokernelTester()
1970 .batch_size(batch_size)
1971 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1972 }
1973 }
1974
1975 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
1976 TEST_REQUIRES_X86_SSE41;
1977 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1978 VAddMicrokernelTester()
1979 .batch_size(batch_size)
1980 .inplace_a(true)
1981 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1982 }
1983 }
1984
1985 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
1986 TEST_REQUIRES_X86_SSE41;
1987 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1988 VAddMicrokernelTester()
1989 .batch_size(batch_size)
1990 .inplace_b(true)
1991 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
1992 }
1993 }
1994
1995 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
1996 TEST_REQUIRES_X86_SSE41;
1997 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1998 VAddMicrokernelTester()
1999 .batch_size(batch_size)
2000 .inplace_a(true)
2001 .inplace_b(true)
2002 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2003 }
2004 }
2005
2006 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
2007 TEST_REQUIRES_X86_SSE41;
2008 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2009 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2010 VAddMicrokernelTester()
2011 .batch_size(batch_size)
2012 .a_zero_point(a_zero_point)
2013 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2014 }
2015 }
2016 }
2017
2018 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
2019 TEST_REQUIRES_X86_SSE41;
2020 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2021 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2022 VAddMicrokernelTester()
2023 .batch_size(batch_size)
2024 .b_zero_point(b_zero_point)
2025 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2026 }
2027 }
2028 }
2029
2030 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
2031 TEST_REQUIRES_X86_SSE41;
2032 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2033 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2034 VAddMicrokernelTester()
2035 .batch_size(batch_size)
2036 .y_zero_point(y_zero_point)
2037 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2038 }
2039 }
2040 }
2041
2042 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
2043 TEST_REQUIRES_X86_SSE41;
2044 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2045 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2046 VAddMicrokernelTester()
2047 .batch_size(batch_size)
2048 .a_scale(a_scale)
2049 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2050 }
2051 }
2052 }
2053
2054 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
2055 TEST_REQUIRES_X86_SSE41;
2056 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2057 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2058 VAddMicrokernelTester()
2059 .batch_size(batch_size)
2060 .b_scale(b_scale)
2061 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2062 }
2063 }
2064 }
2065
2066 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
2067 TEST_REQUIRES_X86_SSE41;
2068 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2069 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2070 VAddMicrokernelTester()
2071 .batch_size(batch_size)
2072 .y_scale(y_scale)
2073 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2074 }
2075 }
2076 }
2077
2078 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
2079 TEST_REQUIRES_X86_SSE41;
2080 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2081 VAddMicrokernelTester()
2082 .batch_size(batch_size)
2083 .qmin(128)
2084 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2085 }
2086 }
2087
2088 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
2089 TEST_REQUIRES_X86_SSE41;
2090 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2091 VAddMicrokernelTester()
2092 .batch_size(batch_size)
2093 .qmax(128)
2094 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8);
2095 }
2096 }
2097#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2098
2099
2100#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2101 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
2102 TEST_REQUIRES_X86_SSE41;
2103 VAddMicrokernelTester()
2104 .batch_size(16)
2105 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2106 }
2107
2108 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
2109 TEST_REQUIRES_X86_SSE41;
2110 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2111 VAddMicrokernelTester()
2112 .batch_size(batch_size)
2113 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2114 }
2115 }
2116
2117 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
2118 TEST_REQUIRES_X86_SSE41;
2119 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2120 VAddMicrokernelTester()
2121 .batch_size(batch_size)
2122 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2123 }
2124 }
2125
2126 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
2127 TEST_REQUIRES_X86_SSE41;
2128 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2129 VAddMicrokernelTester()
2130 .batch_size(batch_size)
2131 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2132 }
2133 }
2134
2135 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
2136 TEST_REQUIRES_X86_SSE41;
2137 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2138 VAddMicrokernelTester()
2139 .batch_size(batch_size)
2140 .inplace_a(true)
2141 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2142 }
2143 }
2144
2145 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
2146 TEST_REQUIRES_X86_SSE41;
2147 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2148 VAddMicrokernelTester()
2149 .batch_size(batch_size)
2150 .inplace_b(true)
2151 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2152 }
2153 }
2154
2155 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
2156 TEST_REQUIRES_X86_SSE41;
2157 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2158 VAddMicrokernelTester()
2159 .batch_size(batch_size)
2160 .inplace_a(true)
2161 .inplace_b(true)
2162 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2163 }
2164 }
2165
2166 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
2167 TEST_REQUIRES_X86_SSE41;
2168 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2169 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2170 VAddMicrokernelTester()
2171 .batch_size(batch_size)
2172 .a_zero_point(a_zero_point)
2173 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2174 }
2175 }
2176 }
2177
2178 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
2179 TEST_REQUIRES_X86_SSE41;
2180 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2181 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2182 VAddMicrokernelTester()
2183 .batch_size(batch_size)
2184 .b_zero_point(b_zero_point)
2185 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2186 }
2187 }
2188 }
2189
2190 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
2191 TEST_REQUIRES_X86_SSE41;
2192 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2193 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2194 VAddMicrokernelTester()
2195 .batch_size(batch_size)
2196 .y_zero_point(y_zero_point)
2197 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2198 }
2199 }
2200 }
2201
2202 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
2203 TEST_REQUIRES_X86_SSE41;
2204 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2205 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2206 VAddMicrokernelTester()
2207 .batch_size(batch_size)
2208 .a_scale(a_scale)
2209 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2210 }
2211 }
2212 }
2213
2214 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
2215 TEST_REQUIRES_X86_SSE41;
2216 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2217 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2218 VAddMicrokernelTester()
2219 .batch_size(batch_size)
2220 .b_scale(b_scale)
2221 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2222 }
2223 }
2224 }
2225
2226 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
2227 TEST_REQUIRES_X86_SSE41;
2228 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2229 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2230 VAddMicrokernelTester()
2231 .batch_size(batch_size)
2232 .y_scale(y_scale)
2233 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2234 }
2235 }
2236 }
2237
2238 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
2239 TEST_REQUIRES_X86_SSE41;
2240 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2241 VAddMicrokernelTester()
2242 .batch_size(batch_size)
2243 .qmin(128)
2244 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2245 }
2246 }
2247
2248 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
2249 TEST_REQUIRES_X86_SSE41;
2250 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2251 VAddMicrokernelTester()
2252 .batch_size(batch_size)
2253 .qmax(128)
2254 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16);
2255 }
2256 }
2257#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2258
2259
2260#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2261 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
2262 TEST_REQUIRES_X86_SSE41;
2263 VAddMicrokernelTester()
2264 .batch_size(24)
2265 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2266 }
2267
2268 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
2269 TEST_REQUIRES_X86_SSE41;
2270 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
2271 VAddMicrokernelTester()
2272 .batch_size(batch_size)
2273 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2274 }
2275 }
2276
2277 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
2278 TEST_REQUIRES_X86_SSE41;
2279 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
2280 VAddMicrokernelTester()
2281 .batch_size(batch_size)
2282 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2283 }
2284 }
2285
2286 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
2287 TEST_REQUIRES_X86_SSE41;
2288 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
2289 VAddMicrokernelTester()
2290 .batch_size(batch_size)
2291 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2292 }
2293 }
2294
2295 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a) {
2296 TEST_REQUIRES_X86_SSE41;
2297 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2298 VAddMicrokernelTester()
2299 .batch_size(batch_size)
2300 .inplace_a(true)
2301 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2302 }
2303 }
2304
2305 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_b) {
2306 TEST_REQUIRES_X86_SSE41;
2307 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2308 VAddMicrokernelTester()
2309 .batch_size(batch_size)
2310 .inplace_b(true)
2311 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2312 }
2313 }
2314
2315 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a_and_b) {
2316 TEST_REQUIRES_X86_SSE41;
2317 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2318 VAddMicrokernelTester()
2319 .batch_size(batch_size)
2320 .inplace_a(true)
2321 .inplace_b(true)
2322 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2323 }
2324 }
2325
2326 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
2327 TEST_REQUIRES_X86_SSE41;
2328 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2329 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2330 VAddMicrokernelTester()
2331 .batch_size(batch_size)
2332 .a_zero_point(a_zero_point)
2333 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2334 }
2335 }
2336 }
2337
2338 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
2339 TEST_REQUIRES_X86_SSE41;
2340 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2341 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2342 VAddMicrokernelTester()
2343 .batch_size(batch_size)
2344 .b_zero_point(b_zero_point)
2345 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2346 }
2347 }
2348 }
2349
2350 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
2351 TEST_REQUIRES_X86_SSE41;
2352 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2353 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2354 VAddMicrokernelTester()
2355 .batch_size(batch_size)
2356 .y_zero_point(y_zero_point)
2357 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2358 }
2359 }
2360 }
2361
2362 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
2363 TEST_REQUIRES_X86_SSE41;
2364 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2365 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2366 VAddMicrokernelTester()
2367 .batch_size(batch_size)
2368 .a_scale(a_scale)
2369 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2370 }
2371 }
2372 }
2373
2374 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
2375 TEST_REQUIRES_X86_SSE41;
2376 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2377 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2378 VAddMicrokernelTester()
2379 .batch_size(batch_size)
2380 .b_scale(b_scale)
2381 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2382 }
2383 }
2384 }
2385
2386 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
2387 TEST_REQUIRES_X86_SSE41;
2388 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2389 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2390 VAddMicrokernelTester()
2391 .batch_size(batch_size)
2392 .y_scale(y_scale)
2393 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2394 }
2395 }
2396 }
2397
2398 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
2399 TEST_REQUIRES_X86_SSE41;
2400 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2401 VAddMicrokernelTester()
2402 .batch_size(batch_size)
2403 .qmin(128)
2404 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2405 }
2406 }
2407
2408 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
2409 TEST_REQUIRES_X86_SSE41;
2410 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2411 VAddMicrokernelTester()
2412 .batch_size(batch_size)
2413 .qmax(128)
2414 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24);
2415 }
2416 }
2417#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2418
2419
2420#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2421 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
2422 TEST_REQUIRES_X86_SSE41;
2423 VAddMicrokernelTester()
2424 .batch_size(32)
2425 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2426 }
2427
2428 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
2429 TEST_REQUIRES_X86_SSE41;
2430 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
2431 VAddMicrokernelTester()
2432 .batch_size(batch_size)
2433 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2434 }
2435 }
2436
2437 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
2438 TEST_REQUIRES_X86_SSE41;
2439 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
2440 VAddMicrokernelTester()
2441 .batch_size(batch_size)
2442 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2443 }
2444 }
2445
2446 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
2447 TEST_REQUIRES_X86_SSE41;
2448 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
2449 VAddMicrokernelTester()
2450 .batch_size(batch_size)
2451 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2452 }
2453 }
2454
2455 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a) {
2456 TEST_REQUIRES_X86_SSE41;
2457 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2458 VAddMicrokernelTester()
2459 .batch_size(batch_size)
2460 .inplace_a(true)
2461 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2462 }
2463 }
2464
2465 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_b) {
2466 TEST_REQUIRES_X86_SSE41;
2467 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2468 VAddMicrokernelTester()
2469 .batch_size(batch_size)
2470 .inplace_b(true)
2471 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2472 }
2473 }
2474
2475 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a_and_b) {
2476 TEST_REQUIRES_X86_SSE41;
2477 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2478 VAddMicrokernelTester()
2479 .batch_size(batch_size)
2480 .inplace_a(true)
2481 .inplace_b(true)
2482 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2483 }
2484 }
2485
2486 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
2487 TEST_REQUIRES_X86_SSE41;
2488 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2489 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2490 VAddMicrokernelTester()
2491 .batch_size(batch_size)
2492 .a_zero_point(a_zero_point)
2493 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2494 }
2495 }
2496 }
2497
2498 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
2499 TEST_REQUIRES_X86_SSE41;
2500 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2501 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2502 VAddMicrokernelTester()
2503 .batch_size(batch_size)
2504 .b_zero_point(b_zero_point)
2505 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2506 }
2507 }
2508 }
2509
2510 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
2511 TEST_REQUIRES_X86_SSE41;
2512 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2513 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2514 VAddMicrokernelTester()
2515 .batch_size(batch_size)
2516 .y_zero_point(y_zero_point)
2517 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2518 }
2519 }
2520 }
2521
2522 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
2523 TEST_REQUIRES_X86_SSE41;
2524 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2525 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2526 VAddMicrokernelTester()
2527 .batch_size(batch_size)
2528 .a_scale(a_scale)
2529 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2530 }
2531 }
2532 }
2533
2534 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
2535 TEST_REQUIRES_X86_SSE41;
2536 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2537 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2538 VAddMicrokernelTester()
2539 .batch_size(batch_size)
2540 .b_scale(b_scale)
2541 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2542 }
2543 }
2544 }
2545
2546 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
2547 TEST_REQUIRES_X86_SSE41;
2548 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2549 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2550 VAddMicrokernelTester()
2551 .batch_size(batch_size)
2552 .y_scale(y_scale)
2553 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2554 }
2555 }
2556 }
2557
2558 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
2559 TEST_REQUIRES_X86_SSE41;
2560 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2561 VAddMicrokernelTester()
2562 .batch_size(batch_size)
2563 .qmin(128)
2564 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2565 }
2566 }
2567
2568 TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
2569 TEST_REQUIRES_X86_SSE41;
2570 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
2571 VAddMicrokernelTester()
2572 .batch_size(batch_size)
2573 .qmax(128)
2574 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32);
2575 }
2576 }
2577#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2578
2579
2580#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2581 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
2582 TEST_REQUIRES_X86_XOP;
2583 VAddMicrokernelTester()
2584 .batch_size(8)
2585 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2586 }
2587
2588 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
2589 TEST_REQUIRES_X86_XOP;
2590 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2591 VAddMicrokernelTester()
2592 .batch_size(batch_size)
2593 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2594 }
2595 }
2596
2597 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
2598 TEST_REQUIRES_X86_XOP;
2599 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2600 VAddMicrokernelTester()
2601 .batch_size(batch_size)
2602 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2603 }
2604 }
2605
2606 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
2607 TEST_REQUIRES_X86_XOP;
2608 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2609 VAddMicrokernelTester()
2610 .batch_size(batch_size)
2611 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2612 }
2613 }
2614
2615 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
2616 TEST_REQUIRES_X86_XOP;
2617 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2618 VAddMicrokernelTester()
2619 .batch_size(batch_size)
2620 .inplace_a(true)
2621 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2622 }
2623 }
2624
2625 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
2626 TEST_REQUIRES_X86_XOP;
2627 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2628 VAddMicrokernelTester()
2629 .batch_size(batch_size)
2630 .inplace_b(true)
2631 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2632 }
2633 }
2634
2635 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
2636 TEST_REQUIRES_X86_XOP;
2637 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2638 VAddMicrokernelTester()
2639 .batch_size(batch_size)
2640 .inplace_a(true)
2641 .inplace_b(true)
2642 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2643 }
2644 }
2645
2646 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
2647 TEST_REQUIRES_X86_XOP;
2648 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2649 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2650 VAddMicrokernelTester()
2651 .batch_size(batch_size)
2652 .a_zero_point(a_zero_point)
2653 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2654 }
2655 }
2656 }
2657
2658 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
2659 TEST_REQUIRES_X86_XOP;
2660 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2661 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2662 VAddMicrokernelTester()
2663 .batch_size(batch_size)
2664 .b_zero_point(b_zero_point)
2665 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2666 }
2667 }
2668 }
2669
2670 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
2671 TEST_REQUIRES_X86_XOP;
2672 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2673 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2674 VAddMicrokernelTester()
2675 .batch_size(batch_size)
2676 .y_zero_point(y_zero_point)
2677 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2678 }
2679 }
2680 }
2681
2682 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
2683 TEST_REQUIRES_X86_XOP;
2684 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2685 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2686 VAddMicrokernelTester()
2687 .batch_size(batch_size)
2688 .a_scale(a_scale)
2689 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2690 }
2691 }
2692 }
2693
2694 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
2695 TEST_REQUIRES_X86_XOP;
2696 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2697 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2698 VAddMicrokernelTester()
2699 .batch_size(batch_size)
2700 .b_scale(b_scale)
2701 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2702 }
2703 }
2704 }
2705
2706 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
2707 TEST_REQUIRES_X86_XOP;
2708 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2709 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2710 VAddMicrokernelTester()
2711 .batch_size(batch_size)
2712 .y_scale(y_scale)
2713 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2714 }
2715 }
2716 }
2717
2718 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
2719 TEST_REQUIRES_X86_XOP;
2720 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2721 VAddMicrokernelTester()
2722 .batch_size(batch_size)
2723 .qmin(128)
2724 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2725 }
2726 }
2727
2728 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
2729 TEST_REQUIRES_X86_XOP;
2730 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2731 VAddMicrokernelTester()
2732 .batch_size(batch_size)
2733 .qmax(128)
2734 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8);
2735 }
2736 }
2737#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2738
2739
2740#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2741 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
2742 TEST_REQUIRES_X86_XOP;
2743 VAddMicrokernelTester()
2744 .batch_size(16)
2745 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2746 }
2747
2748 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
2749 TEST_REQUIRES_X86_XOP;
2750 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2751 VAddMicrokernelTester()
2752 .batch_size(batch_size)
2753 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2754 }
2755 }
2756
2757 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
2758 TEST_REQUIRES_X86_XOP;
2759 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2760 VAddMicrokernelTester()
2761 .batch_size(batch_size)
2762 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2763 }
2764 }
2765
2766 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
2767 TEST_REQUIRES_X86_XOP;
2768 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2769 VAddMicrokernelTester()
2770 .batch_size(batch_size)
2771 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2772 }
2773 }
2774
2775 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
2776 TEST_REQUIRES_X86_XOP;
2777 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2778 VAddMicrokernelTester()
2779 .batch_size(batch_size)
2780 .inplace_a(true)
2781 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2782 }
2783 }
2784
2785 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
2786 TEST_REQUIRES_X86_XOP;
2787 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2788 VAddMicrokernelTester()
2789 .batch_size(batch_size)
2790 .inplace_b(true)
2791 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2792 }
2793 }
2794
2795 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
2796 TEST_REQUIRES_X86_XOP;
2797 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2798 VAddMicrokernelTester()
2799 .batch_size(batch_size)
2800 .inplace_a(true)
2801 .inplace_b(true)
2802 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2803 }
2804 }
2805
2806 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
2807 TEST_REQUIRES_X86_XOP;
2808 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2809 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2810 VAddMicrokernelTester()
2811 .batch_size(batch_size)
2812 .a_zero_point(a_zero_point)
2813 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2814 }
2815 }
2816 }
2817
2818 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
2819 TEST_REQUIRES_X86_XOP;
2820 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2821 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2822 VAddMicrokernelTester()
2823 .batch_size(batch_size)
2824 .b_zero_point(b_zero_point)
2825 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2826 }
2827 }
2828 }
2829
2830 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
2831 TEST_REQUIRES_X86_XOP;
2832 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2833 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2834 VAddMicrokernelTester()
2835 .batch_size(batch_size)
2836 .y_zero_point(y_zero_point)
2837 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2838 }
2839 }
2840 }
2841
2842 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
2843 TEST_REQUIRES_X86_XOP;
2844 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2845 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2846 VAddMicrokernelTester()
2847 .batch_size(batch_size)
2848 .a_scale(a_scale)
2849 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2850 }
2851 }
2852 }
2853
2854 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
2855 TEST_REQUIRES_X86_XOP;
2856 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2857 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2858 VAddMicrokernelTester()
2859 .batch_size(batch_size)
2860 .b_scale(b_scale)
2861 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2862 }
2863 }
2864 }
2865
2866 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
2867 TEST_REQUIRES_X86_XOP;
2868 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2869 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2870 VAddMicrokernelTester()
2871 .batch_size(batch_size)
2872 .y_scale(y_scale)
2873 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2874 }
2875 }
2876 }
2877
2878 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
2879 TEST_REQUIRES_X86_XOP;
2880 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2881 VAddMicrokernelTester()
2882 .batch_size(batch_size)
2883 .qmin(128)
2884 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2885 }
2886 }
2887
2888 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
2889 TEST_REQUIRES_X86_XOP;
2890 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2891 VAddMicrokernelTester()
2892 .batch_size(batch_size)
2893 .qmax(128)
2894 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16);
2895 }
2896 }
2897#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2898
2899
2900#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2901 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
2902 TEST_REQUIRES_X86_XOP;
2903 VAddMicrokernelTester()
2904 .batch_size(24)
2905 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2906 }
2907
2908 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
2909 TEST_REQUIRES_X86_XOP;
2910 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
2911 VAddMicrokernelTester()
2912 .batch_size(batch_size)
2913 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2914 }
2915 }
2916
2917 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
2918 TEST_REQUIRES_X86_XOP;
2919 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
2920 VAddMicrokernelTester()
2921 .batch_size(batch_size)
2922 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2923 }
2924 }
2925
2926 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
2927 TEST_REQUIRES_X86_XOP;
2928 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
2929 VAddMicrokernelTester()
2930 .batch_size(batch_size)
2931 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2932 }
2933 }
2934
2935 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a) {
2936 TEST_REQUIRES_X86_XOP;
2937 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2938 VAddMicrokernelTester()
2939 .batch_size(batch_size)
2940 .inplace_a(true)
2941 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2942 }
2943 }
2944
2945 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_b) {
2946 TEST_REQUIRES_X86_XOP;
2947 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2948 VAddMicrokernelTester()
2949 .batch_size(batch_size)
2950 .inplace_b(true)
2951 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2952 }
2953 }
2954
2955 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a_and_b) {
2956 TEST_REQUIRES_X86_XOP;
2957 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2958 VAddMicrokernelTester()
2959 .batch_size(batch_size)
2960 .inplace_a(true)
2961 .inplace_b(true)
2962 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2963 }
2964 }
2965
2966 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
2967 TEST_REQUIRES_X86_XOP;
2968 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2969 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2970 VAddMicrokernelTester()
2971 .batch_size(batch_size)
2972 .a_zero_point(a_zero_point)
2973 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2974 }
2975 }
2976 }
2977
2978 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
2979 TEST_REQUIRES_X86_XOP;
2980 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2981 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2982 VAddMicrokernelTester()
2983 .batch_size(batch_size)
2984 .b_zero_point(b_zero_point)
2985 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2986 }
2987 }
2988 }
2989
2990 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
2991 TEST_REQUIRES_X86_XOP;
2992 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2993 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2994 VAddMicrokernelTester()
2995 .batch_size(batch_size)
2996 .y_zero_point(y_zero_point)
2997 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
2998 }
2999 }
3000 }
3001
3002 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
3003 TEST_REQUIRES_X86_XOP;
3004 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3005 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3006 VAddMicrokernelTester()
3007 .batch_size(batch_size)
3008 .a_scale(a_scale)
3009 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
3010 }
3011 }
3012 }
3013
3014 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
3015 TEST_REQUIRES_X86_XOP;
3016 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3017 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3018 VAddMicrokernelTester()
3019 .batch_size(batch_size)
3020 .b_scale(b_scale)
3021 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
3022 }
3023 }
3024 }
3025
3026 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
3027 TEST_REQUIRES_X86_XOP;
3028 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3029 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3030 VAddMicrokernelTester()
3031 .batch_size(batch_size)
3032 .y_scale(y_scale)
3033 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
3034 }
3035 }
3036 }
3037
3038 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmin) {
3039 TEST_REQUIRES_X86_XOP;
3040 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3041 VAddMicrokernelTester()
3042 .batch_size(batch_size)
3043 .qmin(128)
3044 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
3045 }
3046 }
3047
3048 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmax) {
3049 TEST_REQUIRES_X86_XOP;
3050 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3051 VAddMicrokernelTester()
3052 .batch_size(batch_size)
3053 .qmax(128)
3054 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24);
3055 }
3056 }
3057#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3058
3059
3060#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3061 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
3062 TEST_REQUIRES_X86_XOP;
3063 VAddMicrokernelTester()
3064 .batch_size(32)
3065 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3066 }
3067
3068 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
3069 TEST_REQUIRES_X86_XOP;
3070 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
3071 VAddMicrokernelTester()
3072 .batch_size(batch_size)
3073 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3074 }
3075 }
3076
3077 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
3078 TEST_REQUIRES_X86_XOP;
3079 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
3080 VAddMicrokernelTester()
3081 .batch_size(batch_size)
3082 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3083 }
3084 }
3085
3086 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
3087 TEST_REQUIRES_X86_XOP;
3088 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
3089 VAddMicrokernelTester()
3090 .batch_size(batch_size)
3091 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3092 }
3093 }
3094
3095 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a) {
3096 TEST_REQUIRES_X86_XOP;
3097 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3098 VAddMicrokernelTester()
3099 .batch_size(batch_size)
3100 .inplace_a(true)
3101 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3102 }
3103 }
3104
3105 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_b) {
3106 TEST_REQUIRES_X86_XOP;
3107 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3108 VAddMicrokernelTester()
3109 .batch_size(batch_size)
3110 .inplace_b(true)
3111 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3112 }
3113 }
3114
3115 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a_and_b) {
3116 TEST_REQUIRES_X86_XOP;
3117 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3118 VAddMicrokernelTester()
3119 .batch_size(batch_size)
3120 .inplace_a(true)
3121 .inplace_b(true)
3122 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3123 }
3124 }
3125
3126 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
3127 TEST_REQUIRES_X86_XOP;
3128 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3129 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3130 VAddMicrokernelTester()
3131 .batch_size(batch_size)
3132 .a_zero_point(a_zero_point)
3133 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3134 }
3135 }
3136 }
3137
3138 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
3139 TEST_REQUIRES_X86_XOP;
3140 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3141 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3142 VAddMicrokernelTester()
3143 .batch_size(batch_size)
3144 .b_zero_point(b_zero_point)
3145 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3146 }
3147 }
3148 }
3149
3150 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
3151 TEST_REQUIRES_X86_XOP;
3152 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3153 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3154 VAddMicrokernelTester()
3155 .batch_size(batch_size)
3156 .y_zero_point(y_zero_point)
3157 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3158 }
3159 }
3160 }
3161
3162 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
3163 TEST_REQUIRES_X86_XOP;
3164 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3165 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3166 VAddMicrokernelTester()
3167 .batch_size(batch_size)
3168 .a_scale(a_scale)
3169 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3170 }
3171 }
3172 }
3173
3174 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
3175 TEST_REQUIRES_X86_XOP;
3176 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3177 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3178 VAddMicrokernelTester()
3179 .batch_size(batch_size)
3180 .b_scale(b_scale)
3181 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3182 }
3183 }
3184 }
3185
3186 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
3187 TEST_REQUIRES_X86_XOP;
3188 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3189 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3190 VAddMicrokernelTester()
3191 .batch_size(batch_size)
3192 .y_scale(y_scale)
3193 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3194 }
3195 }
3196 }
3197
3198 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmin) {
3199 TEST_REQUIRES_X86_XOP;
3200 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3201 VAddMicrokernelTester()
3202 .batch_size(batch_size)
3203 .qmin(128)
3204 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3205 }
3206 }
3207
3208 TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmax) {
3209 TEST_REQUIRES_X86_XOP;
3210 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3211 VAddMicrokernelTester()
3212 .batch_size(batch_size)
3213 .qmax(128)
3214 .Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32);
3215 }
3216 }
3217#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3218
3219
Marat Dukhane6dc0b62020-09-08 23:57:14 -07003220#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3221 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
3222 TEST_REQUIRES_X86_AVX2;
3223 VAddMicrokernelTester()
3224 .batch_size(8)
3225 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3226 }
3227
3228 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
3229 TEST_REQUIRES_X86_AVX2;
3230 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
3231 VAddMicrokernelTester()
3232 .batch_size(batch_size)
3233 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3234 }
3235 }
3236
3237 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
3238 TEST_REQUIRES_X86_AVX2;
3239 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
3240 VAddMicrokernelTester()
3241 .batch_size(batch_size)
3242 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3243 }
3244 }
3245
3246 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
3247 TEST_REQUIRES_X86_AVX2;
3248 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
3249 VAddMicrokernelTester()
3250 .batch_size(batch_size)
3251 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3252 }
3253 }
3254
3255 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
3256 TEST_REQUIRES_X86_AVX2;
3257 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3258 VAddMicrokernelTester()
3259 .batch_size(batch_size)
3260 .inplace_a(true)
3261 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3262 }
3263 }
3264
3265 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
3266 TEST_REQUIRES_X86_AVX2;
3267 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3268 VAddMicrokernelTester()
3269 .batch_size(batch_size)
3270 .inplace_b(true)
3271 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3272 }
3273 }
3274
3275 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
3276 TEST_REQUIRES_X86_AVX2;
3277 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3278 VAddMicrokernelTester()
3279 .batch_size(batch_size)
3280 .inplace_a(true)
3281 .inplace_b(true)
3282 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3283 }
3284 }
3285
3286 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
3287 TEST_REQUIRES_X86_AVX2;
3288 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3289 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3290 VAddMicrokernelTester()
3291 .batch_size(batch_size)
3292 .a_zero_point(a_zero_point)
3293 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3294 }
3295 }
3296 }
3297
3298 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
3299 TEST_REQUIRES_X86_AVX2;
3300 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3301 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3302 VAddMicrokernelTester()
3303 .batch_size(batch_size)
3304 .b_zero_point(b_zero_point)
3305 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3306 }
3307 }
3308 }
3309
3310 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
3311 TEST_REQUIRES_X86_AVX2;
3312 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3313 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3314 VAddMicrokernelTester()
3315 .batch_size(batch_size)
3316 .y_zero_point(y_zero_point)
3317 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3318 }
3319 }
3320 }
3321
3322 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
3323 TEST_REQUIRES_X86_AVX2;
3324 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3325 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3326 VAddMicrokernelTester()
3327 .batch_size(batch_size)
3328 .a_scale(a_scale)
3329 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3330 }
3331 }
3332 }
3333
3334 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
3335 TEST_REQUIRES_X86_AVX2;
3336 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3337 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3338 VAddMicrokernelTester()
3339 .batch_size(batch_size)
3340 .b_scale(b_scale)
3341 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3342 }
3343 }
3344 }
3345
3346 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
3347 TEST_REQUIRES_X86_AVX2;
3348 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3349 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3350 VAddMicrokernelTester()
3351 .batch_size(batch_size)
3352 .y_scale(y_scale)
3353 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3354 }
3355 }
3356 }
3357
3358 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
3359 TEST_REQUIRES_X86_AVX2;
3360 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3361 VAddMicrokernelTester()
3362 .batch_size(batch_size)
3363 .qmin(128)
3364 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3365 }
3366 }
3367
3368 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
3369 TEST_REQUIRES_X86_AVX2;
3370 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3371 VAddMicrokernelTester()
3372 .batch_size(batch_size)
3373 .qmax(128)
3374 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8);
3375 }
3376 }
3377#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3378
3379
3380#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3381 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
3382 TEST_REQUIRES_X86_AVX2;
3383 VAddMicrokernelTester()
3384 .batch_size(16)
3385 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3386 }
3387
3388 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
3389 TEST_REQUIRES_X86_AVX2;
3390 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
3391 VAddMicrokernelTester()
3392 .batch_size(batch_size)
3393 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3394 }
3395 }
3396
3397 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
3398 TEST_REQUIRES_X86_AVX2;
3399 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
3400 VAddMicrokernelTester()
3401 .batch_size(batch_size)
3402 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3403 }
3404 }
3405
3406 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
3407 TEST_REQUIRES_X86_AVX2;
3408 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
3409 VAddMicrokernelTester()
3410 .batch_size(batch_size)
3411 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3412 }
3413 }
3414
3415 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
3416 TEST_REQUIRES_X86_AVX2;
3417 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3418 VAddMicrokernelTester()
3419 .batch_size(batch_size)
3420 .inplace_a(true)
3421 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3422 }
3423 }
3424
3425 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
3426 TEST_REQUIRES_X86_AVX2;
3427 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3428 VAddMicrokernelTester()
3429 .batch_size(batch_size)
3430 .inplace_b(true)
3431 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3432 }
3433 }
3434
3435 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
3436 TEST_REQUIRES_X86_AVX2;
3437 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3438 VAddMicrokernelTester()
3439 .batch_size(batch_size)
3440 .inplace_a(true)
3441 .inplace_b(true)
3442 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3443 }
3444 }
3445
3446 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
3447 TEST_REQUIRES_X86_AVX2;
3448 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3449 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3450 VAddMicrokernelTester()
3451 .batch_size(batch_size)
3452 .a_zero_point(a_zero_point)
3453 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3454 }
3455 }
3456 }
3457
3458 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
3459 TEST_REQUIRES_X86_AVX2;
3460 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3461 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3462 VAddMicrokernelTester()
3463 .batch_size(batch_size)
3464 .b_zero_point(b_zero_point)
3465 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3466 }
3467 }
3468 }
3469
3470 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
3471 TEST_REQUIRES_X86_AVX2;
3472 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3473 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3474 VAddMicrokernelTester()
3475 .batch_size(batch_size)
3476 .y_zero_point(y_zero_point)
3477 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3478 }
3479 }
3480 }
3481
3482 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
3483 TEST_REQUIRES_X86_AVX2;
3484 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3485 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3486 VAddMicrokernelTester()
3487 .batch_size(batch_size)
3488 .a_scale(a_scale)
3489 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3490 }
3491 }
3492 }
3493
3494 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
3495 TEST_REQUIRES_X86_AVX2;
3496 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3497 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3498 VAddMicrokernelTester()
3499 .batch_size(batch_size)
3500 .b_scale(b_scale)
3501 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3502 }
3503 }
3504 }
3505
3506 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
3507 TEST_REQUIRES_X86_AVX2;
3508 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3509 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3510 VAddMicrokernelTester()
3511 .batch_size(batch_size)
3512 .y_scale(y_scale)
3513 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3514 }
3515 }
3516 }
3517
3518 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
3519 TEST_REQUIRES_X86_AVX2;
3520 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3521 VAddMicrokernelTester()
3522 .batch_size(batch_size)
3523 .qmin(128)
3524 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3525 }
3526 }
3527
3528 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
3529 TEST_REQUIRES_X86_AVX2;
3530 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
3531 VAddMicrokernelTester()
3532 .batch_size(batch_size)
3533 .qmax(128)
3534 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16);
3535 }
3536 }
3537#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3538
3539
3540#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3541 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
3542 TEST_REQUIRES_X86_AVX2;
3543 VAddMicrokernelTester()
3544 .batch_size(24)
3545 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3546 }
3547
3548 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
3549 TEST_REQUIRES_X86_AVX2;
3550 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
3551 VAddMicrokernelTester()
3552 .batch_size(batch_size)
3553 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3554 }
3555 }
3556
3557 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
3558 TEST_REQUIRES_X86_AVX2;
3559 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
3560 VAddMicrokernelTester()
3561 .batch_size(batch_size)
3562 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3563 }
3564 }
3565
3566 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
3567 TEST_REQUIRES_X86_AVX2;
3568 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
3569 VAddMicrokernelTester()
3570 .batch_size(batch_size)
3571 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3572 }
3573 }
3574
3575 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a) {
3576 TEST_REQUIRES_X86_AVX2;
3577 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3578 VAddMicrokernelTester()
3579 .batch_size(batch_size)
3580 .inplace_a(true)
3581 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3582 }
3583 }
3584
3585 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_b) {
3586 TEST_REQUIRES_X86_AVX2;
3587 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3588 VAddMicrokernelTester()
3589 .batch_size(batch_size)
3590 .inplace_b(true)
3591 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3592 }
3593 }
3594
3595 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a_and_b) {
3596 TEST_REQUIRES_X86_AVX2;
3597 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3598 VAddMicrokernelTester()
3599 .batch_size(batch_size)
3600 .inplace_a(true)
3601 .inplace_b(true)
3602 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3603 }
3604 }
3605
3606 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
3607 TEST_REQUIRES_X86_AVX2;
3608 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3609 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3610 VAddMicrokernelTester()
3611 .batch_size(batch_size)
3612 .a_zero_point(a_zero_point)
3613 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3614 }
3615 }
3616 }
3617
3618 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
3619 TEST_REQUIRES_X86_AVX2;
3620 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3621 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3622 VAddMicrokernelTester()
3623 .batch_size(batch_size)
3624 .b_zero_point(b_zero_point)
3625 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3626 }
3627 }
3628 }
3629
3630 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
3631 TEST_REQUIRES_X86_AVX2;
3632 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3633 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3634 VAddMicrokernelTester()
3635 .batch_size(batch_size)
3636 .y_zero_point(y_zero_point)
3637 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3638 }
3639 }
3640 }
3641
3642 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
3643 TEST_REQUIRES_X86_AVX2;
3644 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3645 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3646 VAddMicrokernelTester()
3647 .batch_size(batch_size)
3648 .a_scale(a_scale)
3649 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3650 }
3651 }
3652 }
3653
3654 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
3655 TEST_REQUIRES_X86_AVX2;
3656 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3657 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3658 VAddMicrokernelTester()
3659 .batch_size(batch_size)
3660 .b_scale(b_scale)
3661 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3662 }
3663 }
3664 }
3665
3666 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
3667 TEST_REQUIRES_X86_AVX2;
3668 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3669 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3670 VAddMicrokernelTester()
3671 .batch_size(batch_size)
3672 .y_scale(y_scale)
3673 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3674 }
3675 }
3676 }
3677
3678 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
3679 TEST_REQUIRES_X86_AVX2;
3680 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3681 VAddMicrokernelTester()
3682 .batch_size(batch_size)
3683 .qmin(128)
3684 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3685 }
3686 }
3687
3688 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
3689 TEST_REQUIRES_X86_AVX2;
3690 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
3691 VAddMicrokernelTester()
3692 .batch_size(batch_size)
3693 .qmax(128)
3694 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24);
3695 }
3696 }
3697#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3698
3699
3700#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3701 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
3702 TEST_REQUIRES_X86_AVX2;
3703 VAddMicrokernelTester()
3704 .batch_size(32)
3705 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3706 }
3707
3708 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
3709 TEST_REQUIRES_X86_AVX2;
3710 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
3711 VAddMicrokernelTester()
3712 .batch_size(batch_size)
3713 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3714 }
3715 }
3716
3717 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
3718 TEST_REQUIRES_X86_AVX2;
3719 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
3720 VAddMicrokernelTester()
3721 .batch_size(batch_size)
3722 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3723 }
3724 }
3725
3726 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
3727 TEST_REQUIRES_X86_AVX2;
3728 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
3729 VAddMicrokernelTester()
3730 .batch_size(batch_size)
3731 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3732 }
3733 }
3734
3735 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a) {
3736 TEST_REQUIRES_X86_AVX2;
3737 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3738 VAddMicrokernelTester()
3739 .batch_size(batch_size)
3740 .inplace_a(true)
3741 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3742 }
3743 }
3744
3745 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_b) {
3746 TEST_REQUIRES_X86_AVX2;
3747 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3748 VAddMicrokernelTester()
3749 .batch_size(batch_size)
3750 .inplace_b(true)
3751 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3752 }
3753 }
3754
3755 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a_and_b) {
3756 TEST_REQUIRES_X86_AVX2;
3757 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3758 VAddMicrokernelTester()
3759 .batch_size(batch_size)
3760 .inplace_a(true)
3761 .inplace_b(true)
3762 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3763 }
3764 }
3765
3766 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
3767 TEST_REQUIRES_X86_AVX2;
3768 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3769 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3770 VAddMicrokernelTester()
3771 .batch_size(batch_size)
3772 .a_zero_point(a_zero_point)
3773 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3774 }
3775 }
3776 }
3777
3778 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
3779 TEST_REQUIRES_X86_AVX2;
3780 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3781 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3782 VAddMicrokernelTester()
3783 .batch_size(batch_size)
3784 .b_zero_point(b_zero_point)
3785 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3786 }
3787 }
3788 }
3789
3790 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
3791 TEST_REQUIRES_X86_AVX2;
3792 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3793 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3794 VAddMicrokernelTester()
3795 .batch_size(batch_size)
3796 .y_zero_point(y_zero_point)
3797 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3798 }
3799 }
3800 }
3801
3802 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
3803 TEST_REQUIRES_X86_AVX2;
3804 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3805 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3806 VAddMicrokernelTester()
3807 .batch_size(batch_size)
3808 .a_scale(a_scale)
3809 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3810 }
3811 }
3812 }
3813
3814 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
3815 TEST_REQUIRES_X86_AVX2;
3816 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3817 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3818 VAddMicrokernelTester()
3819 .batch_size(batch_size)
3820 .b_scale(b_scale)
3821 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3822 }
3823 }
3824 }
3825
3826 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
3827 TEST_REQUIRES_X86_AVX2;
3828 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3829 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3830 VAddMicrokernelTester()
3831 .batch_size(batch_size)
3832 .y_scale(y_scale)
3833 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3834 }
3835 }
3836 }
3837
3838 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
3839 TEST_REQUIRES_X86_AVX2;
3840 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3841 VAddMicrokernelTester()
3842 .batch_size(batch_size)
3843 .qmin(128)
3844 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3845 }
3846 }
3847
3848 TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
3849 TEST_REQUIRES_X86_AVX2;
3850 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
3851 VAddMicrokernelTester()
3852 .batch_size(batch_size)
3853 .qmax(128)
3854 .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32);
3855 }
3856 }
3857#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3858
3859
Marat Dukhan5df27f82020-09-02 23:59:21 -07003860#if XNN_ARCH_WASMSIMD
3861 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
3862 VAddMicrokernelTester()
3863 .batch_size(8)
3864 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3865 }
3866
3867 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
3868 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
3869 VAddMicrokernelTester()
3870 .batch_size(batch_size)
3871 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3872 }
3873 }
3874
3875 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
3876 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
3877 VAddMicrokernelTester()
3878 .batch_size(batch_size)
3879 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3880 }
3881 }
3882
3883 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
3884 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
3885 VAddMicrokernelTester()
3886 .batch_size(batch_size)
3887 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3888 }
3889 }
3890
3891 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
3892 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3893 VAddMicrokernelTester()
3894 .batch_size(batch_size)
3895 .inplace_a(true)
3896 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3897 }
3898 }
3899
3900 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
3901 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3902 VAddMicrokernelTester()
3903 .batch_size(batch_size)
3904 .inplace_b(true)
3905 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3906 }
3907 }
3908
3909 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
3910 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3911 VAddMicrokernelTester()
3912 .batch_size(batch_size)
3913 .inplace_a(true)
3914 .inplace_b(true)
3915 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3916 }
3917 }
3918
3919 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
3920 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3921 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
3922 VAddMicrokernelTester()
3923 .batch_size(batch_size)
3924 .a_zero_point(a_zero_point)
3925 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3926 }
3927 }
3928 }
3929
3930 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
3931 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3932 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
3933 VAddMicrokernelTester()
3934 .batch_size(batch_size)
3935 .b_zero_point(b_zero_point)
3936 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3937 }
3938 }
3939 }
3940
3941 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
3942 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3943 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
3944 VAddMicrokernelTester()
3945 .batch_size(batch_size)
3946 .y_zero_point(y_zero_point)
3947 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3948 }
3949 }
3950 }
3951
3952 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
3953 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3954 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
3955 VAddMicrokernelTester()
3956 .batch_size(batch_size)
3957 .a_scale(a_scale)
3958 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3959 }
3960 }
3961 }
3962
3963 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
3964 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3965 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
3966 VAddMicrokernelTester()
3967 .batch_size(batch_size)
3968 .b_scale(b_scale)
3969 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3970 }
3971 }
3972 }
3973
3974 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
3975 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3976 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
3977 VAddMicrokernelTester()
3978 .batch_size(batch_size)
3979 .y_scale(y_scale)
3980 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3981 }
3982 }
3983 }
3984
3985 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmin) {
3986 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3987 VAddMicrokernelTester()
3988 .batch_size(batch_size)
3989 .qmin(128)
3990 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
3991 }
3992 }
3993
3994 TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmax) {
3995 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
3996 VAddMicrokernelTester()
3997 .batch_size(batch_size)
3998 .qmax(128)
3999 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8);
4000 }
4001 }
4002#endif // XNN_ARCH_WASMSIMD
4003
4004
4005#if XNN_ARCH_WASMSIMD
4006 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
4007 VAddMicrokernelTester()
4008 .batch_size(16)
4009 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4010 }
4011
4012 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
4013 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
4014 VAddMicrokernelTester()
4015 .batch_size(batch_size)
4016 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4017 }
4018 }
4019
4020 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
4021 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
4022 VAddMicrokernelTester()
4023 .batch_size(batch_size)
4024 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4025 }
4026 }
4027
4028 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
4029 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
4030 VAddMicrokernelTester()
4031 .batch_size(batch_size)
4032 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4033 }
4034 }
4035
4036 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
4037 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4038 VAddMicrokernelTester()
4039 .batch_size(batch_size)
4040 .inplace_a(true)
4041 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4042 }
4043 }
4044
4045 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
4046 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4047 VAddMicrokernelTester()
4048 .batch_size(batch_size)
4049 .inplace_b(true)
4050 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4051 }
4052 }
4053
4054 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
4055 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4056 VAddMicrokernelTester()
4057 .batch_size(batch_size)
4058 .inplace_a(true)
4059 .inplace_b(true)
4060 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4061 }
4062 }
4063
4064 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
4065 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4066 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4067 VAddMicrokernelTester()
4068 .batch_size(batch_size)
4069 .a_zero_point(a_zero_point)
4070 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4071 }
4072 }
4073 }
4074
4075 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
4076 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4077 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4078 VAddMicrokernelTester()
4079 .batch_size(batch_size)
4080 .b_zero_point(b_zero_point)
4081 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4082 }
4083 }
4084 }
4085
4086 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
4087 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4088 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4089 VAddMicrokernelTester()
4090 .batch_size(batch_size)
4091 .y_zero_point(y_zero_point)
4092 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4093 }
4094 }
4095 }
4096
4097 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
4098 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4099 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4100 VAddMicrokernelTester()
4101 .batch_size(batch_size)
4102 .a_scale(a_scale)
4103 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4104 }
4105 }
4106 }
4107
4108 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
4109 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4110 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4111 VAddMicrokernelTester()
4112 .batch_size(batch_size)
4113 .b_scale(b_scale)
4114 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4115 }
4116 }
4117 }
4118
4119 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
4120 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4121 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4122 VAddMicrokernelTester()
4123 .batch_size(batch_size)
4124 .y_scale(y_scale)
4125 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4126 }
4127 }
4128 }
4129
4130 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmin) {
4131 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4132 VAddMicrokernelTester()
4133 .batch_size(batch_size)
4134 .qmin(128)
4135 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4136 }
4137 }
4138
4139 TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmax) {
4140 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
4141 VAddMicrokernelTester()
4142 .batch_size(batch_size)
4143 .qmax(128)
4144 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16);
4145 }
4146 }
4147#endif // XNN_ARCH_WASMSIMD
4148
4149
4150#if XNN_ARCH_WASMSIMD
4151 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_eq_24) {
4152 VAddMicrokernelTester()
4153 .batch_size(24)
4154 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4155 }
4156
4157 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_div_24) {
4158 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
4159 VAddMicrokernelTester()
4160 .batch_size(batch_size)
4161 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4162 }
4163 }
4164
4165 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_lt_24) {
4166 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
4167 VAddMicrokernelTester()
4168 .batch_size(batch_size)
4169 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4170 }
4171 }
4172
4173 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_gt_24) {
4174 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
4175 VAddMicrokernelTester()
4176 .batch_size(batch_size)
4177 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4178 }
4179 }
4180
4181 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a) {
4182 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4183 VAddMicrokernelTester()
4184 .batch_size(batch_size)
4185 .inplace_a(true)
4186 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4187 }
4188 }
4189
4190 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_b) {
4191 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4192 VAddMicrokernelTester()
4193 .batch_size(batch_size)
4194 .inplace_b(true)
4195 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4196 }
4197 }
4198
4199 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a_and_b) {
4200 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4201 VAddMicrokernelTester()
4202 .batch_size(batch_size)
4203 .inplace_a(true)
4204 .inplace_b(true)
4205 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4206 }
4207 }
4208
4209 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_zero_point) {
4210 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4211 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4212 VAddMicrokernelTester()
4213 .batch_size(batch_size)
4214 .a_zero_point(a_zero_point)
4215 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4216 }
4217 }
4218 }
4219
4220 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_zero_point) {
4221 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4222 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4223 VAddMicrokernelTester()
4224 .batch_size(batch_size)
4225 .b_zero_point(b_zero_point)
4226 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4227 }
4228 }
4229 }
4230
4231 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_zero_point) {
4232 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4233 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4234 VAddMicrokernelTester()
4235 .batch_size(batch_size)
4236 .y_zero_point(y_zero_point)
4237 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4238 }
4239 }
4240 }
4241
4242 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_scale) {
4243 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4244 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4245 VAddMicrokernelTester()
4246 .batch_size(batch_size)
4247 .a_scale(a_scale)
4248 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4249 }
4250 }
4251 }
4252
4253 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_scale) {
4254 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4255 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4256 VAddMicrokernelTester()
4257 .batch_size(batch_size)
4258 .b_scale(b_scale)
4259 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4260 }
4261 }
4262 }
4263
4264 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_scale) {
4265 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4266 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4267 VAddMicrokernelTester()
4268 .batch_size(batch_size)
4269 .y_scale(y_scale)
4270 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4271 }
4272 }
4273 }
4274
4275 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmin) {
4276 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4277 VAddMicrokernelTester()
4278 .batch_size(batch_size)
4279 .qmin(128)
4280 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4281 }
4282 }
4283
4284 TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmax) {
4285 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
4286 VAddMicrokernelTester()
4287 .batch_size(batch_size)
4288 .qmax(128)
4289 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24);
4290 }
4291 }
4292#endif // XNN_ARCH_WASMSIMD
4293
4294
4295#if XNN_ARCH_WASMSIMD
4296 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
4297 VAddMicrokernelTester()
4298 .batch_size(32)
4299 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4300 }
4301
4302 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
4303 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
4304 VAddMicrokernelTester()
4305 .batch_size(batch_size)
4306 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4307 }
4308 }
4309
4310 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
4311 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
4312 VAddMicrokernelTester()
4313 .batch_size(batch_size)
4314 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4315 }
4316 }
4317
4318 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
4319 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
4320 VAddMicrokernelTester()
4321 .batch_size(batch_size)
4322 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4323 }
4324 }
4325
4326 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
4327 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4328 VAddMicrokernelTester()
4329 .batch_size(batch_size)
4330 .inplace_a(true)
4331 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4332 }
4333 }
4334
4335 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
4336 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4337 VAddMicrokernelTester()
4338 .batch_size(batch_size)
4339 .inplace_b(true)
4340 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4341 }
4342 }
4343
4344 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
4345 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4346 VAddMicrokernelTester()
4347 .batch_size(batch_size)
4348 .inplace_a(true)
4349 .inplace_b(true)
4350 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4351 }
4352 }
4353
4354 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
4355 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4356 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
4357 VAddMicrokernelTester()
4358 .batch_size(batch_size)
4359 .a_zero_point(a_zero_point)
4360 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4361 }
4362 }
4363 }
4364
4365 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
4366 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4367 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
4368 VAddMicrokernelTester()
4369 .batch_size(batch_size)
4370 .b_zero_point(b_zero_point)
4371 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4372 }
4373 }
4374 }
4375
4376 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
4377 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4378 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
4379 VAddMicrokernelTester()
4380 .batch_size(batch_size)
4381 .y_zero_point(y_zero_point)
4382 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4383 }
4384 }
4385 }
4386
4387 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
4388 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4389 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
4390 VAddMicrokernelTester()
4391 .batch_size(batch_size)
4392 .a_scale(a_scale)
4393 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4394 }
4395 }
4396 }
4397
4398 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
4399 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4400 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
4401 VAddMicrokernelTester()
4402 .batch_size(batch_size)
4403 .b_scale(b_scale)
4404 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4405 }
4406 }
4407 }
4408
4409 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
4410 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4411 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
4412 VAddMicrokernelTester()
4413 .batch_size(batch_size)
4414 .y_scale(y_scale)
4415 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4416 }
4417 }
4418 }
4419
4420 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmin) {
4421 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4422 VAddMicrokernelTester()
4423 .batch_size(batch_size)
4424 .qmin(128)
4425 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4426 }
4427 }
4428
4429 TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmax) {
4430 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
4431 VAddMicrokernelTester()
4432 .batch_size(batch_size)
4433 .qmax(128)
4434 .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32);
4435 }
4436 }
4437#endif // XNN_ARCH_WASMSIMD