// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-vaddc-minmax.yaml
//   Generator: tools/generate-vbinary-test.py


#include <cstddef>
#include <cstdint>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/vadd.h>
#include "vaddc-microkernel-tester.h"


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8; compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Each test begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the test
  // when NEON is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 8.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }

  // In-place option enabled; batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16; compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Each test begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the test
  // when NEON is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 16.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }

  // In-place option enabled; batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24; compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Each test begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the test
  // when NEON is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 24.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Batch sizes 1 through 23.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }

  // Batch sizes 25 through 47.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }

  // In-place option enabled; batch sizes 1 to 120 in steps of 23.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32; compiled only when
  // XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Each test begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the test
  // when NEON is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 32.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Batch sizes 1 through 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }

  // Batch sizes 33 through 63.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }

  // In-place option enabled; batch sizes 1 to 160 in steps of 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8; compiled only
  // when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Each test begins with
  // TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably skips the test
  // when SSE2 is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 8.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VAddCMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // In-place option enabled; batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16; compiled only
  // when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Each test begins with
  // TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably skips the test
  // when SSE2 is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 16.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE2;
    VAddCMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // In-place option enabled; batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24; compiled only
  // when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Each test begins with
  // TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably skips the test
  // when SSE2 is unavailable) and drives the kernel via VAddCMicrokernelTester.
  // NOTE(review): qmin(128)/qmax(128) do not fit in int8; presumably the tester
  // takes them on a 0..255 scale -- confirm in vaddc-microkernel-tester.h.

  // Batch size equal to 24.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_X86_SSE2;
    VAddCMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // Batch sizes 1 through 23.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // Batch sizes 25 through 47.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // In-place option enabled; batch sizes 1 to 120 in steps of 23.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // a_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // b_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // y_zero_point swept from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
      }
    }
  }

  // qmin set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }

  // qmax set to 128.
  TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


993#if XNN_ARCH_X86 || XNN_ARCH_X86_64
994 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
995 TEST_REQUIRES_X86_SSE2;
996 VAddCMicrokernelTester()
997 .batch_size(32)
998 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
999 }
1000
1001 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
1002 TEST_REQUIRES_X86_SSE2;
1003 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1004 VAddCMicrokernelTester()
1005 .batch_size(batch_size)
1006 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1007 }
1008 }
1009
1010 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
1011 TEST_REQUIRES_X86_SSE2;
1012 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1013 VAddCMicrokernelTester()
1014 .batch_size(batch_size)
1015 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1016 }
1017 }
1018
1019 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
1020 TEST_REQUIRES_X86_SSE2;
1021 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1022 VAddCMicrokernelTester()
1023 .batch_size(batch_size)
1024 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1025 }
1026 }
1027
1028 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, inplace) {
1029 TEST_REQUIRES_X86_SSE2;
1030 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1031 VAddCMicrokernelTester()
1032 .batch_size(batch_size)
1033 .inplace(true)
1034 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1035 }
1036 }
1037
Marat Dukhan95caee52020-09-02 03:41:32 -07001038 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
1039 TEST_REQUIRES_X86_SSE2;
1040 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1041 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1042 VAddCMicrokernelTester()
1043 .batch_size(batch_size)
1044 .a_zero_point(a_zero_point)
1045 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1046 }
1047 }
1048 }
1049
1050 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
1051 TEST_REQUIRES_X86_SSE2;
1052 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1053 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1054 VAddCMicrokernelTester()
1055 .batch_size(batch_size)
1056 .b_zero_point(b_zero_point)
1057 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1058 }
1059 }
1060 }
1061
1062 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
1063 TEST_REQUIRES_X86_SSE2;
1064 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1065 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1066 VAddCMicrokernelTester()
1067 .batch_size(batch_size)
1068 .y_zero_point(y_zero_point)
1069 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1070 }
1071 }
1072 }
1073
1074 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
1075 TEST_REQUIRES_X86_SSE2;
1076 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1077 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1078 VAddCMicrokernelTester()
1079 .batch_size(batch_size)
1080 .a_scale(a_scale)
1081 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1082 }
1083 }
1084 }
1085
1086 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
1087 TEST_REQUIRES_X86_SSE2;
1088 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1089 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1090 VAddCMicrokernelTester()
1091 .batch_size(batch_size)
1092 .b_scale(b_scale)
1093 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1094 }
1095 }
1096 }
1097
1098 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
1099 TEST_REQUIRES_X86_SSE2;
1100 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1101 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1102 VAddCMicrokernelTester()
1103 .batch_size(batch_size)
1104 .y_scale(y_scale)
1105 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1106 }
1107 }
1108 }
1109
Marat Dukhan0270d9f2020-08-11 00:56:46 -07001110 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
1111 TEST_REQUIRES_X86_SSE2;
1112 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1113 VAddCMicrokernelTester()
1114 .batch_size(batch_size)
1115 .qmin(128)
1116 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1117 }
1118 }
1119
1120 TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
1121 TEST_REQUIRES_X86_SSE2;
1122 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1123 VAddCMicrokernelTester()
1124 .batch_size(batch_size)
1125 .qmax(128)
1126 .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32);
1127 }
1128 }
1129#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1130
1131
1132#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1133 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
1134 TEST_REQUIRES_X86_SSE41;
1135 VAddCMicrokernelTester()
1136 .batch_size(8)
1137 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1138 }
1139
1140 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
1141 TEST_REQUIRES_X86_SSE41;
1142 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1143 VAddCMicrokernelTester()
1144 .batch_size(batch_size)
1145 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1146 }
1147 }
1148
1149 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
1150 TEST_REQUIRES_X86_SSE41;
1151 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1152 VAddCMicrokernelTester()
1153 .batch_size(batch_size)
1154 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1155 }
1156 }
1157
1158 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
1159 TEST_REQUIRES_X86_SSE41;
1160 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1161 VAddCMicrokernelTester()
1162 .batch_size(batch_size)
1163 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1164 }
1165 }
1166
1167 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, inplace) {
1168 TEST_REQUIRES_X86_SSE41;
1169 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1170 VAddCMicrokernelTester()
1171 .batch_size(batch_size)
1172 .inplace(true)
1173 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1174 }
1175 }
1176
Marat Dukhan95caee52020-09-02 03:41:32 -07001177 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
1178 TEST_REQUIRES_X86_SSE41;
1179 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1180 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1181 VAddCMicrokernelTester()
1182 .batch_size(batch_size)
1183 .a_zero_point(a_zero_point)
1184 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1185 }
1186 }
1187 }
1188
1189 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
1190 TEST_REQUIRES_X86_SSE41;
1191 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1192 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1193 VAddCMicrokernelTester()
1194 .batch_size(batch_size)
1195 .b_zero_point(b_zero_point)
1196 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1197 }
1198 }
1199 }
1200
1201 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
1202 TEST_REQUIRES_X86_SSE41;
1203 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1204 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1205 VAddCMicrokernelTester()
1206 .batch_size(batch_size)
1207 .y_zero_point(y_zero_point)
1208 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1209 }
1210 }
1211 }
1212
1213 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
1214 TEST_REQUIRES_X86_SSE41;
1215 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1216 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1217 VAddCMicrokernelTester()
1218 .batch_size(batch_size)
1219 .a_scale(a_scale)
1220 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1221 }
1222 }
1223 }
1224
1225 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
1226 TEST_REQUIRES_X86_SSE41;
1227 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1228 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1229 VAddCMicrokernelTester()
1230 .batch_size(batch_size)
1231 .b_scale(b_scale)
1232 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1233 }
1234 }
1235 }
1236
1237 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
1238 TEST_REQUIRES_X86_SSE41;
1239 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1240 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1241 VAddCMicrokernelTester()
1242 .batch_size(batch_size)
1243 .y_scale(y_scale)
1244 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1245 }
1246 }
1247 }
1248
Marat Dukhan0270d9f2020-08-11 00:56:46 -07001249 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
1250 TEST_REQUIRES_X86_SSE41;
1251 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1252 VAddCMicrokernelTester()
1253 .batch_size(batch_size)
1254 .qmin(128)
1255 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1256 }
1257 }
1258
1259 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
1260 TEST_REQUIRES_X86_SSE41;
1261 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1262 VAddCMicrokernelTester()
1263 .batch_size(batch_size)
1264 .qmax(128)
1265 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8);
1266 }
1267 }
1268#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1269
1270
1271#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1272 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
1273 TEST_REQUIRES_X86_SSE41;
1274 VAddCMicrokernelTester()
1275 .batch_size(16)
1276 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1277 }
1278
1279 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
1280 TEST_REQUIRES_X86_SSE41;
1281 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1282 VAddCMicrokernelTester()
1283 .batch_size(batch_size)
1284 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1285 }
1286 }
1287
1288 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
1289 TEST_REQUIRES_X86_SSE41;
1290 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1291 VAddCMicrokernelTester()
1292 .batch_size(batch_size)
1293 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1294 }
1295 }
1296
1297 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
1298 TEST_REQUIRES_X86_SSE41;
1299 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1300 VAddCMicrokernelTester()
1301 .batch_size(batch_size)
1302 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1303 }
1304 }
1305
1306 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, inplace) {
1307 TEST_REQUIRES_X86_SSE41;
1308 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1309 VAddCMicrokernelTester()
1310 .batch_size(batch_size)
1311 .inplace(true)
1312 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1313 }
1314 }
1315
Marat Dukhan95caee52020-09-02 03:41:32 -07001316 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
1317 TEST_REQUIRES_X86_SSE41;
1318 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1319 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1320 VAddCMicrokernelTester()
1321 .batch_size(batch_size)
1322 .a_zero_point(a_zero_point)
1323 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1324 }
1325 }
1326 }
1327
1328 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
1329 TEST_REQUIRES_X86_SSE41;
1330 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1331 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1332 VAddCMicrokernelTester()
1333 .batch_size(batch_size)
1334 .b_zero_point(b_zero_point)
1335 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1336 }
1337 }
1338 }
1339
1340 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
1341 TEST_REQUIRES_X86_SSE41;
1342 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1343 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1344 VAddCMicrokernelTester()
1345 .batch_size(batch_size)
1346 .y_zero_point(y_zero_point)
1347 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1348 }
1349 }
1350 }
1351
1352 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
1353 TEST_REQUIRES_X86_SSE41;
1354 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1355 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1356 VAddCMicrokernelTester()
1357 .batch_size(batch_size)
1358 .a_scale(a_scale)
1359 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1360 }
1361 }
1362 }
1363
1364 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
1365 TEST_REQUIRES_X86_SSE41;
1366 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1367 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1368 VAddCMicrokernelTester()
1369 .batch_size(batch_size)
1370 .b_scale(b_scale)
1371 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1372 }
1373 }
1374 }
1375
1376 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
1377 TEST_REQUIRES_X86_SSE41;
1378 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1379 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1380 VAddCMicrokernelTester()
1381 .batch_size(batch_size)
1382 .y_scale(y_scale)
1383 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1384 }
1385 }
1386 }
1387
Marat Dukhan0270d9f2020-08-11 00:56:46 -07001388 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
1389 TEST_REQUIRES_X86_SSE41;
1390 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1391 VAddCMicrokernelTester()
1392 .batch_size(batch_size)
1393 .qmin(128)
1394 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1395 }
1396 }
1397
1398 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
1399 TEST_REQUIRES_X86_SSE41;
1400 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1401 VAddCMicrokernelTester()
1402 .batch_size(batch_size)
1403 .qmax(128)
1404 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16);
1405 }
1406 }
1407#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1408
1409
1410#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1411 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
1412 TEST_REQUIRES_X86_SSE41;
1413 VAddCMicrokernelTester()
1414 .batch_size(24)
1415 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1416 }
1417
1418 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
1419 TEST_REQUIRES_X86_SSE41;
1420 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1421 VAddCMicrokernelTester()
1422 .batch_size(batch_size)
1423 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1424 }
1425 }
1426
1427 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
1428 TEST_REQUIRES_X86_SSE41;
1429 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1430 VAddCMicrokernelTester()
1431 .batch_size(batch_size)
1432 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1433 }
1434 }
1435
1436 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
1437 TEST_REQUIRES_X86_SSE41;
1438 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1439 VAddCMicrokernelTester()
1440 .batch_size(batch_size)
1441 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1442 }
1443 }
1444
1445 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, inplace) {
1446 TEST_REQUIRES_X86_SSE41;
1447 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1448 VAddCMicrokernelTester()
1449 .batch_size(batch_size)
1450 .inplace(true)
1451 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1452 }
1453 }
1454
Marat Dukhan95caee52020-09-02 03:41:32 -07001455 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
1456 TEST_REQUIRES_X86_SSE41;
1457 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1458 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1459 VAddCMicrokernelTester()
1460 .batch_size(batch_size)
1461 .a_zero_point(a_zero_point)
1462 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1463 }
1464 }
1465 }
1466
1467 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
1468 TEST_REQUIRES_X86_SSE41;
1469 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1470 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1471 VAddCMicrokernelTester()
1472 .batch_size(batch_size)
1473 .b_zero_point(b_zero_point)
1474 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1475 }
1476 }
1477 }
1478
1479 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
1480 TEST_REQUIRES_X86_SSE41;
1481 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1482 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1483 VAddCMicrokernelTester()
1484 .batch_size(batch_size)
1485 .y_zero_point(y_zero_point)
1486 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1487 }
1488 }
1489 }
1490
1491 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
1492 TEST_REQUIRES_X86_SSE41;
1493 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1494 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1495 VAddCMicrokernelTester()
1496 .batch_size(batch_size)
1497 .a_scale(a_scale)
1498 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1499 }
1500 }
1501 }
1502
1503 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
1504 TEST_REQUIRES_X86_SSE41;
1505 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1506 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1507 VAddCMicrokernelTester()
1508 .batch_size(batch_size)
1509 .b_scale(b_scale)
1510 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1511 }
1512 }
1513 }
1514
1515 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
1516 TEST_REQUIRES_X86_SSE41;
1517 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1518 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1519 VAddCMicrokernelTester()
1520 .batch_size(batch_size)
1521 .y_scale(y_scale)
1522 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1523 }
1524 }
1525 }
1526
Marat Dukhan0270d9f2020-08-11 00:56:46 -07001527 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
1528 TEST_REQUIRES_X86_SSE41;
1529 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1530 VAddCMicrokernelTester()
1531 .batch_size(batch_size)
1532 .qmin(128)
1533 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1534 }
1535 }
1536
1537 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
1538 TEST_REQUIRES_X86_SSE41;
1539 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1540 VAddCMicrokernelTester()
1541 .batch_size(batch_size)
1542 .qmax(128)
1543 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24);
1544 }
1545 }
1546#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1547
1548
1549#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1550 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
1551 TEST_REQUIRES_X86_SSE41;
1552 VAddCMicrokernelTester()
1553 .batch_size(32)
1554 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1555 }
1556
1557 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
1558 TEST_REQUIRES_X86_SSE41;
1559 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1560 VAddCMicrokernelTester()
1561 .batch_size(batch_size)
1562 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1563 }
1564 }
1565
1566 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
1567 TEST_REQUIRES_X86_SSE41;
1568 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1569 VAddCMicrokernelTester()
1570 .batch_size(batch_size)
1571 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1572 }
1573 }
1574
1575 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
1576 TEST_REQUIRES_X86_SSE41;
1577 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1578 VAddCMicrokernelTester()
1579 .batch_size(batch_size)
1580 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1581 }
1582 }
1583
1584 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, inplace) {
1585 TEST_REQUIRES_X86_SSE41;
1586 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1587 VAddCMicrokernelTester()
1588 .batch_size(batch_size)
1589 .inplace(true)
1590 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1591 }
1592 }
1593
Marat Dukhan95caee52020-09-02 03:41:32 -07001594 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
1595 TEST_REQUIRES_X86_SSE41;
1596 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1597 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1598 VAddCMicrokernelTester()
1599 .batch_size(batch_size)
1600 .a_zero_point(a_zero_point)
1601 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1602 }
1603 }
1604 }
1605
1606 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
1607 TEST_REQUIRES_X86_SSE41;
1608 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1609 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1610 VAddCMicrokernelTester()
1611 .batch_size(batch_size)
1612 .b_zero_point(b_zero_point)
1613 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1614 }
1615 }
1616 }
1617
1618 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
1619 TEST_REQUIRES_X86_SSE41;
1620 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1621 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1622 VAddCMicrokernelTester()
1623 .batch_size(batch_size)
1624 .y_zero_point(y_zero_point)
1625 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1626 }
1627 }
1628 }
1629
1630 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
1631 TEST_REQUIRES_X86_SSE41;
1632 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1633 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1634 VAddCMicrokernelTester()
1635 .batch_size(batch_size)
1636 .a_scale(a_scale)
1637 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1638 }
1639 }
1640 }
1641
1642 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
1643 TEST_REQUIRES_X86_SSE41;
1644 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1645 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1646 VAddCMicrokernelTester()
1647 .batch_size(batch_size)
1648 .b_scale(b_scale)
1649 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1650 }
1651 }
1652 }
1653
1654 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
1655 TEST_REQUIRES_X86_SSE41;
1656 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1657 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1658 VAddCMicrokernelTester()
1659 .batch_size(batch_size)
1660 .y_scale(y_scale)
1661 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1662 }
1663 }
1664 }
1665
Marat Dukhan0270d9f2020-08-11 00:56:46 -07001666 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
1667 TEST_REQUIRES_X86_SSE41;
1668 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1669 VAddCMicrokernelTester()
1670 .batch_size(batch_size)
1671 .qmin(128)
1672 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1673 }
1674 }
1675
1676 TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
1677 TEST_REQUIRES_X86_SSE41;
1678 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1679 VAddCMicrokernelTester()
1680 .batch_size(batch_size)
1681 .qmax(128)
1682 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32);
1683 }
1684 }
1685#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan5df27f82020-09-02 23:59:21 -07001686
1687
Marat Dukhanbb9225e2020-09-06 22:40:56 -07001688#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1689 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
1690 TEST_REQUIRES_X86_SSE41;
1691 VAddCMicrokernelTester()
1692 .batch_size(8)
1693 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1694 }
1695
1696 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
1697 TEST_REQUIRES_X86_SSE41;
1698 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1699 VAddCMicrokernelTester()
1700 .batch_size(batch_size)
1701 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1702 }
1703 }
1704
1705 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
1706 TEST_REQUIRES_X86_SSE41;
1707 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1708 VAddCMicrokernelTester()
1709 .batch_size(batch_size)
1710 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1711 }
1712 }
1713
1714 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
1715 TEST_REQUIRES_X86_SSE41;
1716 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1717 VAddCMicrokernelTester()
1718 .batch_size(batch_size)
1719 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1720 }
1721 }
1722
1723 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, inplace) {
1724 TEST_REQUIRES_X86_SSE41;
1725 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726 VAddCMicrokernelTester()
1727 .batch_size(batch_size)
1728 .inplace(true)
1729 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1730 }
1731 }
1732
1733 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
1734 TEST_REQUIRES_X86_SSE41;
1735 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1736 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1737 VAddCMicrokernelTester()
1738 .batch_size(batch_size)
1739 .a_zero_point(a_zero_point)
1740 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1741 }
1742 }
1743 }
1744
1745 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
1746 TEST_REQUIRES_X86_SSE41;
1747 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1748 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1749 VAddCMicrokernelTester()
1750 .batch_size(batch_size)
1751 .b_zero_point(b_zero_point)
1752 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1753 }
1754 }
1755 }
1756
1757 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
1758 TEST_REQUIRES_X86_SSE41;
1759 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1760 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1761 VAddCMicrokernelTester()
1762 .batch_size(batch_size)
1763 .y_zero_point(y_zero_point)
1764 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1765 }
1766 }
1767 }
1768
1769 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
1770 TEST_REQUIRES_X86_SSE41;
1771 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1773 VAddCMicrokernelTester()
1774 .batch_size(batch_size)
1775 .a_scale(a_scale)
1776 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1777 }
1778 }
1779 }
1780
1781 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
1782 TEST_REQUIRES_X86_SSE41;
1783 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1784 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1785 VAddCMicrokernelTester()
1786 .batch_size(batch_size)
1787 .b_scale(b_scale)
1788 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1789 }
1790 }
1791 }
1792
1793 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
1794 TEST_REQUIRES_X86_SSE41;
1795 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1796 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1797 VAddCMicrokernelTester()
1798 .batch_size(batch_size)
1799 .y_scale(y_scale)
1800 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1801 }
1802 }
1803 }
1804
1805 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
1806 TEST_REQUIRES_X86_SSE41;
1807 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1808 VAddCMicrokernelTester()
1809 .batch_size(batch_size)
1810 .qmin(128)
1811 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1812 }
1813 }
1814
1815 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
1816 TEST_REQUIRES_X86_SSE41;
1817 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1818 VAddCMicrokernelTester()
1819 .batch_size(batch_size)
1820 .qmax(128)
1821 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8);
1822 }
1823 }
1824#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1825
1826
1827#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1828 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
1829 TEST_REQUIRES_X86_SSE41;
1830 VAddCMicrokernelTester()
1831 .batch_size(16)
1832 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1833 }
1834
1835 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
1836 TEST_REQUIRES_X86_SSE41;
1837 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1838 VAddCMicrokernelTester()
1839 .batch_size(batch_size)
1840 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1841 }
1842 }
1843
1844 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
1845 TEST_REQUIRES_X86_SSE41;
1846 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1847 VAddCMicrokernelTester()
1848 .batch_size(batch_size)
1849 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1850 }
1851 }
1852
1853 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
1854 TEST_REQUIRES_X86_SSE41;
1855 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1856 VAddCMicrokernelTester()
1857 .batch_size(batch_size)
1858 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1859 }
1860 }
1861
1862 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, inplace) {
1863 TEST_REQUIRES_X86_SSE41;
1864 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1865 VAddCMicrokernelTester()
1866 .batch_size(batch_size)
1867 .inplace(true)
1868 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1869 }
1870 }
1871
1872 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
1873 TEST_REQUIRES_X86_SSE41;
1874 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1875 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1876 VAddCMicrokernelTester()
1877 .batch_size(batch_size)
1878 .a_zero_point(a_zero_point)
1879 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1880 }
1881 }
1882 }
1883
1884 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
1885 TEST_REQUIRES_X86_SSE41;
1886 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1887 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1888 VAddCMicrokernelTester()
1889 .batch_size(batch_size)
1890 .b_zero_point(b_zero_point)
1891 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1892 }
1893 }
1894 }
1895
1896 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
1897 TEST_REQUIRES_X86_SSE41;
1898 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1899 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1900 VAddCMicrokernelTester()
1901 .batch_size(batch_size)
1902 .y_zero_point(y_zero_point)
1903 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1904 }
1905 }
1906 }
1907
1908 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
1909 TEST_REQUIRES_X86_SSE41;
1910 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1911 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1912 VAddCMicrokernelTester()
1913 .batch_size(batch_size)
1914 .a_scale(a_scale)
1915 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1916 }
1917 }
1918 }
1919
1920 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
1921 TEST_REQUIRES_X86_SSE41;
1922 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1923 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1924 VAddCMicrokernelTester()
1925 .batch_size(batch_size)
1926 .b_scale(b_scale)
1927 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1928 }
1929 }
1930 }
1931
1932 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
1933 TEST_REQUIRES_X86_SSE41;
1934 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1935 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1936 VAddCMicrokernelTester()
1937 .batch_size(batch_size)
1938 .y_scale(y_scale)
1939 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1940 }
1941 }
1942 }
1943
1944 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
1945 TEST_REQUIRES_X86_SSE41;
1946 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1947 VAddCMicrokernelTester()
1948 .batch_size(batch_size)
1949 .qmin(128)
1950 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1951 }
1952 }
1953
1954 TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
1955 TEST_REQUIRES_X86_SSE41;
1956 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1957 VAddCMicrokernelTester()
1958 .batch_size(batch_size)
1959 .qmax(128)
1960 .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16);
1961 }
1962 }
1963#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1964
1965
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24.
  // Every test starts with TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 24.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    VAddCMicrokernelTester().batch_size(24).Test(ukernel);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24, stopping before 240).
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 48; n < 240; n += 24) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 23.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n < 24; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 25 through 47.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 25; n < 48; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, inplace) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2103
2104
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32.
  // Every test starts with TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 32.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    VAddCMicrokernelTester().batch_size(32).Test(ukernel);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32, stopping before 320).
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 64; n < 320; n += 32) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 31.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 33 through 63.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 33; n < 64; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, inplace) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
    TEST_REQUIRES_X86_SSE41;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2242
2243
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8.
  // Every test starts with TEST_REQUIRES_X86_XOP (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 8.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    VAddCMicrokernelTester().batch_size(8).Test(ukernel);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8, stopping before 80).
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 16; n < 80; n += 8) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 7.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n < 8; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 9 through 15.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 9; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, inplace) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmin) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmax) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2381
2382
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16.
  // Every test starts with TEST_REQUIRES_X86_XOP (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 16.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    VAddCMicrokernelTester().batch_size(16).Test(ukernel);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16, stopping before 160).
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 32; n < 160; n += 16) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 15.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 17 through 31.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 17; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, inplace) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 16, ..., 76: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmin) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 16, ..., 76.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmax) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2520
2521
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24.
  // Every test starts with TEST_REQUIRES_X86_XOP (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 24.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    VAddCMicrokernelTester().batch_size(24).Test(ukernel);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24, stopping before 240).
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 48; n < 240; n += 24) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 23.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n < 24; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 25 through 47.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 25; n < 48; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, inplace) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 24, ..., 116: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmin) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 24, ..., 116.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmax) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2659
2660
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32.
  // Every test starts with TEST_REQUIRES_X86_XOP (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 32.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    VAddCMicrokernelTester().batch_size(32).Test(ukernel);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32, stopping before 320).
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 64; n < 320; n += 32) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 31.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 33 through 63.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 33; n < 64; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, inplace) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 32, ..., 156: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmin) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 32, ..., 156.
  TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmax) {
    TEST_REQUIRES_X86_XOP;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2798
2799
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8.
  // Every test starts with TEST_REQUIRES_X86_AVX2 (presumably an ISA
  // availability gate from xnnpack/isa-checks.h -- confirm there).

  // Batch size of exactly 8.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    VAddCMicrokernelTester().batch_size(8).Test(ukernel);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8, stopping before 80).
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 16; n < 80; n += 8) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 1 through 7.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n < 8; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Every batch size from 9 through 15.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 9; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(ukernel);
    }
  }

  // Runs with inplace(true) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, inplace) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel);
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps a_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps b_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel);
      }
    }
  }

  // For batch sizes 1, 8, ..., 36: sweeps y_scale, starting at 0.1f and
  // multiplying by 3.14f while the value stays <= 10.0f.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel);
      }
    }
  }

  // Runs with qmin(128) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel);
    }
  }

  // Runs with qmax(128) set on the tester for batch sizes 1, 8, ..., 36.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
    TEST_REQUIRES_X86_AVX2;
    const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2937
2938
// Test suite for xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, built only
// for x86 / x86-64. Every test begins with TEST_REQUIRES_X86_AVX2 (defined
// outside this file; presumably skips the test without AVX2 -- confirm in
// xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Single run at batch_size 16.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX2;
    VAddCMicrokernelTester().batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
  }

  // batch_size = 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 32; n < 160; n += 16) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }

  // Every batch_size from 1 through 15.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }

  // Every batch_size from 17 through 31.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 17; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }

  // batch_size = 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }

  // For batch_size = 1, 16, ..., 76: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // For batch_size = 1, 16, ..., 76: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // For batch_size = 1, 16, ..., 76: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
      }
    }
  }

  // batch_size = 1, 16, ..., 76 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }

  // batch_size = 1, 16, ..., 76 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3076
3077
// Test suite for xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, built only
// for x86 / x86-64. Every test begins with TEST_REQUIRES_X86_AVX2 (defined
// outside this file; presumably skips the test without AVX2 -- confirm in
// xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Single run at batch_size 24.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_X86_AVX2;
    VAddCMicrokernelTester().batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
  }

  // batch_size = 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 48; n < 240; n += 24) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }

  // Every batch_size from 1 through 23.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n < 24; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }

  // Every batch_size from 25 through 47.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 25; n < 48; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }

  // batch_size = 1, 24, 47, 70, 93, 116 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }

  // For batch_size = 1, 24, ..., 116: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // For batch_size = 1, 24, ..., 116: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // For batch_size = 1, 24, ..., 116: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
      }
    }
  }

  // batch_size = 1, 24, ..., 116 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }

  // batch_size = 1, 24, ..., 116 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3215
3216
// Test suite for xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, built only
// for x86 / x86-64. Every test begins with TEST_REQUIRES_X86_AVX2 (defined
// outside this file; presumably skips the test without AVX2 -- confirm in
// xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Single run at batch_size 32.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX2;
    VAddCMicrokernelTester().batch_size(32)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
  }

  // batch_size = 64, 96, ..., 288 (multiples of 32).
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 64; n < 320; n += 32) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }

  // Every batch_size from 1 through 31.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }

  // Every batch_size from 33 through 63.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 33; n < 64; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }

  // batch_size = 1, 32, 63, 94, 125, 156 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }

  // For batch_size = 1, 32, ..., 156: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // For batch_size = 1, 32, ..., 156: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // For batch_size = 1, 32, ..., 156: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
      }
    }
  }

  // batch_size = 1, 32, ..., 156 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }

  // batch_size = 1, 32, ..., 156 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3354
3355
// Test suite for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, built only when
// XNN_ARCH_WASMSIMD is set. These tests have no runtime ISA check.
#if XNN_ARCH_WASMSIMD
  // Single run at batch_size 8.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_eq_8) {
    VAddCMicrokernelTester().batch_size(8)
      .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
  }

  // batch_size = 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_div_8) {
    for (size_t n = 16; n < 80; n += 8) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }

  // Every batch_size from 1 through 7.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_lt_8) {
    for (size_t n = 1; n < 8; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }

  // Every batch_size from 9 through 15.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_gt_8) {
    for (size_t n = 9; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }

  // batch_size = 1, 8, 15, 22, 29, 36 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, inplace) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }

  // For batch_size = 1, 8, ..., 36: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // For batch_size = 1, 8, ..., 36: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // For batch_size = 1, 8, ..., 36: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
      }
    }
  }

  // batch_size = 1, 8, ..., 36 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmin) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }

  // batch_size = 1, 8, ..., 36 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmax) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3480
3481
// Test suite for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, built only when
// XNN_ARCH_WASMSIMD is set. These tests have no runtime ISA check.
#if XNN_ARCH_WASMSIMD
  // Single run at batch_size 16.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_eq_16) {
    VAddCMicrokernelTester().batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
  }

  // batch_size = 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_div_16) {
    for (size_t n = 32; n < 160; n += 16) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }

  // Every batch_size from 1 through 15.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_lt_16) {
    for (size_t n = 1; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }

  // Every batch_size from 17 through 31.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_gt_16) {
    for (size_t n = 17; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }

  // batch_size = 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, inplace) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }

  // For batch_size = 1, 16, ..., 76: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // For batch_size = 1, 16, ..., 76: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // For batch_size = 1, 16, ..., 76: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
      }
    }
  }

  // batch_size = 1, 16, ..., 76 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmin) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }

  // batch_size = 1, 16, ..., 76 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmax) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3606
3607
// Test suite for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, built only when
// XNN_ARCH_WASMSIMD is set. These tests have no runtime ISA check.
#if XNN_ARCH_WASMSIMD
  // Single run at batch_size 24.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_eq_24) {
    VAddCMicrokernelTester().batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
  }

  // batch_size = 48, 72, ..., 216 (multiples of 24).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_div_24) {
    for (size_t n = 48; n < 240; n += 24) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }

  // Every batch_size from 1 through 23.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_lt_24) {
    for (size_t n = 1; n < 24; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }

  // Every batch_size from 25 through 47.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_gt_24) {
    for (size_t n = 25; n < 48; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }

  // batch_size = 1, 24, 47, 70, 93, 116 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, inplace) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }

  // For batch_size = 1, 24, ..., 116: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // For batch_size = 1, 24, ..., 116: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // For batch_size = 1, 24, ..., 116: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
      }
    }
  }

  // batch_size = 1, 24, ..., 116 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmin) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }

  // batch_size = 1, 24, ..., 116 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmax) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3732
3733
// Test suite for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, built only when
// XNN_ARCH_WASMSIMD is set. These tests have no runtime ISA check.
#if XNN_ARCH_WASMSIMD
  // Single run at batch_size 32.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_eq_32) {
    VAddCMicrokernelTester().batch_size(32)
      .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
  }

  // batch_size = 64, 96, ..., 288 (multiples of 32).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_div_32) {
    for (size_t n = 64; n < 320; n += 32) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }

  // Every batch_size from 1 through 31.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_lt_32) {
    for (size_t n = 1; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }

  // Every batch_size from 33 through 63.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_gt_32) {
    for (size_t n = 33; n < 64; ++n) {
      VAddCMicrokernelTester().batch_size(n)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }

  // batch_size = 1, 32, 63, 94, 125, 156 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, inplace) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }

  // For batch_size = 1, 32, ..., 156: a_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // For batch_size = 1, 32, ..., 156: b_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // For batch_size = 1, 32, ..., 156: y_zero_point in -128, -77, -26, 25, 76, 127.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_zero_point) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_scale) {
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
      }
    }
  }

  // batch_size = 1, 32, ..., 156 with qmin(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmin) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }

  // batch_size = 1, 32, ..., 156 with qmax(128) set on the tester.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmax) {
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32);
    }
  }
#endif  // XNN_ARCH_WASMSIMD