blob: 3b64bac790f2009d553d5902b4c13fa0425eaa8e [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-vadd-minmax.yaml
// Generator: tools/generate-vbinary-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/vadd.h>
17#include "vadd-microkernel-tester.h"
18
19
20#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
22 TEST_REQUIRES_X86_SSE2;
23 VAddMicrokernelTester()
24 .batch_size(8)
25 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
26 }
27
28 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
29 TEST_REQUIRES_X86_SSE2;
30 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
31 VAddMicrokernelTester()
32 .batch_size(batch_size)
33 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
34 }
35 }
36
37 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
38 TEST_REQUIRES_X86_SSE2;
39 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
40 VAddMicrokernelTester()
41 .batch_size(batch_size)
42 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
43 }
44 }
45
46 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
47 TEST_REQUIRES_X86_SSE2;
48 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
49 VAddMicrokernelTester()
50 .batch_size(batch_size)
51 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
52 }
53 }
54
55 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
56 TEST_REQUIRES_X86_SSE2;
57 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
58 VAddMicrokernelTester()
59 .batch_size(batch_size)
60 .inplace_a(true)
61 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
62 }
63 }
64
65 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
66 TEST_REQUIRES_X86_SSE2;
67 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
68 VAddMicrokernelTester()
69 .batch_size(batch_size)
70 .inplace_b(true)
71 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
72 }
73 }
74
75 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
76 TEST_REQUIRES_X86_SSE2;
77 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
78 VAddMicrokernelTester()
79 .batch_size(batch_size)
80 .inplace_a(true)
81 .inplace_b(true)
82 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
83 }
84 }
85
Marat Dukhan95caee52020-09-02 03:41:32 -070086 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
87 TEST_REQUIRES_X86_SSE2;
88 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
89 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
90 VAddMicrokernelTester()
91 .batch_size(batch_size)
92 .a_zero_point(a_zero_point)
93 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
94 }
95 }
96 }
97
98 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
99 TEST_REQUIRES_X86_SSE2;
100 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
101 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
102 VAddMicrokernelTester()
103 .batch_size(batch_size)
104 .b_zero_point(b_zero_point)
105 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
106 }
107 }
108 }
109
110 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
111 TEST_REQUIRES_X86_SSE2;
112 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
113 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
114 VAddMicrokernelTester()
115 .batch_size(batch_size)
116 .y_zero_point(y_zero_point)
117 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
118 }
119 }
120 }
121
122 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
123 TEST_REQUIRES_X86_SSE2;
124 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
125 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
126 VAddMicrokernelTester()
127 .batch_size(batch_size)
128 .a_scale(a_scale)
129 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
130 }
131 }
132 }
133
134 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
135 TEST_REQUIRES_X86_SSE2;
136 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
137 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
138 VAddMicrokernelTester()
139 .batch_size(batch_size)
140 .b_scale(b_scale)
141 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
142 }
143 }
144 }
145
146 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
147 TEST_REQUIRES_X86_SSE2;
148 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
149 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
150 VAddMicrokernelTester()
151 .batch_size(batch_size)
152 .y_scale(y_scale)
153 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
154 }
155 }
156 }
157
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700158 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
159 TEST_REQUIRES_X86_SSE2;
160 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
161 VAddMicrokernelTester()
162 .batch_size(batch_size)
163 .qmin(128)
164 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
165 }
166 }
167
168 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
169 TEST_REQUIRES_X86_SSE2;
170 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
171 VAddMicrokernelTester()
172 .batch_size(batch_size)
173 .qmax(128)
174 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8);
175 }
176 }
177#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
178
179
180#if XNN_ARCH_X86 || XNN_ARCH_X86_64
181 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
182 TEST_REQUIRES_X86_SSE2;
183 VAddMicrokernelTester()
184 .batch_size(16)
185 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
186 }
187
188 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
189 TEST_REQUIRES_X86_SSE2;
190 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
191 VAddMicrokernelTester()
192 .batch_size(batch_size)
193 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
194 }
195 }
196
197 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
198 TEST_REQUIRES_X86_SSE2;
199 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
200 VAddMicrokernelTester()
201 .batch_size(batch_size)
202 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
203 }
204 }
205
206 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
207 TEST_REQUIRES_X86_SSE2;
208 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
209 VAddMicrokernelTester()
210 .batch_size(batch_size)
211 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
212 }
213 }
214
215 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
216 TEST_REQUIRES_X86_SSE2;
217 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
218 VAddMicrokernelTester()
219 .batch_size(batch_size)
220 .inplace_a(true)
221 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
222 }
223 }
224
225 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
226 TEST_REQUIRES_X86_SSE2;
227 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
228 VAddMicrokernelTester()
229 .batch_size(batch_size)
230 .inplace_b(true)
231 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
232 }
233 }
234
235 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
236 TEST_REQUIRES_X86_SSE2;
237 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
238 VAddMicrokernelTester()
239 .batch_size(batch_size)
240 .inplace_a(true)
241 .inplace_b(true)
242 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
243 }
244 }
245
Marat Dukhan95caee52020-09-02 03:41:32 -0700246 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
247 TEST_REQUIRES_X86_SSE2;
248 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
249 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
250 VAddMicrokernelTester()
251 .batch_size(batch_size)
252 .a_zero_point(a_zero_point)
253 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
254 }
255 }
256 }
257
258 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
259 TEST_REQUIRES_X86_SSE2;
260 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
261 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
262 VAddMicrokernelTester()
263 .batch_size(batch_size)
264 .b_zero_point(b_zero_point)
265 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
266 }
267 }
268 }
269
270 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
271 TEST_REQUIRES_X86_SSE2;
272 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
273 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
274 VAddMicrokernelTester()
275 .batch_size(batch_size)
276 .y_zero_point(y_zero_point)
277 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
278 }
279 }
280 }
281
282 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
283 TEST_REQUIRES_X86_SSE2;
284 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
285 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
286 VAddMicrokernelTester()
287 .batch_size(batch_size)
288 .a_scale(a_scale)
289 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
290 }
291 }
292 }
293
294 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
295 TEST_REQUIRES_X86_SSE2;
296 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
297 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
298 VAddMicrokernelTester()
299 .batch_size(batch_size)
300 .b_scale(b_scale)
301 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
302 }
303 }
304 }
305
306 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
307 TEST_REQUIRES_X86_SSE2;
308 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
309 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
310 VAddMicrokernelTester()
311 .batch_size(batch_size)
312 .y_scale(y_scale)
313 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
314 }
315 }
316 }
317
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700318 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
319 TEST_REQUIRES_X86_SSE2;
320 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
321 VAddMicrokernelTester()
322 .batch_size(batch_size)
323 .qmin(128)
324 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
325 }
326 }
327
328 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
329 TEST_REQUIRES_X86_SSE2;
330 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
331 VAddMicrokernelTester()
332 .batch_size(batch_size)
333 .qmax(128)
334 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16);
335 }
336 }
337#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
338
339
340#if XNN_ARCH_X86 || XNN_ARCH_X86_64
341 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
342 TEST_REQUIRES_X86_SSE2;
343 VAddMicrokernelTester()
344 .batch_size(24)
345 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
346 }
347
348 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
349 TEST_REQUIRES_X86_SSE2;
350 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
351 VAddMicrokernelTester()
352 .batch_size(batch_size)
353 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
354 }
355 }
356
357 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
358 TEST_REQUIRES_X86_SSE2;
359 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
360 VAddMicrokernelTester()
361 .batch_size(batch_size)
362 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
363 }
364 }
365
366 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
367 TEST_REQUIRES_X86_SSE2;
368 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
369 VAddMicrokernelTester()
370 .batch_size(batch_size)
371 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
372 }
373 }
374
375 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a) {
376 TEST_REQUIRES_X86_SSE2;
377 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
378 VAddMicrokernelTester()
379 .batch_size(batch_size)
380 .inplace_a(true)
381 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
382 }
383 }
384
385 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_b) {
386 TEST_REQUIRES_X86_SSE2;
387 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
388 VAddMicrokernelTester()
389 .batch_size(batch_size)
390 .inplace_b(true)
391 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
392 }
393 }
394
395 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a_and_b) {
396 TEST_REQUIRES_X86_SSE2;
397 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
398 VAddMicrokernelTester()
399 .batch_size(batch_size)
400 .inplace_a(true)
401 .inplace_b(true)
402 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
403 }
404 }
405
Marat Dukhan95caee52020-09-02 03:41:32 -0700406 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
407 TEST_REQUIRES_X86_SSE2;
408 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
409 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
410 VAddMicrokernelTester()
411 .batch_size(batch_size)
412 .a_zero_point(a_zero_point)
413 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
414 }
415 }
416 }
417
418 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
419 TEST_REQUIRES_X86_SSE2;
420 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
421 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
422 VAddMicrokernelTester()
423 .batch_size(batch_size)
424 .b_zero_point(b_zero_point)
425 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
426 }
427 }
428 }
429
430 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
431 TEST_REQUIRES_X86_SSE2;
432 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
433 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
434 VAddMicrokernelTester()
435 .batch_size(batch_size)
436 .y_zero_point(y_zero_point)
437 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
438 }
439 }
440 }
441
442 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
443 TEST_REQUIRES_X86_SSE2;
444 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
445 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
446 VAddMicrokernelTester()
447 .batch_size(batch_size)
448 .a_scale(a_scale)
449 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
450 }
451 }
452 }
453
454 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
455 TEST_REQUIRES_X86_SSE2;
456 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
457 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
458 VAddMicrokernelTester()
459 .batch_size(batch_size)
460 .b_scale(b_scale)
461 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
462 }
463 }
464 }
465
466 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
467 TEST_REQUIRES_X86_SSE2;
468 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
469 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
470 VAddMicrokernelTester()
471 .batch_size(batch_size)
472 .y_scale(y_scale)
473 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
474 }
475 }
476 }
477
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700478 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
479 TEST_REQUIRES_X86_SSE2;
480 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
481 VAddMicrokernelTester()
482 .batch_size(batch_size)
483 .qmin(128)
484 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
485 }
486 }
487
488 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
489 TEST_REQUIRES_X86_SSE2;
490 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
491 VAddMicrokernelTester()
492 .batch_size(batch_size)
493 .qmax(128)
494 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24);
495 }
496 }
497#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
498
499
500#if XNN_ARCH_X86 || XNN_ARCH_X86_64
501 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
502 TEST_REQUIRES_X86_SSE2;
503 VAddMicrokernelTester()
504 .batch_size(32)
505 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
506 }
507
508 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
509 TEST_REQUIRES_X86_SSE2;
510 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
511 VAddMicrokernelTester()
512 .batch_size(batch_size)
513 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
514 }
515 }
516
517 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
518 TEST_REQUIRES_X86_SSE2;
519 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
520 VAddMicrokernelTester()
521 .batch_size(batch_size)
522 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
523 }
524 }
525
526 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
527 TEST_REQUIRES_X86_SSE2;
528 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
529 VAddMicrokernelTester()
530 .batch_size(batch_size)
531 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
532 }
533 }
534
535 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a) {
536 TEST_REQUIRES_X86_SSE2;
537 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
538 VAddMicrokernelTester()
539 .batch_size(batch_size)
540 .inplace_a(true)
541 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
542 }
543 }
544
545 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_b) {
546 TEST_REQUIRES_X86_SSE2;
547 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
548 VAddMicrokernelTester()
549 .batch_size(batch_size)
550 .inplace_b(true)
551 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
552 }
553 }
554
555 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a_and_b) {
556 TEST_REQUIRES_X86_SSE2;
557 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
558 VAddMicrokernelTester()
559 .batch_size(batch_size)
560 .inplace_a(true)
561 .inplace_b(true)
562 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
563 }
564 }
565
Marat Dukhan95caee52020-09-02 03:41:32 -0700566 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
567 TEST_REQUIRES_X86_SSE2;
568 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
569 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
570 VAddMicrokernelTester()
571 .batch_size(batch_size)
572 .a_zero_point(a_zero_point)
573 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
574 }
575 }
576 }
577
578 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
579 TEST_REQUIRES_X86_SSE2;
580 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
581 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
582 VAddMicrokernelTester()
583 .batch_size(batch_size)
584 .b_zero_point(b_zero_point)
585 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
586 }
587 }
588 }
589
590 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
591 TEST_REQUIRES_X86_SSE2;
592 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
593 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
594 VAddMicrokernelTester()
595 .batch_size(batch_size)
596 .y_zero_point(y_zero_point)
597 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
598 }
599 }
600 }
601
602 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
603 TEST_REQUIRES_X86_SSE2;
604 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
605 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
606 VAddMicrokernelTester()
607 .batch_size(batch_size)
608 .a_scale(a_scale)
609 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
610 }
611 }
612 }
613
614 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
615 TEST_REQUIRES_X86_SSE2;
616 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
617 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
618 VAddMicrokernelTester()
619 .batch_size(batch_size)
620 .b_scale(b_scale)
621 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
622 }
623 }
624 }
625
626 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
627 TEST_REQUIRES_X86_SSE2;
628 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
629 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
630 VAddMicrokernelTester()
631 .batch_size(batch_size)
632 .y_scale(y_scale)
633 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
634 }
635 }
636 }
637
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700638 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
639 TEST_REQUIRES_X86_SSE2;
640 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
641 VAddMicrokernelTester()
642 .batch_size(batch_size)
643 .qmin(128)
644 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
645 }
646 }
647
648 TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
649 TEST_REQUIRES_X86_SSE2;
650 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
651 VAddMicrokernelTester()
652 .batch_size(batch_size)
653 .qmax(128)
654 .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32);
655 }
656 }
657#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
658
659
660#if XNN_ARCH_X86 || XNN_ARCH_X86_64
661 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
662 TEST_REQUIRES_X86_SSE41;
663 VAddMicrokernelTester()
664 .batch_size(8)
665 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
666 }
667
668 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
669 TEST_REQUIRES_X86_SSE41;
670 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
671 VAddMicrokernelTester()
672 .batch_size(batch_size)
673 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
674 }
675 }
676
677 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
678 TEST_REQUIRES_X86_SSE41;
679 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
680 VAddMicrokernelTester()
681 .batch_size(batch_size)
682 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
683 }
684 }
685
686 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
687 TEST_REQUIRES_X86_SSE41;
688 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
689 VAddMicrokernelTester()
690 .batch_size(batch_size)
691 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
692 }
693 }
694
695 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
696 TEST_REQUIRES_X86_SSE41;
697 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
698 VAddMicrokernelTester()
699 .batch_size(batch_size)
700 .inplace_a(true)
701 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
702 }
703 }
704
705 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
706 TEST_REQUIRES_X86_SSE41;
707 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
708 VAddMicrokernelTester()
709 .batch_size(batch_size)
710 .inplace_b(true)
711 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
712 }
713 }
714
715 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
716 TEST_REQUIRES_X86_SSE41;
717 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
718 VAddMicrokernelTester()
719 .batch_size(batch_size)
720 .inplace_a(true)
721 .inplace_b(true)
722 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
723 }
724 }
725
Marat Dukhan95caee52020-09-02 03:41:32 -0700726 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
727 TEST_REQUIRES_X86_SSE41;
728 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
729 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
730 VAddMicrokernelTester()
731 .batch_size(batch_size)
732 .a_zero_point(a_zero_point)
733 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
734 }
735 }
736 }
737
738 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
739 TEST_REQUIRES_X86_SSE41;
740 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
741 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
742 VAddMicrokernelTester()
743 .batch_size(batch_size)
744 .b_zero_point(b_zero_point)
745 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
746 }
747 }
748 }
749
750 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
751 TEST_REQUIRES_X86_SSE41;
752 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
753 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
754 VAddMicrokernelTester()
755 .batch_size(batch_size)
756 .y_zero_point(y_zero_point)
757 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
758 }
759 }
760 }
761
762 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
763 TEST_REQUIRES_X86_SSE41;
764 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
765 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
766 VAddMicrokernelTester()
767 .batch_size(batch_size)
768 .a_scale(a_scale)
769 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
770 }
771 }
772 }
773
774 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
775 TEST_REQUIRES_X86_SSE41;
776 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
777 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
778 VAddMicrokernelTester()
779 .batch_size(batch_size)
780 .b_scale(b_scale)
781 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
782 }
783 }
784 }
785
786 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
787 TEST_REQUIRES_X86_SSE41;
788 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
789 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
790 VAddMicrokernelTester()
791 .batch_size(batch_size)
792 .y_scale(y_scale)
793 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
794 }
795 }
796 }
797
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700798 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
799 TEST_REQUIRES_X86_SSE41;
800 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
801 VAddMicrokernelTester()
802 .batch_size(batch_size)
803 .qmin(128)
804 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
805 }
806 }
807
808 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
809 TEST_REQUIRES_X86_SSE41;
810 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
811 VAddMicrokernelTester()
812 .batch_size(batch_size)
813 .qmax(128)
814 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8);
815 }
816 }
817#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
818
819
820#if XNN_ARCH_X86 || XNN_ARCH_X86_64
821 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
822 TEST_REQUIRES_X86_SSE41;
823 VAddMicrokernelTester()
824 .batch_size(16)
825 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
826 }
827
828 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
829 TEST_REQUIRES_X86_SSE41;
830 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
831 VAddMicrokernelTester()
832 .batch_size(batch_size)
833 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
834 }
835 }
836
837 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
838 TEST_REQUIRES_X86_SSE41;
839 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
840 VAddMicrokernelTester()
841 .batch_size(batch_size)
842 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
843 }
844 }
845
846 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
847 TEST_REQUIRES_X86_SSE41;
848 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
849 VAddMicrokernelTester()
850 .batch_size(batch_size)
851 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
852 }
853 }
854
855 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
856 TEST_REQUIRES_X86_SSE41;
857 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
858 VAddMicrokernelTester()
859 .batch_size(batch_size)
860 .inplace_a(true)
861 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
862 }
863 }
864
865 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
866 TEST_REQUIRES_X86_SSE41;
867 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
868 VAddMicrokernelTester()
869 .batch_size(batch_size)
870 .inplace_b(true)
871 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
872 }
873 }
874
875 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
876 TEST_REQUIRES_X86_SSE41;
877 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
878 VAddMicrokernelTester()
879 .batch_size(batch_size)
880 .inplace_a(true)
881 .inplace_b(true)
882 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
883 }
884 }
885
Marat Dukhan95caee52020-09-02 03:41:32 -0700886 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
887 TEST_REQUIRES_X86_SSE41;
888 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
889 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
890 VAddMicrokernelTester()
891 .batch_size(batch_size)
892 .a_zero_point(a_zero_point)
893 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
894 }
895 }
896 }
897
898 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
899 TEST_REQUIRES_X86_SSE41;
900 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
901 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
902 VAddMicrokernelTester()
903 .batch_size(batch_size)
904 .b_zero_point(b_zero_point)
905 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
906 }
907 }
908 }
909
910 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
911 TEST_REQUIRES_X86_SSE41;
912 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
913 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
914 VAddMicrokernelTester()
915 .batch_size(batch_size)
916 .y_zero_point(y_zero_point)
917 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
918 }
919 }
920 }
921
922 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
923 TEST_REQUIRES_X86_SSE41;
924 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
925 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
926 VAddMicrokernelTester()
927 .batch_size(batch_size)
928 .a_scale(a_scale)
929 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
930 }
931 }
932 }
933
934 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
935 TEST_REQUIRES_X86_SSE41;
936 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
937 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
938 VAddMicrokernelTester()
939 .batch_size(batch_size)
940 .b_scale(b_scale)
941 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
942 }
943 }
944 }
945
946 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
947 TEST_REQUIRES_X86_SSE41;
948 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
949 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
950 VAddMicrokernelTester()
951 .batch_size(batch_size)
952 .y_scale(y_scale)
953 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
954 }
955 }
956 }
957
Marat Dukhand9f3ad42020-08-10 12:30:58 -0700958 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
959 TEST_REQUIRES_X86_SSE41;
960 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
961 VAddMicrokernelTester()
962 .batch_size(batch_size)
963 .qmin(128)
964 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
965 }
966 }
967
968 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
969 TEST_REQUIRES_X86_SSE41;
970 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
971 VAddMicrokernelTester()
972 .batch_size(batch_size)
973 .qmax(128)
974 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16);
975 }
976 }
977#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
978
979
980#if XNN_ARCH_X86 || XNN_ARCH_X86_64
981 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
982 TEST_REQUIRES_X86_SSE41;
983 VAddMicrokernelTester()
984 .batch_size(24)
985 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
986 }
987
988 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
989 TEST_REQUIRES_X86_SSE41;
990 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
991 VAddMicrokernelTester()
992 .batch_size(batch_size)
993 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
994 }
995 }
996
997 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
998 TEST_REQUIRES_X86_SSE41;
999 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1000 VAddMicrokernelTester()
1001 .batch_size(batch_size)
1002 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1003 }
1004 }
1005
1006 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
1007 TEST_REQUIRES_X86_SSE41;
1008 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1009 VAddMicrokernelTester()
1010 .batch_size(batch_size)
1011 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1012 }
1013 }
1014
1015 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a) {
1016 TEST_REQUIRES_X86_SSE41;
1017 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1018 VAddMicrokernelTester()
1019 .batch_size(batch_size)
1020 .inplace_a(true)
1021 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1022 }
1023 }
1024
1025 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_b) {
1026 TEST_REQUIRES_X86_SSE41;
1027 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1028 VAddMicrokernelTester()
1029 .batch_size(batch_size)
1030 .inplace_b(true)
1031 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1032 }
1033 }
1034
1035 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a_and_b) {
1036 TEST_REQUIRES_X86_SSE41;
1037 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1038 VAddMicrokernelTester()
1039 .batch_size(batch_size)
1040 .inplace_a(true)
1041 .inplace_b(true)
1042 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1043 }
1044 }
1045
Marat Dukhan95caee52020-09-02 03:41:32 -07001046 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
1047 TEST_REQUIRES_X86_SSE41;
1048 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1049 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1050 VAddMicrokernelTester()
1051 .batch_size(batch_size)
1052 .a_zero_point(a_zero_point)
1053 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1054 }
1055 }
1056 }
1057
1058 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
1059 TEST_REQUIRES_X86_SSE41;
1060 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1061 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1062 VAddMicrokernelTester()
1063 .batch_size(batch_size)
1064 .b_zero_point(b_zero_point)
1065 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1066 }
1067 }
1068 }
1069
1070 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
1071 TEST_REQUIRES_X86_SSE41;
1072 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1073 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1074 VAddMicrokernelTester()
1075 .batch_size(batch_size)
1076 .y_zero_point(y_zero_point)
1077 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1078 }
1079 }
1080 }
1081
1082 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
1083 TEST_REQUIRES_X86_SSE41;
1084 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1085 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1086 VAddMicrokernelTester()
1087 .batch_size(batch_size)
1088 .a_scale(a_scale)
1089 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1090 }
1091 }
1092 }
1093
1094 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
1095 TEST_REQUIRES_X86_SSE41;
1096 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1097 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1098 VAddMicrokernelTester()
1099 .batch_size(batch_size)
1100 .b_scale(b_scale)
1101 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1102 }
1103 }
1104 }
1105
1106 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
1107 TEST_REQUIRES_X86_SSE41;
1108 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1109 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1110 VAddMicrokernelTester()
1111 .batch_size(batch_size)
1112 .y_scale(y_scale)
1113 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1114 }
1115 }
1116 }
1117
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001118 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
1119 TEST_REQUIRES_X86_SSE41;
1120 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1121 VAddMicrokernelTester()
1122 .batch_size(batch_size)
1123 .qmin(128)
1124 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1125 }
1126 }
1127
1128 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
1129 TEST_REQUIRES_X86_SSE41;
1130 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1131 VAddMicrokernelTester()
1132 .batch_size(batch_size)
1133 .qmax(128)
1134 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24);
1135 }
1136 }
1137#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1138
1139
1140#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1141 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
1142 TEST_REQUIRES_X86_SSE41;
1143 VAddMicrokernelTester()
1144 .batch_size(32)
1145 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1146 }
1147
1148 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
1149 TEST_REQUIRES_X86_SSE41;
1150 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1151 VAddMicrokernelTester()
1152 .batch_size(batch_size)
1153 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1154 }
1155 }
1156
1157 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
1158 TEST_REQUIRES_X86_SSE41;
1159 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1160 VAddMicrokernelTester()
1161 .batch_size(batch_size)
1162 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1163 }
1164 }
1165
1166 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
1167 TEST_REQUIRES_X86_SSE41;
1168 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1169 VAddMicrokernelTester()
1170 .batch_size(batch_size)
1171 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1172 }
1173 }
1174
1175 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a) {
1176 TEST_REQUIRES_X86_SSE41;
1177 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1178 VAddMicrokernelTester()
1179 .batch_size(batch_size)
1180 .inplace_a(true)
1181 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1182 }
1183 }
1184
1185 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_b) {
1186 TEST_REQUIRES_X86_SSE41;
1187 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1188 VAddMicrokernelTester()
1189 .batch_size(batch_size)
1190 .inplace_b(true)
1191 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1192 }
1193 }
1194
1195 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a_and_b) {
1196 TEST_REQUIRES_X86_SSE41;
1197 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1198 VAddMicrokernelTester()
1199 .batch_size(batch_size)
1200 .inplace_a(true)
1201 .inplace_b(true)
1202 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1203 }
1204 }
1205
Marat Dukhan95caee52020-09-02 03:41:32 -07001206 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
1207 TEST_REQUIRES_X86_SSE41;
1208 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1209 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1210 VAddMicrokernelTester()
1211 .batch_size(batch_size)
1212 .a_zero_point(a_zero_point)
1213 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1214 }
1215 }
1216 }
1217
1218 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
1219 TEST_REQUIRES_X86_SSE41;
1220 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1221 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1222 VAddMicrokernelTester()
1223 .batch_size(batch_size)
1224 .b_zero_point(b_zero_point)
1225 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1226 }
1227 }
1228 }
1229
1230 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
1231 TEST_REQUIRES_X86_SSE41;
1232 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1233 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1234 VAddMicrokernelTester()
1235 .batch_size(batch_size)
1236 .y_zero_point(y_zero_point)
1237 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1238 }
1239 }
1240 }
1241
1242 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
1243 TEST_REQUIRES_X86_SSE41;
1244 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1245 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1246 VAddMicrokernelTester()
1247 .batch_size(batch_size)
1248 .a_scale(a_scale)
1249 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1250 }
1251 }
1252 }
1253
1254 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
1255 TEST_REQUIRES_X86_SSE41;
1256 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1257 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1258 VAddMicrokernelTester()
1259 .batch_size(batch_size)
1260 .b_scale(b_scale)
1261 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1262 }
1263 }
1264 }
1265
1266 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
1267 TEST_REQUIRES_X86_SSE41;
1268 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1269 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1270 VAddMicrokernelTester()
1271 .batch_size(batch_size)
1272 .y_scale(y_scale)
1273 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1274 }
1275 }
1276 }
1277
Marat Dukhand9f3ad42020-08-10 12:30:58 -07001278 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
1279 TEST_REQUIRES_X86_SSE41;
1280 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1281 VAddMicrokernelTester()
1282 .batch_size(batch_size)
1283 .qmin(128)
1284 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1285 }
1286 }
1287
1288 TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
1289 TEST_REQUIRES_X86_SSE41;
1290 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1291 VAddMicrokernelTester()
1292 .batch_size(batch_size)
1293 .qmax(128)
1294 .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32);
1295 }
1296 }
1297#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64