// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-vmul-minmax-fp32.yaml
// Generator: tools/generate-vbinary-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/params-init.h>
17#include <xnnpack/vmul.h>
18#include "vmul-microkernel-tester.h"
19
20
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, driven through
  // VMulMicrokernelTester. Compiled only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set.
  // Every test begins with TEST_REQUIRES_ARM_NEON (macro defined outside this file).

  // Single batch of exactly 8 elements.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VMulMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VMulMicrokernelTester()
        .batch_size(multiple * 8)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 7; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 9; n <= 15; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Passes 128 to the tester's qmin setter; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Passes 128 to the tester's qmax setter; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, driven through
  // VMulMicrokernelTester. Compiled only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set.
  // Every test begins with TEST_REQUIRES_ARM_NEON (macro defined outside this file).

  // Single batch of exactly 16 elements.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VMulMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VMulMicrokernelTester()
        .batch_size(multiple * 16)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 15; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 17; n <= 31; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Passes 128 to the tester's qmin setter; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Passes 128 to the tester's qmax setter; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, driven through
  // VMulMicrokernelTester. Compiled only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set.
  // Every test begins with TEST_REQUIRES_ARM_NEON (macro defined outside this file).

  // Single batch of exactly 16 elements.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VMulMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VMulMicrokernelTester()
        .batch_size(multiple * 16)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 15; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 17; n <= 31; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Passes 128 to the tester's qmin setter; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }

  // Passes 128 to the tester's qmax setter; batch sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, driven through
  // VMulMicrokernelTester. Compiled only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set.
  // Every test begins with TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this file).

  // Single batch of exactly 8 elements.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    VMulMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VMulMicrokernelTester()
        .batch_size(multiple * 8)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 7; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 9; n <= 15; ++n) {
      VMulMicrokernelTester()
        .batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Both inplace_a and inplace_b enabled; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // a_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // b_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // y_scale starts at 0.1 and is multiplied by 3.14 while it stays <= 10.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  // Passes 128 to the tester's qmin setter; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }

  // Passes 128 to the tester's qmax setter; batch sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON_V8;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
661#if XNN_ARCH_ARM || XNN_ARCH_ARM64
662 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_eq_16) {
663 TEST_REQUIRES_ARM_NEON_V8;
664 VMulMicrokernelTester()
665 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800666 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700667 }
668
669 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_div_16) {
670 TEST_REQUIRES_ARM_NEON_V8;
671 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
672 VMulMicrokernelTester()
673 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800674 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700675 }
676 }
677
678 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_lt_16) {
679 TEST_REQUIRES_ARM_NEON_V8;
680 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
681 VMulMicrokernelTester()
682 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800683 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700684 }
685 }
686
687 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_gt_16) {
688 TEST_REQUIRES_ARM_NEON_V8;
689 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
690 VMulMicrokernelTester()
691 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800692 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700693 }
694 }
695
696 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a) {
697 TEST_REQUIRES_ARM_NEON_V8;
698 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
699 VMulMicrokernelTester()
700 .batch_size(batch_size)
701 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800702 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700703 }
704 }
705
706 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_b) {
707 TEST_REQUIRES_ARM_NEON_V8;
708 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
709 VMulMicrokernelTester()
710 .batch_size(batch_size)
711 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800712 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700713 }
714 }
715
716 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a_and_b) {
717 TEST_REQUIRES_ARM_NEON_V8;
718 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
719 VMulMicrokernelTester()
720 .batch_size(batch_size)
721 .inplace_a(true)
722 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800723 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700724 }
725 }
726
727 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_zero_point) {
728 TEST_REQUIRES_ARM_NEON_V8;
729 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
730 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
731 VMulMicrokernelTester()
732 .batch_size(batch_size)
733 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800734 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700735 }
736 }
737 }
738
739 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_zero_point) {
740 TEST_REQUIRES_ARM_NEON_V8;
741 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
742 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
743 VMulMicrokernelTester()
744 .batch_size(batch_size)
745 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800746 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700747 }
748 }
749 }
750
751 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_zero_point) {
752 TEST_REQUIRES_ARM_NEON_V8;
753 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
754 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
755 VMulMicrokernelTester()
756 .batch_size(batch_size)
757 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800758 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700759 }
760 }
761 }
762
763 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_scale) {
764 TEST_REQUIRES_ARM_NEON_V8;
765 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
766 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
767 VMulMicrokernelTester()
768 .batch_size(batch_size)
769 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800770 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700771 }
772 }
773 }
774
775 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_scale) {
776 TEST_REQUIRES_ARM_NEON_V8;
777 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
778 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
779 VMulMicrokernelTester()
780 .batch_size(batch_size)
781 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800782 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700783 }
784 }
785 }
786
787 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_scale) {
788 TEST_REQUIRES_ARM_NEON_V8;
789 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
790 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
791 VMulMicrokernelTester()
792 .batch_size(batch_size)
793 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800794 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700795 }
796 }
797 }
798
799 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmin) {
800 TEST_REQUIRES_ARM_NEON_V8;
801 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
802 VMulMicrokernelTester()
803 .batch_size(batch_size)
804 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800805 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700806 }
807 }
808
809 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmax) {
810 TEST_REQUIRES_ARM_NEON_V8;
811 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
812 VMulMicrokernelTester()
813 .batch_size(batch_size)
814 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800815 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700816 }
817 }
818#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
819
820
821#if XNN_ARCH_ARM || XNN_ARCH_ARM64
822 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_eq_16) {
823 TEST_REQUIRES_ARM_NEON_V8;
824 VMulMicrokernelTester()
825 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800826 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700827 }
828
829 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_div_16) {
830 TEST_REQUIRES_ARM_NEON_V8;
831 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
832 VMulMicrokernelTester()
833 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800834 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700835 }
836 }
837
838 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_lt_16) {
839 TEST_REQUIRES_ARM_NEON_V8;
840 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
841 VMulMicrokernelTester()
842 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800843 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700844 }
845 }
846
847 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_gt_16) {
848 TEST_REQUIRES_ARM_NEON_V8;
849 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
850 VMulMicrokernelTester()
851 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800852 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700853 }
854 }
855
856 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a) {
857 TEST_REQUIRES_ARM_NEON_V8;
858 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
859 VMulMicrokernelTester()
860 .batch_size(batch_size)
861 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800862 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700863 }
864 }
865
866 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_b) {
867 TEST_REQUIRES_ARM_NEON_V8;
868 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
869 VMulMicrokernelTester()
870 .batch_size(batch_size)
871 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800872 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700873 }
874 }
875
876 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a_and_b) {
877 TEST_REQUIRES_ARM_NEON_V8;
878 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
879 VMulMicrokernelTester()
880 .batch_size(batch_size)
881 .inplace_a(true)
882 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -0800883 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700884 }
885 }
886
887 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_zero_point) {
888 TEST_REQUIRES_ARM_NEON_V8;
889 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
890 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
891 VMulMicrokernelTester()
892 .batch_size(batch_size)
893 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800894 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700895 }
896 }
897 }
898
899 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_zero_point) {
900 TEST_REQUIRES_ARM_NEON_V8;
901 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
902 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
903 VMulMicrokernelTester()
904 .batch_size(batch_size)
905 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800906 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700907 }
908 }
909 }
910
911 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_zero_point) {
912 TEST_REQUIRES_ARM_NEON_V8;
913 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
914 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
915 VMulMicrokernelTester()
916 .batch_size(batch_size)
917 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -0800918 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700919 }
920 }
921 }
922
923 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_scale) {
924 TEST_REQUIRES_ARM_NEON_V8;
925 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
926 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
927 VMulMicrokernelTester()
928 .batch_size(batch_size)
929 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800930 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700931 }
932 }
933 }
934
935 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_scale) {
936 TEST_REQUIRES_ARM_NEON_V8;
937 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
938 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
939 VMulMicrokernelTester()
940 .batch_size(batch_size)
941 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800942 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700943 }
944 }
945 }
946
947 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_scale) {
948 TEST_REQUIRES_ARM_NEON_V8;
949 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
950 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
951 VMulMicrokernelTester()
952 .batch_size(batch_size)
953 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -0800954 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700955 }
956 }
957 }
958
959 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmin) {
960 TEST_REQUIRES_ARM_NEON_V8;
961 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
962 VMulMicrokernelTester()
963 .batch_size(batch_size)
964 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800965 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700966 }
967 }
968
969 TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmax) {
970 TEST_REQUIRES_ARM_NEON_V8;
971 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
972 VMulMicrokernelTester()
973 .batch_size(batch_size)
974 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800975 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan4a7b70f2021-08-02 18:18:10 -0700976 }
977 }
978#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
979
980
Marat Dukhana212eac2021-08-02 09:58:04 -0700981#if XNN_ARCH_X86 || XNN_ARCH_X86_64
982 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_eq_8) {
983 TEST_REQUIRES_X86_SSE2;
984 VMulMicrokernelTester()
985 .batch_size(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800986 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -0700987 }
988
989 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_div_8) {
990 TEST_REQUIRES_X86_SSE2;
991 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
992 VMulMicrokernelTester()
993 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -0800994 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -0700995 }
996 }
997
998 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_lt_8) {
999 TEST_REQUIRES_X86_SSE2;
1000 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1001 VMulMicrokernelTester()
1002 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001003 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001004 }
1005 }
1006
1007 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_gt_8) {
1008 TEST_REQUIRES_X86_SSE2;
1009 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1010 VMulMicrokernelTester()
1011 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001012 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001013 }
1014 }
1015
1016 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a) {
1017 TEST_REQUIRES_X86_SSE2;
1018 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1019 VMulMicrokernelTester()
1020 .batch_size(batch_size)
1021 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001022 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001023 }
1024 }
1025
1026 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_b) {
1027 TEST_REQUIRES_X86_SSE2;
1028 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1029 VMulMicrokernelTester()
1030 .batch_size(batch_size)
1031 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001032 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001033 }
1034 }
1035
1036 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
1037 TEST_REQUIRES_X86_SSE2;
1038 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1039 VMulMicrokernelTester()
1040 .batch_size(batch_size)
1041 .inplace_a(true)
1042 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001043 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001044 }
1045 }
1046
1047 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_zero_point) {
1048 TEST_REQUIRES_X86_SSE2;
1049 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1050 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1051 VMulMicrokernelTester()
1052 .batch_size(batch_size)
1053 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001054 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001055 }
1056 }
1057 }
1058
1059 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_zero_point) {
1060 TEST_REQUIRES_X86_SSE2;
1061 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1062 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1063 VMulMicrokernelTester()
1064 .batch_size(batch_size)
1065 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001066 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001067 }
1068 }
1069 }
1070
1071 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_zero_point) {
1072 TEST_REQUIRES_X86_SSE2;
1073 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1074 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1075 VMulMicrokernelTester()
1076 .batch_size(batch_size)
1077 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001078 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001079 }
1080 }
1081 }
1082
1083 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_scale) {
1084 TEST_REQUIRES_X86_SSE2;
1085 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1086 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1087 VMulMicrokernelTester()
1088 .batch_size(batch_size)
1089 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001090 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001091 }
1092 }
1093 }
1094
1095 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_scale) {
1096 TEST_REQUIRES_X86_SSE2;
1097 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1098 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1099 VMulMicrokernelTester()
1100 .batch_size(batch_size)
1101 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001102 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001103 }
1104 }
1105 }
1106
1107 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_scale) {
1108 TEST_REQUIRES_X86_SSE2;
1109 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1110 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1111 VMulMicrokernelTester()
1112 .batch_size(batch_size)
1113 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001114 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001115 }
1116 }
1117 }
1118
1119 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmin) {
1120 TEST_REQUIRES_X86_SSE2;
1121 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1122 VMulMicrokernelTester()
1123 .batch_size(batch_size)
1124 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001125 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001126 }
1127 }
1128
1129 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmax) {
1130 TEST_REQUIRES_X86_SSE2;
1131 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1132 VMulMicrokernelTester()
1133 .batch_size(batch_size)
1134 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001135 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001136 }
1137 }
1138#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
1141#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1142 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_eq_16) {
1143 TEST_REQUIRES_X86_SSE2;
1144 VMulMicrokernelTester()
1145 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001146 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001147 }
1148
1149 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_div_16) {
1150 TEST_REQUIRES_X86_SSE2;
1151 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1152 VMulMicrokernelTester()
1153 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001154 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001155 }
1156 }
1157
1158 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_lt_16) {
1159 TEST_REQUIRES_X86_SSE2;
1160 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1161 VMulMicrokernelTester()
1162 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001163 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001164 }
1165 }
1166
1167 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_gt_16) {
1168 TEST_REQUIRES_X86_SSE2;
1169 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1170 VMulMicrokernelTester()
1171 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001172 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001173 }
1174 }
1175
1176 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a) {
1177 TEST_REQUIRES_X86_SSE2;
1178 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1179 VMulMicrokernelTester()
1180 .batch_size(batch_size)
1181 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001182 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001183 }
1184 }
1185
1186 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_b) {
1187 TEST_REQUIRES_X86_SSE2;
1188 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1189 VMulMicrokernelTester()
1190 .batch_size(batch_size)
1191 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001192 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001193 }
1194 }
1195
1196 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
1197 TEST_REQUIRES_X86_SSE2;
1198 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1199 VMulMicrokernelTester()
1200 .batch_size(batch_size)
1201 .inplace_a(true)
1202 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001203 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001204 }
1205 }
1206
1207 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_zero_point) {
1208 TEST_REQUIRES_X86_SSE2;
1209 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1210 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1211 VMulMicrokernelTester()
1212 .batch_size(batch_size)
1213 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001214 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001215 }
1216 }
1217 }
1218
1219 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_zero_point) {
1220 TEST_REQUIRES_X86_SSE2;
1221 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1222 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1223 VMulMicrokernelTester()
1224 .batch_size(batch_size)
1225 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001226 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001227 }
1228 }
1229 }
1230
1231 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_zero_point) {
1232 TEST_REQUIRES_X86_SSE2;
1233 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1234 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1235 VMulMicrokernelTester()
1236 .batch_size(batch_size)
1237 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001238 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001239 }
1240 }
1241 }
1242
1243 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_scale) {
1244 TEST_REQUIRES_X86_SSE2;
1245 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1246 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1247 VMulMicrokernelTester()
1248 .batch_size(batch_size)
1249 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001250 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001251 }
1252 }
1253 }
1254
1255 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_scale) {
1256 TEST_REQUIRES_X86_SSE2;
1257 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1258 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1259 VMulMicrokernelTester()
1260 .batch_size(batch_size)
1261 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001262 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001263 }
1264 }
1265 }
1266
1267 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_scale) {
1268 TEST_REQUIRES_X86_SSE2;
1269 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1270 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1271 VMulMicrokernelTester()
1272 .batch_size(batch_size)
1273 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001274 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001275 }
1276 }
1277 }
1278
1279 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmin) {
1280 TEST_REQUIRES_X86_SSE2;
1281 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1282 VMulMicrokernelTester()
1283 .batch_size(batch_size)
1284 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001285 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001286 }
1287 }
1288
1289 TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmax) {
1290 TEST_REQUIRES_X86_SSE2;
1291 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1292 VMulMicrokernelTester()
1293 .batch_size(batch_size)
1294 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001295 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001296 }
1297 }
1298#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
1301#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1302 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_eq_8) {
1303 TEST_REQUIRES_X86_SSE41;
1304 VMulMicrokernelTester()
1305 .batch_size(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001306 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001307 }
1308
1309 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_div_8) {
1310 TEST_REQUIRES_X86_SSE41;
1311 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1312 VMulMicrokernelTester()
1313 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001314 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001315 }
1316 }
1317
1318 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_lt_8) {
1319 TEST_REQUIRES_X86_SSE41;
1320 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1321 VMulMicrokernelTester()
1322 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001323 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001324 }
1325 }
1326
1327 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_gt_8) {
1328 TEST_REQUIRES_X86_SSE41;
1329 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1330 VMulMicrokernelTester()
1331 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001332 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001333 }
1334 }
1335
1336 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a) {
1337 TEST_REQUIRES_X86_SSE41;
1338 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1339 VMulMicrokernelTester()
1340 .batch_size(batch_size)
1341 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001342 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001343 }
1344 }
1345
1346 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_b) {
1347 TEST_REQUIRES_X86_SSE41;
1348 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1349 VMulMicrokernelTester()
1350 .batch_size(batch_size)
1351 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001352 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001353 }
1354 }
1355
1356 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
1357 TEST_REQUIRES_X86_SSE41;
1358 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1359 VMulMicrokernelTester()
1360 .batch_size(batch_size)
1361 .inplace_a(true)
1362 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001363 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001364 }
1365 }
1366
1367 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_zero_point) {
1368 TEST_REQUIRES_X86_SSE41;
1369 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1370 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1371 VMulMicrokernelTester()
1372 .batch_size(batch_size)
1373 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001374 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001375 }
1376 }
1377 }
1378
1379 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_zero_point) {
1380 TEST_REQUIRES_X86_SSE41;
1381 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1382 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1383 VMulMicrokernelTester()
1384 .batch_size(batch_size)
1385 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001386 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001387 }
1388 }
1389 }
1390
1391 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_zero_point) {
1392 TEST_REQUIRES_X86_SSE41;
1393 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1394 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1395 VMulMicrokernelTester()
1396 .batch_size(batch_size)
1397 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001398 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001399 }
1400 }
1401 }
1402
1403 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_scale) {
1404 TEST_REQUIRES_X86_SSE41;
1405 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1406 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1407 VMulMicrokernelTester()
1408 .batch_size(batch_size)
1409 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001410 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001411 }
1412 }
1413 }
1414
1415 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_scale) {
1416 TEST_REQUIRES_X86_SSE41;
1417 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1418 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1419 VMulMicrokernelTester()
1420 .batch_size(batch_size)
1421 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001422 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001423 }
1424 }
1425 }
1426
1427 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_scale) {
1428 TEST_REQUIRES_X86_SSE41;
1429 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1430 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1431 VMulMicrokernelTester()
1432 .batch_size(batch_size)
1433 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001434 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001435 }
1436 }
1437 }
1438
1439 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmin) {
1440 TEST_REQUIRES_X86_SSE41;
1441 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1442 VMulMicrokernelTester()
1443 .batch_size(batch_size)
1444 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001445 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001446 }
1447 }
1448
1449 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmax) {
1450 TEST_REQUIRES_X86_SSE41;
1451 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1452 VMulMicrokernelTester()
1453 .batch_size(batch_size)
1454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001455 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001456 }
1457 }
1458#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
1461#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1462 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_eq_16) {
1463 TEST_REQUIRES_X86_SSE41;
1464 VMulMicrokernelTester()
1465 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001466 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001467 }
1468
1469 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_div_16) {
1470 TEST_REQUIRES_X86_SSE41;
1471 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1472 VMulMicrokernelTester()
1473 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001474 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001475 }
1476 }
1477
1478 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_lt_16) {
1479 TEST_REQUIRES_X86_SSE41;
1480 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1481 VMulMicrokernelTester()
1482 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001483 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001484 }
1485 }
1486
1487 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_gt_16) {
1488 TEST_REQUIRES_X86_SSE41;
1489 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1490 VMulMicrokernelTester()
1491 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001492 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001493 }
1494 }
1495
1496 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a) {
1497 TEST_REQUIRES_X86_SSE41;
1498 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1499 VMulMicrokernelTester()
1500 .batch_size(batch_size)
1501 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001502 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001503 }
1504 }
1505
1506 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_b) {
1507 TEST_REQUIRES_X86_SSE41;
1508 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1509 VMulMicrokernelTester()
1510 .batch_size(batch_size)
1511 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001512 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001513 }
1514 }
1515
1516 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
1517 TEST_REQUIRES_X86_SSE41;
1518 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1519 VMulMicrokernelTester()
1520 .batch_size(batch_size)
1521 .inplace_a(true)
1522 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001523 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001524 }
1525 }
1526
1527 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_zero_point) {
1528 TEST_REQUIRES_X86_SSE41;
1529 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1530 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1531 VMulMicrokernelTester()
1532 .batch_size(batch_size)
1533 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001534 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001535 }
1536 }
1537 }
1538
1539 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_zero_point) {
1540 TEST_REQUIRES_X86_SSE41;
1541 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1542 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1543 VMulMicrokernelTester()
1544 .batch_size(batch_size)
1545 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001546 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001547 }
1548 }
1549 }
1550
1551 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_zero_point) {
1552 TEST_REQUIRES_X86_SSE41;
1553 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1554 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1555 VMulMicrokernelTester()
1556 .batch_size(batch_size)
1557 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001558 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001559 }
1560 }
1561 }
1562
1563 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_scale) {
1564 TEST_REQUIRES_X86_SSE41;
1565 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1566 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1567 VMulMicrokernelTester()
1568 .batch_size(batch_size)
1569 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001570 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001571 }
1572 }
1573 }
1574
1575 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_scale) {
1576 TEST_REQUIRES_X86_SSE41;
1577 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1578 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1579 VMulMicrokernelTester()
1580 .batch_size(batch_size)
1581 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001582 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001583 }
1584 }
1585 }
1586
1587 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_scale) {
1588 TEST_REQUIRES_X86_SSE41;
1589 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1590 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1591 VMulMicrokernelTester()
1592 .batch_size(batch_size)
1593 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001594 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001595 }
1596 }
1597 }
1598
1599 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmin) {
1600 TEST_REQUIRES_X86_SSE41;
1601 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1602 VMulMicrokernelTester()
1603 .batch_size(batch_size)
1604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001605 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001606 }
1607 }
1608
1609 TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmax) {
1610 TEST_REQUIRES_X86_SSE41;
1611 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1612 VMulMicrokernelTester()
1613 .batch_size(batch_size)
1614 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001615 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001616 }
1617 }
1618#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1622 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_eq_8) {
1623 TEST_REQUIRES_X86_AVX;
1624 VMulMicrokernelTester()
1625 .batch_size(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001626 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001627 }
1628
1629 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_div_8) {
1630 TEST_REQUIRES_X86_AVX;
1631 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1632 VMulMicrokernelTester()
1633 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001634 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001635 }
1636 }
1637
1638 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_lt_8) {
1639 TEST_REQUIRES_X86_AVX;
1640 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1641 VMulMicrokernelTester()
1642 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001643 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001644 }
1645 }
1646
1647 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_gt_8) {
1648 TEST_REQUIRES_X86_AVX;
1649 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1650 VMulMicrokernelTester()
1651 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001652 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001653 }
1654 }
1655
1656 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a) {
1657 TEST_REQUIRES_X86_AVX;
1658 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1659 VMulMicrokernelTester()
1660 .batch_size(batch_size)
1661 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001662 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001663 }
1664 }
1665
1666 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_b) {
1667 TEST_REQUIRES_X86_AVX;
1668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1669 VMulMicrokernelTester()
1670 .batch_size(batch_size)
1671 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001672 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001673 }
1674 }
1675
1676 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a_and_b) {
1677 TEST_REQUIRES_X86_AVX;
1678 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1679 VMulMicrokernelTester()
1680 .batch_size(batch_size)
1681 .inplace_a(true)
1682 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001683 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001684 }
1685 }
1686
1687 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_zero_point) {
1688 TEST_REQUIRES_X86_AVX;
1689 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1690 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1691 VMulMicrokernelTester()
1692 .batch_size(batch_size)
1693 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001694 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001695 }
1696 }
1697 }
1698
1699 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_zero_point) {
1700 TEST_REQUIRES_X86_AVX;
1701 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1702 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1703 VMulMicrokernelTester()
1704 .batch_size(batch_size)
1705 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001706 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001707 }
1708 }
1709 }
1710
1711 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_zero_point) {
1712 TEST_REQUIRES_X86_AVX;
1713 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1714 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1715 VMulMicrokernelTester()
1716 .batch_size(batch_size)
1717 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001718 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001719 }
1720 }
1721 }
1722
1723 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_scale) {
1724 TEST_REQUIRES_X86_AVX;
1725 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1727 VMulMicrokernelTester()
1728 .batch_size(batch_size)
1729 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001730 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001731 }
1732 }
1733 }
1734
1735 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_scale) {
1736 TEST_REQUIRES_X86_AVX;
1737 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1738 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1739 VMulMicrokernelTester()
1740 .batch_size(batch_size)
1741 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001742 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001743 }
1744 }
1745 }
1746
1747 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_scale) {
1748 TEST_REQUIRES_X86_AVX;
1749 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1750 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1751 VMulMicrokernelTester()
1752 .batch_size(batch_size)
1753 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001754 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001755 }
1756 }
1757 }
1758
1759 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmin) {
1760 TEST_REQUIRES_X86_AVX;
1761 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1762 VMulMicrokernelTester()
1763 .batch_size(batch_size)
1764 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001765 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001766 }
1767 }
1768
1769 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmax) {
1770 TEST_REQUIRES_X86_AVX;
1771 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772 VMulMicrokernelTester()
1773 .batch_size(batch_size)
1774 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001775 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001776 }
1777 }
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1782 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_eq_16) {
1783 TEST_REQUIRES_X86_AVX;
1784 VMulMicrokernelTester()
1785 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001786 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001787 }
1788
1789 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_div_16) {
1790 TEST_REQUIRES_X86_AVX;
1791 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1792 VMulMicrokernelTester()
1793 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001794 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001795 }
1796 }
1797
1798 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_lt_16) {
1799 TEST_REQUIRES_X86_AVX;
1800 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1801 VMulMicrokernelTester()
1802 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001803 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001804 }
1805 }
1806
1807 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_gt_16) {
1808 TEST_REQUIRES_X86_AVX;
1809 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1810 VMulMicrokernelTester()
1811 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001812 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001813 }
1814 }
1815
1816 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a) {
1817 TEST_REQUIRES_X86_AVX;
1818 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1819 VMulMicrokernelTester()
1820 .batch_size(batch_size)
1821 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001822 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001823 }
1824 }
1825
1826 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_b) {
1827 TEST_REQUIRES_X86_AVX;
1828 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1829 VMulMicrokernelTester()
1830 .batch_size(batch_size)
1831 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001832 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001833 }
1834 }
1835
1836 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a_and_b) {
1837 TEST_REQUIRES_X86_AVX;
1838 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1839 VMulMicrokernelTester()
1840 .batch_size(batch_size)
1841 .inplace_a(true)
1842 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001843 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001844 }
1845 }
1846
1847 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_zero_point) {
1848 TEST_REQUIRES_X86_AVX;
1849 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1850 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1851 VMulMicrokernelTester()
1852 .batch_size(batch_size)
1853 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001854 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001855 }
1856 }
1857 }
1858
1859 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_zero_point) {
1860 TEST_REQUIRES_X86_AVX;
1861 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1862 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1863 VMulMicrokernelTester()
1864 .batch_size(batch_size)
1865 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001866 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001867 }
1868 }
1869 }
1870
1871 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_zero_point) {
1872 TEST_REQUIRES_X86_AVX;
1873 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1874 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1875 VMulMicrokernelTester()
1876 .batch_size(batch_size)
1877 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08001878 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001879 }
1880 }
1881 }
1882
1883 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_scale) {
1884 TEST_REQUIRES_X86_AVX;
1885 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1886 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1887 VMulMicrokernelTester()
1888 .batch_size(batch_size)
1889 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001890 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001891 }
1892 }
1893 }
1894
1895 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_scale) {
1896 TEST_REQUIRES_X86_AVX;
1897 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1898 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1899 VMulMicrokernelTester()
1900 .batch_size(batch_size)
1901 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001902 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001903 }
1904 }
1905 }
1906
1907 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_scale) {
1908 TEST_REQUIRES_X86_AVX;
1909 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1910 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1911 VMulMicrokernelTester()
1912 .batch_size(batch_size)
1913 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08001914 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001915 }
1916 }
1917 }
1918
1919 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmin) {
1920 TEST_REQUIRES_X86_AVX;
1921 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1922 VMulMicrokernelTester()
1923 .batch_size(batch_size)
1924 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001925 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001926 }
1927 }
1928
1929 TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmax) {
1930 TEST_REQUIRES_X86_AVX;
1931 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1932 VMulMicrokernelTester()
1933 .batch_size(batch_size)
1934 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001935 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhana212eac2021-08-02 09:58:04 -07001936 }
1937 }
1938#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001939
1940
Marat Dukhan4c617792021-12-21 15:47:58 -08001941#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001942 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_eq_8) {
1943 VMulMicrokernelTester()
1944 .batch_size(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001945 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001946 }
1947
1948 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_div_8) {
1949 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1950 VMulMicrokernelTester()
1951 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001952 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001953 }
1954 }
1955
1956 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_lt_8) {
1957 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1958 VMulMicrokernelTester()
1959 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001960 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001961 }
1962 }
1963
1964 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_gt_8) {
1965 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1966 VMulMicrokernelTester()
1967 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08001968 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001969 }
1970 }
1971
1972 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a) {
1973 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1974 VMulMicrokernelTester()
1975 .batch_size(batch_size)
1976 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001977 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001978 }
1979 }
1980
1981 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_b) {
1982 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1983 VMulMicrokernelTester()
1984 .batch_size(batch_size)
1985 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001986 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001987 }
1988 }
1989
1990 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a_and_b) {
1991 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1992 VMulMicrokernelTester()
1993 .batch_size(batch_size)
1994 .inplace_a(true)
1995 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08001996 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07001997 }
1998 }
1999
2000 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_zero_point) {
2001 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2002 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2003 VMulMicrokernelTester()
2004 .batch_size(batch_size)
2005 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002006 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002007 }
2008 }
2009 }
2010
2011 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_zero_point) {
2012 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2013 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2014 VMulMicrokernelTester()
2015 .batch_size(batch_size)
2016 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002017 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002018 }
2019 }
2020 }
2021
2022 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_zero_point) {
2023 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2024 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2025 VMulMicrokernelTester()
2026 .batch_size(batch_size)
2027 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002028 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002029 }
2030 }
2031 }
2032
2033 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_scale) {
2034 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2035 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2036 VMulMicrokernelTester()
2037 .batch_size(batch_size)
2038 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002039 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002040 }
2041 }
2042 }
2043
2044 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_scale) {
2045 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2046 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2047 VMulMicrokernelTester()
2048 .batch_size(batch_size)
2049 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002050 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002051 }
2052 }
2053 }
2054
2055 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_scale) {
2056 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2057 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2058 VMulMicrokernelTester()
2059 .batch_size(batch_size)
2060 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002061 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002062 }
2063 }
2064 }
2065
2066 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmin) {
2067 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2068 VMulMicrokernelTester()
2069 .batch_size(batch_size)
2070 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002072 }
2073 }
2074
2075 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmax) {
2076 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2077 VMulMicrokernelTester()
2078 .batch_size(batch_size)
2079 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002080 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002081 }
2082 }
Marat Dukhan4c617792021-12-21 15:47:58 -08002083#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002084
2085
Marat Dukhan4c617792021-12-21 15:47:58 -08002086#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002087 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_eq_16) {
2088 VMulMicrokernelTester()
2089 .batch_size(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002090 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002091 }
2092
2093 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_div_16) {
2094 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2095 VMulMicrokernelTester()
2096 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002097 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002098 }
2099 }
2100
2101 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_lt_16) {
2102 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2103 VMulMicrokernelTester()
2104 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002105 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002106 }
2107 }
2108
2109 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_gt_16) {
2110 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2111 VMulMicrokernelTester()
2112 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002113 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002114 }
2115 }
2116
2117 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a) {
2118 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2119 VMulMicrokernelTester()
2120 .batch_size(batch_size)
2121 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002122 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002123 }
2124 }
2125
2126 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_b) {
2127 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2128 VMulMicrokernelTester()
2129 .batch_size(batch_size)
2130 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002131 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002132 }
2133 }
2134
2135 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a_and_b) {
2136 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2137 VMulMicrokernelTester()
2138 .batch_size(batch_size)
2139 .inplace_a(true)
2140 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002141 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002142 }
2143 }
2144
2145 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_zero_point) {
2146 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2147 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2148 VMulMicrokernelTester()
2149 .batch_size(batch_size)
2150 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002151 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002152 }
2153 }
2154 }
2155
2156 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_zero_point) {
2157 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2158 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2159 VMulMicrokernelTester()
2160 .batch_size(batch_size)
2161 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002162 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002163 }
2164 }
2165 }
2166
2167 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_zero_point) {
2168 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2169 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2170 VMulMicrokernelTester()
2171 .batch_size(batch_size)
2172 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002173 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002174 }
2175 }
2176 }
2177
2178 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_scale) {
2179 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2180 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2181 VMulMicrokernelTester()
2182 .batch_size(batch_size)
2183 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002184 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002185 }
2186 }
2187 }
2188
2189 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_scale) {
2190 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2191 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2192 VMulMicrokernelTester()
2193 .batch_size(batch_size)
2194 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002195 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002196 }
2197 }
2198 }
2199
2200 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_scale) {
2201 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2202 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2203 VMulMicrokernelTester()
2204 .batch_size(batch_size)
2205 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002206 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002207 }
2208 }
2209 }
2210
2211 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmin) {
2212 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2213 VMulMicrokernelTester()
2214 .batch_size(batch_size)
2215 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002217 }
2218 }
2219
2220 TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmax) {
2221 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2222 VMulMicrokernelTester()
2223 .batch_size(batch_size)
2224 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002225 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan661ea6d2021-08-02 11:25:41 -07002226 }
2227 }
Marat Dukhan4c617792021-12-21 15:47:58 -08002228#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan79993412021-08-02 15:02:57 -07002229
2230
2231TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_eq_1) {
2232 VMulMicrokernelTester()
2233 .batch_size(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002234 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002235}
2236
2237TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_gt_1) {
2238 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
2239 VMulMicrokernelTester()
2240 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002241 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002242 }
2243}
2244
2245TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a) {
2246 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2247 VMulMicrokernelTester()
2248 .batch_size(batch_size)
2249 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002250 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002251 }
2252}
2253
2254TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_b) {
2255 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2256 VMulMicrokernelTester()
2257 .batch_size(batch_size)
2258 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002259 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002260 }
2261}
2262
2263TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a_and_b) {
2264 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2265 VMulMicrokernelTester()
2266 .batch_size(batch_size)
2267 .inplace_a(true)
2268 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002269 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002270 }
2271}
2272
2273TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_zero_point) {
2274 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2275 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2276 VMulMicrokernelTester()
2277 .batch_size(batch_size)
2278 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002279 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002280 }
2281 }
2282}
2283
2284TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_zero_point) {
2285 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2286 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2287 VMulMicrokernelTester()
2288 .batch_size(batch_size)
2289 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002290 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002291 }
2292 }
2293}
2294
2295TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_zero_point) {
2296 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2297 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2298 VMulMicrokernelTester()
2299 .batch_size(batch_size)
2300 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002301 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002302 }
2303 }
2304}
2305
2306TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_scale) {
2307 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2308 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2309 VMulMicrokernelTester()
2310 .batch_size(batch_size)
2311 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002312 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002313 }
2314 }
2315}
2316
2317TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_scale) {
2318 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2319 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2320 VMulMicrokernelTester()
2321 .batch_size(batch_size)
2322 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002323 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002324 }
2325 }
2326}
2327
2328TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_scale) {
2329 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2330 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2331 VMulMicrokernelTester()
2332 .batch_size(batch_size)
2333 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002334 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002335 }
2336 }
2337}
2338
2339TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmin) {
2340 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2341 VMulMicrokernelTester()
2342 .batch_size(batch_size)
2343 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002344 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002345 }
2346}
2347
2348TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmax) {
2349 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2350 VMulMicrokernelTester()
2351 .batch_size(batch_size)
2352 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002353 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002354 }
2355}
2356
2357TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_eq_2) {
2358 VMulMicrokernelTester()
2359 .batch_size(2)
Marat Dukhan50323b82022-01-11 00:12:01 -08002360 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002361}
2362
2363TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_div_2) {
2364 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
2365 VMulMicrokernelTester()
2366 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002367 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002368 }
2369}
2370
2371TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_lt_2) {
2372 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
2373 VMulMicrokernelTester()
2374 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002375 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002376 }
2377}
2378
2379TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_gt_2) {
2380 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
2381 VMulMicrokernelTester()
2382 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002383 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002384 }
2385}
2386
2387TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a) {
2388 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2389 VMulMicrokernelTester()
2390 .batch_size(batch_size)
2391 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002392 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002393 }
2394}
2395
2396TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_b) {
2397 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2398 VMulMicrokernelTester()
2399 .batch_size(batch_size)
2400 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002401 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002402 }
2403}
2404
2405TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a_and_b) {
2406 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2407 VMulMicrokernelTester()
2408 .batch_size(batch_size)
2409 .inplace_a(true)
2410 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002411 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002412 }
2413}
2414
2415TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_zero_point) {
2416 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2417 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2418 VMulMicrokernelTester()
2419 .batch_size(batch_size)
2420 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002421 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002422 }
2423 }
2424}
2425
2426TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_zero_point) {
2427 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2428 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2429 VMulMicrokernelTester()
2430 .batch_size(batch_size)
2431 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002432 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002433 }
2434 }
2435}
2436
2437TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_zero_point) {
2438 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2439 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2440 VMulMicrokernelTester()
2441 .batch_size(batch_size)
2442 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002443 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002444 }
2445 }
2446}
2447
2448TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_scale) {
2449 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2450 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2451 VMulMicrokernelTester()
2452 .batch_size(batch_size)
2453 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002454 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002455 }
2456 }
2457}
2458
2459TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_scale) {
2460 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2461 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2462 VMulMicrokernelTester()
2463 .batch_size(batch_size)
2464 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002465 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002466 }
2467 }
2468}
2469
2470TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_scale) {
2471 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2472 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2473 VMulMicrokernelTester()
2474 .batch_size(batch_size)
2475 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002476 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002477 }
2478 }
2479}
2480
2481TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmin) {
2482 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2483 VMulMicrokernelTester()
2484 .batch_size(batch_size)
2485 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002486 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002487 }
2488}
2489
2490TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmax) {
2491 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2492 VMulMicrokernelTester()
2493 .batch_size(batch_size)
2494 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002495 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002496 }
2497}
2498
2499TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_eq_4) {
2500 VMulMicrokernelTester()
2501 .batch_size(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08002502 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002503}
2504
2505TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_div_4) {
2506 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
2507 VMulMicrokernelTester()
2508 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002509 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002510 }
2511}
2512
2513TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_lt_4) {
2514 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
2515 VMulMicrokernelTester()
2516 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002517 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002518 }
2519}
2520
2521TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_gt_4) {
2522 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
2523 VMulMicrokernelTester()
2524 .batch_size(batch_size)
Marat Dukhan50323b82022-01-11 00:12:01 -08002525 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002526 }
2527}
2528
2529TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a) {
2530 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2531 VMulMicrokernelTester()
2532 .batch_size(batch_size)
2533 .inplace_a(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002534 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002535 }
2536}
2537
2538TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_b) {
2539 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2540 VMulMicrokernelTester()
2541 .batch_size(batch_size)
2542 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002543 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002544 }
2545}
2546
2547TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a_and_b) {
2548 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2549 VMulMicrokernelTester()
2550 .batch_size(batch_size)
2551 .inplace_a(true)
2552 .inplace_b(true)
Marat Dukhan50323b82022-01-11 00:12:01 -08002553 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002554 }
2555}
2556
2557TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_zero_point) {
2558 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2559 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2560 VMulMicrokernelTester()
2561 .batch_size(batch_size)
2562 .a_zero_point(a_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002563 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002564 }
2565 }
2566}
2567
2568TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_zero_point) {
2569 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2570 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2571 VMulMicrokernelTester()
2572 .batch_size(batch_size)
2573 .b_zero_point(b_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002574 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002575 }
2576 }
2577}
2578
2579TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_zero_point) {
2580 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2581 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2582 VMulMicrokernelTester()
2583 .batch_size(batch_size)
2584 .y_zero_point(y_zero_point)
Marat Dukhan50323b82022-01-11 00:12:01 -08002585 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002586 }
2587 }
2588}
2589
2590TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_scale) {
2591 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2592 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2593 VMulMicrokernelTester()
2594 .batch_size(batch_size)
2595 .a_scale(a_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002596 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002597 }
2598 }
2599}
2600
2601TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_scale) {
2602 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2603 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2604 VMulMicrokernelTester()
2605 .batch_size(batch_size)
2606 .b_scale(b_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002607 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002608 }
2609 }
2610}
2611
2612TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_scale) {
2613 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2614 for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2615 VMulMicrokernelTester()
2616 .batch_size(batch_size)
2617 .y_scale(y_scale)
Marat Dukhan50323b82022-01-11 00:12:01 -08002618 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002619 }
2620 }
2621}
2622
2623TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmin) {
2624 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2625 VMulMicrokernelTester()
2626 .batch_size(batch_size)
2627 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002628 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002629 }
2630}
2631
2632TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmax) {
2633 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2634 VMulMicrokernelTester()
2635 .batch_size(batch_size)
2636 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002637 .Test(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4, xnn_init_qs8_mul_minmax_fp32_scalar_params, xnn_qs8_requantize_fp32);
Marat Dukhan79993412021-08-02 15:02:57 -07002638 }
2639}