blob: 9c016d50e56c9af935a7144be995540c5b2e55e8 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-vsqrt.yaml
//   Generator: tools/generate-vunary-test.py
9
10
11#include <gtest/gtest.h>
12
13#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
16#include <xnnpack/vunary.h>
17#include "vunary-microkernel-tester.h"
18
19
20#if XNN_ARCH_ARM64
21 TEST(F32_VSQRT__NEON_SQRT_X4, batch_eq_4) {
22 TEST_REQUIRES_ARM_NEON;
23 VUnOpMicrokernelTester()
24 .batch_size(4)
25 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
26 }
27
28 TEST(F32_VSQRT__NEON_SQRT_X4, batch_div_4) {
29 TEST_REQUIRES_ARM_NEON;
30 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
31 VUnOpMicrokernelTester()
32 .batch_size(batch_size)
33 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
34 }
35 }
36
37 TEST(F32_VSQRT__NEON_SQRT_X4, batch_lt_4) {
38 TEST_REQUIRES_ARM_NEON;
39 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
40 VUnOpMicrokernelTester()
41 .batch_size(batch_size)
42 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
43 }
44 }
45
46 TEST(F32_VSQRT__NEON_SQRT_X4, batch_gt_4) {
47 TEST_REQUIRES_ARM_NEON;
48 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
49 VUnOpMicrokernelTester()
50 .batch_size(batch_size)
51 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
52 }
53 }
54
55 TEST(F32_VSQRT__NEON_SQRT_X4, inplace) {
56 TEST_REQUIRES_ARM_NEON;
57 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
58 VUnOpMicrokernelTester()
59 .batch_size(batch_size)
60 .inplace(true)
61 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
62 }
63 }
64#endif // XNN_ARCH_ARM64
65
66
67#if XNN_ARCH_ARM64
68 TEST(F32_VSQRT__NEON_SQRT_X8, batch_eq_8) {
69 TEST_REQUIRES_ARM_NEON;
70 VUnOpMicrokernelTester()
71 .batch_size(8)
72 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
73 }
74
75 TEST(F32_VSQRT__NEON_SQRT_X8, batch_div_8) {
76 TEST_REQUIRES_ARM_NEON;
77 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
78 VUnOpMicrokernelTester()
79 .batch_size(batch_size)
80 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
81 }
82 }
83
84 TEST(F32_VSQRT__NEON_SQRT_X8, batch_lt_8) {
85 TEST_REQUIRES_ARM_NEON;
86 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
87 VUnOpMicrokernelTester()
88 .batch_size(batch_size)
89 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
90 }
91 }
92
93 TEST(F32_VSQRT__NEON_SQRT_X8, batch_gt_8) {
94 TEST_REQUIRES_ARM_NEON;
95 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
96 VUnOpMicrokernelTester()
97 .batch_size(batch_size)
98 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
99 }
100 }
101
102 TEST(F32_VSQRT__NEON_SQRT_X8, inplace) {
103 TEST_REQUIRES_ARM_NEON;
104 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
105 VUnOpMicrokernelTester()
106 .batch_size(batch_size)
107 .inplace(true)
108 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neon_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
109 }
110 }
111#endif // XNN_ARCH_ARM64
112
113
114#if XNN_ARCH_ARM || XNN_ARCH_ARM64
115 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_eq_4) {
116 TEST_REQUIRES_ARM_NEON_FMA;
117 VUnOpMicrokernelTester()
118 .batch_size(4)
119 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
120 }
121
122 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_div_4) {
123 TEST_REQUIRES_ARM_NEON_FMA;
124 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
125 VUnOpMicrokernelTester()
126 .batch_size(batch_size)
127 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
128 }
129 }
130
131 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_lt_4) {
132 TEST_REQUIRES_ARM_NEON_FMA;
133 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
134 VUnOpMicrokernelTester()
135 .batch_size(batch_size)
136 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
137 }
138 }
139
140 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_gt_4) {
141 TEST_REQUIRES_ARM_NEON_FMA;
142 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
143 VUnOpMicrokernelTester()
144 .batch_size(batch_size)
145 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
146 }
147 }
148
149 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, inplace) {
150 TEST_REQUIRES_ARM_NEON_FMA;
151 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
152 VUnOpMicrokernelTester()
153 .batch_size(batch_size)
154 .inplace(true)
155 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
156 }
157 }
158#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
159
160
161#if XNN_ARCH_ARM || XNN_ARCH_ARM64
162 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_eq_8) {
163 TEST_REQUIRES_ARM_NEON_FMA;
164 VUnOpMicrokernelTester()
165 .batch_size(8)
166 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
167 }
168
169 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_div_8) {
170 TEST_REQUIRES_ARM_NEON_FMA;
171 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
172 VUnOpMicrokernelTester()
173 .batch_size(batch_size)
174 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
175 }
176 }
177
178 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_lt_8) {
179 TEST_REQUIRES_ARM_NEON_FMA;
180 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
181 VUnOpMicrokernelTester()
182 .batch_size(batch_size)
183 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
184 }
185 }
186
187 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_gt_8) {
188 TEST_REQUIRES_ARM_NEON_FMA;
189 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
190 VUnOpMicrokernelTester()
191 .batch_size(batch_size)
192 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
193 }
194 }
195
196 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, inplace) {
197 TEST_REQUIRES_ARM_NEON_FMA;
198 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
199 VUnOpMicrokernelTester()
200 .batch_size(batch_size)
201 .inplace(true)
202 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
203 }
204 }
205#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
206
207
208#if XNN_ARCH_ARM || XNN_ARCH_ARM64
209 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_eq_12) {
210 TEST_REQUIRES_ARM_NEON_FMA;
211 VUnOpMicrokernelTester()
212 .batch_size(12)
213 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
214 }
215
216 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_div_12) {
217 TEST_REQUIRES_ARM_NEON_FMA;
218 for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
219 VUnOpMicrokernelTester()
220 .batch_size(batch_size)
221 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
222 }
223 }
224
225 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_lt_12) {
226 TEST_REQUIRES_ARM_NEON_FMA;
227 for (size_t batch_size = 1; batch_size < 12; batch_size++) {
228 VUnOpMicrokernelTester()
229 .batch_size(batch_size)
230 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
231 }
232 }
233
234 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_gt_12) {
235 TEST_REQUIRES_ARM_NEON_FMA;
236 for (size_t batch_size = 13; batch_size < 24; batch_size++) {
237 VUnOpMicrokernelTester()
238 .batch_size(batch_size)
239 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
240 }
241 }
242
243 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, inplace) {
244 TEST_REQUIRES_ARM_NEON_FMA;
245 for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
246 VUnOpMicrokernelTester()
247 .batch_size(batch_size)
248 .inplace(true)
249 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
250 }
251 }
252#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
253
254
255#if XNN_ARCH_ARM || XNN_ARCH_ARM64
256 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_eq_16) {
257 TEST_REQUIRES_ARM_NEON_FMA;
258 VUnOpMicrokernelTester()
259 .batch_size(16)
260 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
261 }
262
263 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_div_16) {
264 TEST_REQUIRES_ARM_NEON_FMA;
265 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
266 VUnOpMicrokernelTester()
267 .batch_size(batch_size)
268 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
269 }
270 }
271
272 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_lt_16) {
273 TEST_REQUIRES_ARM_NEON_FMA;
274 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
275 VUnOpMicrokernelTester()
276 .batch_size(batch_size)
277 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
278 }
279 }
280
281 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_gt_16) {
282 TEST_REQUIRES_ARM_NEON_FMA;
283 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
284 VUnOpMicrokernelTester()
285 .batch_size(batch_size)
286 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
287 }
288 }
289
290 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, inplace) {
291 TEST_REQUIRES_ARM_NEON_FMA;
292 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
293 VUnOpMicrokernelTester()
294 .batch_size(batch_size)
295 .inplace(true)
296 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
297 }
298 }
299#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
300
301
302#if XNN_ARCH_ARM || XNN_ARCH_ARM64
303 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_eq_20) {
304 TEST_REQUIRES_ARM_NEON_FMA;
305 VUnOpMicrokernelTester()
306 .batch_size(20)
307 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
308 }
309
310 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_div_20) {
311 TEST_REQUIRES_ARM_NEON_FMA;
312 for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
313 VUnOpMicrokernelTester()
314 .batch_size(batch_size)
315 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
316 }
317 }
318
319 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_lt_20) {
320 TEST_REQUIRES_ARM_NEON_FMA;
321 for (size_t batch_size = 1; batch_size < 20; batch_size++) {
322 VUnOpMicrokernelTester()
323 .batch_size(batch_size)
324 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
325 }
326 }
327
328 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_gt_20) {
329 TEST_REQUIRES_ARM_NEON_FMA;
330 for (size_t batch_size = 21; batch_size < 40; batch_size++) {
331 VUnOpMicrokernelTester()
332 .batch_size(batch_size)
333 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
334 }
335 }
336
337 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, inplace) {
338 TEST_REQUIRES_ARM_NEON_FMA;
339 for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
340 VUnOpMicrokernelTester()
341 .batch_size(batch_size)
342 .inplace(true)
343 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
344 }
345 }
346#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
347
348
349#if XNN_ARCH_ARM || XNN_ARCH_ARM64
350 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_eq_24) {
351 TEST_REQUIRES_ARM_NEON_FMA;
352 VUnOpMicrokernelTester()
353 .batch_size(24)
354 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
355 }
356
357 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_div_24) {
358 TEST_REQUIRES_ARM_NEON_FMA;
359 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
360 VUnOpMicrokernelTester()
361 .batch_size(batch_size)
362 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
363 }
364 }
365
366 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_lt_24) {
367 TEST_REQUIRES_ARM_NEON_FMA;
368 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
369 VUnOpMicrokernelTester()
370 .batch_size(batch_size)
371 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
372 }
373 }
374
375 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_gt_24) {
376 TEST_REQUIRES_ARM_NEON_FMA;
377 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
378 VUnOpMicrokernelTester()
379 .batch_size(batch_size)
380 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
381 }
382 }
383
384 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, inplace) {
385 TEST_REQUIRES_ARM_NEON_FMA;
386 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
387 VUnOpMicrokernelTester()
388 .batch_size(batch_size)
389 .inplace(true)
390 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
391 }
392 }
393#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
394
395
396#if XNN_ARCH_ARM || XNN_ARCH_ARM64
397 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_eq_28) {
398 TEST_REQUIRES_ARM_NEON_FMA;
399 VUnOpMicrokernelTester()
400 .batch_size(28)
401 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
402 }
403
404 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_div_28) {
405 TEST_REQUIRES_ARM_NEON_FMA;
406 for (size_t batch_size = 56; batch_size < 280; batch_size += 28) {
407 VUnOpMicrokernelTester()
408 .batch_size(batch_size)
409 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
410 }
411 }
412
413 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_lt_28) {
414 TEST_REQUIRES_ARM_NEON_FMA;
415 for (size_t batch_size = 1; batch_size < 28; batch_size++) {
416 VUnOpMicrokernelTester()
417 .batch_size(batch_size)
418 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
419 }
420 }
421
422 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_gt_28) {
423 TEST_REQUIRES_ARM_NEON_FMA;
424 for (size_t batch_size = 29; batch_size < 56; batch_size++) {
425 VUnOpMicrokernelTester()
426 .batch_size(batch_size)
427 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
428 }
429 }
430
431 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, inplace) {
432 TEST_REQUIRES_ARM_NEON_FMA;
433 for (size_t batch_size = 1; batch_size <= 140; batch_size += 27) {
434 VUnOpMicrokernelTester()
435 .batch_size(batch_size)
436 .inplace(true)
437 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
438 }
439 }
440#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
441
442
443#if XNN_ARCH_ARM || XNN_ARCH_ARM64
444 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_eq_32) {
445 TEST_REQUIRES_ARM_NEON_FMA;
446 VUnOpMicrokernelTester()
447 .batch_size(32)
448 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
449 }
450
451 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_div_32) {
452 TEST_REQUIRES_ARM_NEON_FMA;
453 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
454 VUnOpMicrokernelTester()
455 .batch_size(batch_size)
456 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
457 }
458 }
459
460 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_lt_32) {
461 TEST_REQUIRES_ARM_NEON_FMA;
462 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
463 VUnOpMicrokernelTester()
464 .batch_size(batch_size)
465 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
466 }
467 }
468
469 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_gt_32) {
470 TEST_REQUIRES_ARM_NEON_FMA;
471 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
472 VUnOpMicrokernelTester()
473 .batch_size(batch_size)
474 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
475 }
476 }
477
478 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, inplace) {
479 TEST_REQUIRES_ARM_NEON_FMA;
480 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
481 VUnOpMicrokernelTester()
482 .batch_size(batch_size)
483 .inplace(true)
484 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
485 }
486 }
487#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
488
489
490#if XNN_ARCH_ARM || XNN_ARCH_ARM64
491 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_eq_36) {
492 TEST_REQUIRES_ARM_NEON_FMA;
493 VUnOpMicrokernelTester()
494 .batch_size(36)
495 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
496 }
497
498 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_div_36) {
499 TEST_REQUIRES_ARM_NEON_FMA;
500 for (size_t batch_size = 72; batch_size < 360; batch_size += 36) {
501 VUnOpMicrokernelTester()
502 .batch_size(batch_size)
503 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
504 }
505 }
506
507 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_lt_36) {
508 TEST_REQUIRES_ARM_NEON_FMA;
509 for (size_t batch_size = 1; batch_size < 36; batch_size++) {
510 VUnOpMicrokernelTester()
511 .batch_size(batch_size)
512 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
513 }
514 }
515
516 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_gt_36) {
517 TEST_REQUIRES_ARM_NEON_FMA;
518 for (size_t batch_size = 37; batch_size < 72; batch_size++) {
519 VUnOpMicrokernelTester()
520 .batch_size(batch_size)
521 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
522 }
523 }
524
525 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, inplace) {
526 TEST_REQUIRES_ARM_NEON_FMA;
527 for (size_t batch_size = 1; batch_size <= 180; batch_size += 35) {
528 VUnOpMicrokernelTester()
529 .batch_size(batch_size)
530 .inplace(true)
531 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
532 }
533 }
534#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
535
536
537#if XNN_ARCH_ARM || XNN_ARCH_ARM64
538 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_eq_40) {
539 TEST_REQUIRES_ARM_NEON_FMA;
540 VUnOpMicrokernelTester()
541 .batch_size(40)
542 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
543 }
544
545 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_div_40) {
546 TEST_REQUIRES_ARM_NEON_FMA;
547 for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
548 VUnOpMicrokernelTester()
549 .batch_size(batch_size)
550 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
551 }
552 }
553
554 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_lt_40) {
555 TEST_REQUIRES_ARM_NEON_FMA;
556 for (size_t batch_size = 1; batch_size < 40; batch_size++) {
557 VUnOpMicrokernelTester()
558 .batch_size(batch_size)
559 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
560 }
561 }
562
563 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_gt_40) {
564 TEST_REQUIRES_ARM_NEON_FMA;
565 for (size_t batch_size = 41; batch_size < 80; batch_size++) {
566 VUnOpMicrokernelTester()
567 .batch_size(batch_size)
568 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
569 }
570 }
571
572 TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, inplace) {
573 TEST_REQUIRES_ARM_NEON_FMA;
574 for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
575 VUnOpMicrokernelTester()
576 .batch_size(batch_size)
577 .inplace(true)
578 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
579 }
580 }
581#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
582
583
584#if XNN_ARCH_ARM || XNN_ARCH_ARM64
585 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_eq_4) {
586 TEST_REQUIRES_ARM_NEON_FMA;
587 VUnOpMicrokernelTester()
588 .batch_size(4)
589 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
590 }
591
592 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_div_4) {
593 TEST_REQUIRES_ARM_NEON_FMA;
594 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
595 VUnOpMicrokernelTester()
596 .batch_size(batch_size)
597 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
598 }
599 }
600
601 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_lt_4) {
602 TEST_REQUIRES_ARM_NEON_FMA;
603 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
604 VUnOpMicrokernelTester()
605 .batch_size(batch_size)
606 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
607 }
608 }
609
610 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_gt_4) {
611 TEST_REQUIRES_ARM_NEON_FMA;
612 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
613 VUnOpMicrokernelTester()
614 .batch_size(batch_size)
615 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
616 }
617 }
618
619 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, inplace) {
620 TEST_REQUIRES_ARM_NEON_FMA;
621 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
622 VUnOpMicrokernelTester()
623 .batch_size(batch_size)
624 .inplace(true)
625 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
626 }
627 }
628#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
629
630
631#if XNN_ARCH_ARM || XNN_ARCH_ARM64
632 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_eq_8) {
633 TEST_REQUIRES_ARM_NEON_FMA;
634 VUnOpMicrokernelTester()
635 .batch_size(8)
636 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
637 }
638
639 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_div_8) {
640 TEST_REQUIRES_ARM_NEON_FMA;
641 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
642 VUnOpMicrokernelTester()
643 .batch_size(batch_size)
644 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
645 }
646 }
647
648 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_lt_8) {
649 TEST_REQUIRES_ARM_NEON_FMA;
650 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
651 VUnOpMicrokernelTester()
652 .batch_size(batch_size)
653 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
654 }
655 }
656
657 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_gt_8) {
658 TEST_REQUIRES_ARM_NEON_FMA;
659 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
660 VUnOpMicrokernelTester()
661 .batch_size(batch_size)
662 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
663 }
664 }
665
666 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, inplace) {
667 TEST_REQUIRES_ARM_NEON_FMA;
668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
669 VUnOpMicrokernelTester()
670 .batch_size(batch_size)
671 .inplace(true)
672 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
673 }
674 }
675#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
676
677
678#if XNN_ARCH_ARM || XNN_ARCH_ARM64
679 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_eq_12) {
680 TEST_REQUIRES_ARM_NEON_FMA;
681 VUnOpMicrokernelTester()
682 .batch_size(12)
683 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
684 }
685
686 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_div_12) {
687 TEST_REQUIRES_ARM_NEON_FMA;
688 for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
689 VUnOpMicrokernelTester()
690 .batch_size(batch_size)
691 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
692 }
693 }
694
695 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_lt_12) {
696 TEST_REQUIRES_ARM_NEON_FMA;
697 for (size_t batch_size = 1; batch_size < 12; batch_size++) {
698 VUnOpMicrokernelTester()
699 .batch_size(batch_size)
700 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
701 }
702 }
703
704 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_gt_12) {
705 TEST_REQUIRES_ARM_NEON_FMA;
706 for (size_t batch_size = 13; batch_size < 24; batch_size++) {
707 VUnOpMicrokernelTester()
708 .batch_size(batch_size)
709 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
710 }
711 }
712
713 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, inplace) {
714 TEST_REQUIRES_ARM_NEON_FMA;
715 for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
716 VUnOpMicrokernelTester()
717 .batch_size(batch_size)
718 .inplace(true)
719 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12), VUnOpMicrokernelTester::OpType::SquareRoot);
720 }
721 }
722#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
723
724
725#if XNN_ARCH_ARM || XNN_ARCH_ARM64
726 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_eq_16) {
727 TEST_REQUIRES_ARM_NEON_FMA;
728 VUnOpMicrokernelTester()
729 .batch_size(16)
730 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
731 }
732
733 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_div_16) {
734 TEST_REQUIRES_ARM_NEON_FMA;
735 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
736 VUnOpMicrokernelTester()
737 .batch_size(batch_size)
738 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
739 }
740 }
741
742 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_lt_16) {
743 TEST_REQUIRES_ARM_NEON_FMA;
744 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
745 VUnOpMicrokernelTester()
746 .batch_size(batch_size)
747 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
748 }
749 }
750
751 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_gt_16) {
752 TEST_REQUIRES_ARM_NEON_FMA;
753 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
754 VUnOpMicrokernelTester()
755 .batch_size(batch_size)
756 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
757 }
758 }
759
760 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, inplace) {
761 TEST_REQUIRES_ARM_NEON_FMA;
762 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
763 VUnOpMicrokernelTester()
764 .batch_size(batch_size)
765 .inplace(true)
766 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
767 }
768 }
769#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
770
771
772#if XNN_ARCH_ARM || XNN_ARCH_ARM64
773 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_eq_20) {
774 TEST_REQUIRES_ARM_NEON_FMA;
775 VUnOpMicrokernelTester()
776 .batch_size(20)
777 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
778 }
779
780 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_div_20) {
781 TEST_REQUIRES_ARM_NEON_FMA;
782 for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
783 VUnOpMicrokernelTester()
784 .batch_size(batch_size)
785 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
786 }
787 }
788
789 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_lt_20) {
790 TEST_REQUIRES_ARM_NEON_FMA;
791 for (size_t batch_size = 1; batch_size < 20; batch_size++) {
792 VUnOpMicrokernelTester()
793 .batch_size(batch_size)
794 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
795 }
796 }
797
798 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_gt_20) {
799 TEST_REQUIRES_ARM_NEON_FMA;
800 for (size_t batch_size = 21; batch_size < 40; batch_size++) {
801 VUnOpMicrokernelTester()
802 .batch_size(batch_size)
803 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
804 }
805 }
806
807 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, inplace) {
808 TEST_REQUIRES_ARM_NEON_FMA;
809 for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
810 VUnOpMicrokernelTester()
811 .batch_size(batch_size)
812 .inplace(true)
813 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20), VUnOpMicrokernelTester::OpType::SquareRoot);
814 }
815 }
816#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
817
818
819#if XNN_ARCH_ARM || XNN_ARCH_ARM64
820 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_eq_24) {
821 TEST_REQUIRES_ARM_NEON_FMA;
822 VUnOpMicrokernelTester()
823 .batch_size(24)
824 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
825 }
826
827 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_div_24) {
828 TEST_REQUIRES_ARM_NEON_FMA;
829 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
830 VUnOpMicrokernelTester()
831 .batch_size(batch_size)
832 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
833 }
834 }
835
836 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_lt_24) {
837 TEST_REQUIRES_ARM_NEON_FMA;
838 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
839 VUnOpMicrokernelTester()
840 .batch_size(batch_size)
841 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
842 }
843 }
844
845 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_gt_24) {
846 TEST_REQUIRES_ARM_NEON_FMA;
847 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
848 VUnOpMicrokernelTester()
849 .batch_size(batch_size)
850 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
851 }
852 }
853
854 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, inplace) {
855 TEST_REQUIRES_ARM_NEON_FMA;
856 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
857 VUnOpMicrokernelTester()
858 .batch_size(batch_size)
859 .inplace(true)
860 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
861 }
862 }
863#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
864
865
866#if XNN_ARCH_ARM || XNN_ARCH_ARM64
867 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_eq_28) {
868 TEST_REQUIRES_ARM_NEON_FMA;
869 VUnOpMicrokernelTester()
870 .batch_size(28)
871 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
872 }
873
874 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_div_28) {
875 TEST_REQUIRES_ARM_NEON_FMA;
876 for (size_t batch_size = 56; batch_size < 280; batch_size += 28) {
877 VUnOpMicrokernelTester()
878 .batch_size(batch_size)
879 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
880 }
881 }
882
883 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_lt_28) {
884 TEST_REQUIRES_ARM_NEON_FMA;
885 for (size_t batch_size = 1; batch_size < 28; batch_size++) {
886 VUnOpMicrokernelTester()
887 .batch_size(batch_size)
888 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
889 }
890 }
891
892 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_gt_28) {
893 TEST_REQUIRES_ARM_NEON_FMA;
894 for (size_t batch_size = 29; batch_size < 56; batch_size++) {
895 VUnOpMicrokernelTester()
896 .batch_size(batch_size)
897 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
898 }
899 }
900
901 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, inplace) {
902 TEST_REQUIRES_ARM_NEON_FMA;
903 for (size_t batch_size = 1; batch_size <= 140; batch_size += 27) {
904 VUnOpMicrokernelTester()
905 .batch_size(batch_size)
906 .inplace(true)
907 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28), VUnOpMicrokernelTester::OpType::SquareRoot);
908 }
909 }
910#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
911
912
913#if XNN_ARCH_ARM || XNN_ARCH_ARM64
914 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_eq_32) {
915 TEST_REQUIRES_ARM_NEON_FMA;
916 VUnOpMicrokernelTester()
917 .batch_size(32)
918 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
919 }
920
921 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_div_32) {
922 TEST_REQUIRES_ARM_NEON_FMA;
923 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
924 VUnOpMicrokernelTester()
925 .batch_size(batch_size)
926 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
927 }
928 }
929
930 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_lt_32) {
931 TEST_REQUIRES_ARM_NEON_FMA;
932 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
933 VUnOpMicrokernelTester()
934 .batch_size(batch_size)
935 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
936 }
937 }
938
939 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_gt_32) {
940 TEST_REQUIRES_ARM_NEON_FMA;
941 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
942 VUnOpMicrokernelTester()
943 .batch_size(batch_size)
944 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
945 }
946 }
947
948 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, inplace) {
949 TEST_REQUIRES_ARM_NEON_FMA;
950 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
951 VUnOpMicrokernelTester()
952 .batch_size(batch_size)
953 .inplace(true)
954 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
955 }
956 }
957#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
958
959
960#if XNN_ARCH_ARM || XNN_ARCH_ARM64
961 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_eq_36) {
962 TEST_REQUIRES_ARM_NEON_FMA;
963 VUnOpMicrokernelTester()
964 .batch_size(36)
965 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
966 }
967
968 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_div_36) {
969 TEST_REQUIRES_ARM_NEON_FMA;
970 for (size_t batch_size = 72; batch_size < 360; batch_size += 36) {
971 VUnOpMicrokernelTester()
972 .batch_size(batch_size)
973 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
974 }
975 }
976
977 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_lt_36) {
978 TEST_REQUIRES_ARM_NEON_FMA;
979 for (size_t batch_size = 1; batch_size < 36; batch_size++) {
980 VUnOpMicrokernelTester()
981 .batch_size(batch_size)
982 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
983 }
984 }
985
986 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_gt_36) {
987 TEST_REQUIRES_ARM_NEON_FMA;
988 for (size_t batch_size = 37; batch_size < 72; batch_size++) {
989 VUnOpMicrokernelTester()
990 .batch_size(batch_size)
991 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
992 }
993 }
994
995 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, inplace) {
996 TEST_REQUIRES_ARM_NEON_FMA;
997 for (size_t batch_size = 1; batch_size <= 180; batch_size += 35) {
998 VUnOpMicrokernelTester()
999 .batch_size(batch_size)
1000 .inplace(true)
1001 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36), VUnOpMicrokernelTester::OpType::SquareRoot);
1002 }
1003 }
1004#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1005
1006
1007#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1008 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_eq_40) {
1009 TEST_REQUIRES_ARM_NEON_FMA;
1010 VUnOpMicrokernelTester()
1011 .batch_size(40)
1012 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1013 }
1014
1015 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_div_40) {
1016 TEST_REQUIRES_ARM_NEON_FMA;
1017 for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
1018 VUnOpMicrokernelTester()
1019 .batch_size(batch_size)
1020 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1021 }
1022 }
1023
1024 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_lt_40) {
1025 TEST_REQUIRES_ARM_NEON_FMA;
1026 for (size_t batch_size = 1; batch_size < 40; batch_size++) {
1027 VUnOpMicrokernelTester()
1028 .batch_size(batch_size)
1029 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1030 }
1031 }
1032
1033 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_gt_40) {
1034 TEST_REQUIRES_ARM_NEON_FMA;
1035 for (size_t batch_size = 41; batch_size < 80; batch_size++) {
1036 VUnOpMicrokernelTester()
1037 .batch_size(batch_size)
1038 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1039 }
1040 }
1041
1042 TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, inplace) {
1043 TEST_REQUIRES_ARM_NEON_FMA;
1044 for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
1045 VUnOpMicrokernelTester()
1046 .batch_size(batch_size)
1047 .inplace(true)
1048 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1049 }
1050 }
1051#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1052
1053
1054#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1055 TEST(F32_VSQRT__SSE_SQRT_X4, batch_eq_4) {
1056 TEST_REQUIRES_X86_SSE;
1057 VUnOpMicrokernelTester()
1058 .batch_size(4)
1059 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1060 }
1061
1062 TEST(F32_VSQRT__SSE_SQRT_X4, batch_div_4) {
1063 TEST_REQUIRES_X86_SSE;
1064 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
1065 VUnOpMicrokernelTester()
1066 .batch_size(batch_size)
1067 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1068 }
1069 }
1070
1071 TEST(F32_VSQRT__SSE_SQRT_X4, batch_lt_4) {
1072 TEST_REQUIRES_X86_SSE;
1073 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
1074 VUnOpMicrokernelTester()
1075 .batch_size(batch_size)
1076 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1077 }
1078 }
1079
1080 TEST(F32_VSQRT__SSE_SQRT_X4, batch_gt_4) {
1081 TEST_REQUIRES_X86_SSE;
1082 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
1083 VUnOpMicrokernelTester()
1084 .batch_size(batch_size)
1085 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1086 }
1087 }
1088
1089 TEST(F32_VSQRT__SSE_SQRT_X4, inplace) {
1090 TEST_REQUIRES_X86_SSE;
1091 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
1092 VUnOpMicrokernelTester()
1093 .batch_size(batch_size)
1094 .inplace(true)
1095 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1096 }
1097 }
1098#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1099
1100
1101#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1102 TEST(F32_VSQRT__SSE_SQRT_X8, batch_eq_8) {
1103 TEST_REQUIRES_X86_SSE;
1104 VUnOpMicrokernelTester()
1105 .batch_size(8)
1106 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1107 }
1108
1109 TEST(F32_VSQRT__SSE_SQRT_X8, batch_div_8) {
1110 TEST_REQUIRES_X86_SSE;
1111 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1112 VUnOpMicrokernelTester()
1113 .batch_size(batch_size)
1114 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1115 }
1116 }
1117
1118 TEST(F32_VSQRT__SSE_SQRT_X8, batch_lt_8) {
1119 TEST_REQUIRES_X86_SSE;
1120 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1121 VUnOpMicrokernelTester()
1122 .batch_size(batch_size)
1123 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1124 }
1125 }
1126
1127 TEST(F32_VSQRT__SSE_SQRT_X8, batch_gt_8) {
1128 TEST_REQUIRES_X86_SSE;
1129 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1130 VUnOpMicrokernelTester()
1131 .batch_size(batch_size)
1132 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1133 }
1134 }
1135
1136 TEST(F32_VSQRT__SSE_SQRT_X8, inplace) {
1137 TEST_REQUIRES_X86_SSE;
1138 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1139 VUnOpMicrokernelTester()
1140 .batch_size(batch_size)
1141 .inplace(true)
1142 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__sse_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1143 }
1144 }
1145#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1146
1147
1148#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1149 TEST(F32_VSQRT__AVX_SQRT_X8, batch_eq_8) {
1150 TEST_REQUIRES_X86_AVX;
1151 VUnOpMicrokernelTester()
1152 .batch_size(8)
1153 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1154 }
1155
1156 TEST(F32_VSQRT__AVX_SQRT_X8, batch_div_8) {
1157 TEST_REQUIRES_X86_AVX;
1158 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1159 VUnOpMicrokernelTester()
1160 .batch_size(batch_size)
1161 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1162 }
1163 }
1164
1165 TEST(F32_VSQRT__AVX_SQRT_X8, batch_lt_8) {
1166 TEST_REQUIRES_X86_AVX;
1167 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1168 VUnOpMicrokernelTester()
1169 .batch_size(batch_size)
1170 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1171 }
1172 }
1173
1174 TEST(F32_VSQRT__AVX_SQRT_X8, batch_gt_8) {
1175 TEST_REQUIRES_X86_AVX;
1176 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1177 VUnOpMicrokernelTester()
1178 .batch_size(batch_size)
1179 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1180 }
1181 }
1182
1183 TEST(F32_VSQRT__AVX_SQRT_X8, inplace) {
1184 TEST_REQUIRES_X86_AVX;
1185 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1186 VUnOpMicrokernelTester()
1187 .batch_size(batch_size)
1188 .inplace(true)
1189 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1190 }
1191 }
1192#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1193
1194
1195#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1196 TEST(F32_VSQRT__AVX_SQRT_X16, batch_eq_16) {
1197 TEST_REQUIRES_X86_AVX;
1198 VUnOpMicrokernelTester()
1199 .batch_size(16)
1200 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1201 }
1202
1203 TEST(F32_VSQRT__AVX_SQRT_X16, batch_div_16) {
1204 TEST_REQUIRES_X86_AVX;
1205 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1206 VUnOpMicrokernelTester()
1207 .batch_size(batch_size)
1208 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1209 }
1210 }
1211
1212 TEST(F32_VSQRT__AVX_SQRT_X16, batch_lt_16) {
1213 TEST_REQUIRES_X86_AVX;
1214 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1215 VUnOpMicrokernelTester()
1216 .batch_size(batch_size)
1217 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1218 }
1219 }
1220
1221 TEST(F32_VSQRT__AVX_SQRT_X16, batch_gt_16) {
1222 TEST_REQUIRES_X86_AVX;
1223 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1224 VUnOpMicrokernelTester()
1225 .batch_size(batch_size)
1226 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1227 }
1228 }
1229
1230 TEST(F32_VSQRT__AVX_SQRT_X16, inplace) {
1231 TEST_REQUIRES_X86_AVX;
1232 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1233 VUnOpMicrokernelTester()
1234 .batch_size(batch_size)
1235 .inplace(true)
1236 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx_sqrt_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1237 }
1238 }
1239#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1240
1241
1242#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1243 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_eq_8) {
1244 TEST_REQUIRES_X86_FMA3;
1245 VUnOpMicrokernelTester()
1246 .batch_size(8)
1247 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1248 }
1249
1250 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_div_8) {
1251 TEST_REQUIRES_X86_FMA3;
1252 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1253 VUnOpMicrokernelTester()
1254 .batch_size(batch_size)
1255 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1256 }
1257 }
1258
1259 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_lt_8) {
1260 TEST_REQUIRES_X86_FMA3;
1261 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1262 VUnOpMicrokernelTester()
1263 .batch_size(batch_size)
1264 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1265 }
1266 }
1267
1268 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_gt_8) {
1269 TEST_REQUIRES_X86_FMA3;
1270 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1271 VUnOpMicrokernelTester()
1272 .batch_size(batch_size)
1273 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1274 }
1275 }
1276
1277 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, inplace) {
1278 TEST_REQUIRES_X86_FMA3;
1279 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1280 VUnOpMicrokernelTester()
1281 .batch_size(batch_size)
1282 .inplace(true)
1283 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
1284 }
1285 }
1286#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1287
1288
1289#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1290 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_eq_16) {
1291 TEST_REQUIRES_X86_FMA3;
1292 VUnOpMicrokernelTester()
1293 .batch_size(16)
1294 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1295 }
1296
1297 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_div_16) {
1298 TEST_REQUIRES_X86_FMA3;
1299 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1300 VUnOpMicrokernelTester()
1301 .batch_size(batch_size)
1302 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1303 }
1304 }
1305
1306 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_lt_16) {
1307 TEST_REQUIRES_X86_FMA3;
1308 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1309 VUnOpMicrokernelTester()
1310 .batch_size(batch_size)
1311 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1312 }
1313 }
1314
1315 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_gt_16) {
1316 TEST_REQUIRES_X86_FMA3;
1317 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1318 VUnOpMicrokernelTester()
1319 .batch_size(batch_size)
1320 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1321 }
1322 }
1323
1324 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, inplace) {
1325 TEST_REQUIRES_X86_FMA3;
1326 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1327 VUnOpMicrokernelTester()
1328 .batch_size(batch_size)
1329 .inplace(true)
1330 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1331 }
1332 }
1333#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1334
1335
1336#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1337 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_eq_24) {
1338 TEST_REQUIRES_X86_FMA3;
1339 VUnOpMicrokernelTester()
1340 .batch_size(24)
1341 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
1342 }
1343
1344 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_div_24) {
1345 TEST_REQUIRES_X86_FMA3;
1346 for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1347 VUnOpMicrokernelTester()
1348 .batch_size(batch_size)
1349 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
1350 }
1351 }
1352
1353 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_lt_24) {
1354 TEST_REQUIRES_X86_FMA3;
1355 for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1356 VUnOpMicrokernelTester()
1357 .batch_size(batch_size)
1358 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
1359 }
1360 }
1361
1362 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_gt_24) {
1363 TEST_REQUIRES_X86_FMA3;
1364 for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1365 VUnOpMicrokernelTester()
1366 .batch_size(batch_size)
1367 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
1368 }
1369 }
1370
1371 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, inplace) {
1372 TEST_REQUIRES_X86_FMA3;
1373 for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1374 VUnOpMicrokernelTester()
1375 .batch_size(batch_size)
1376 .inplace(true)
1377 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24), VUnOpMicrokernelTester::OpType::SquareRoot);
1378 }
1379 }
1380#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1381
1382
1383#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1384 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_eq_32) {
1385 TEST_REQUIRES_X86_FMA3;
1386 VUnOpMicrokernelTester()
1387 .batch_size(32)
1388 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1389 }
1390
1391 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_div_32) {
1392 TEST_REQUIRES_X86_FMA3;
1393 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1394 VUnOpMicrokernelTester()
1395 .batch_size(batch_size)
1396 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1397 }
1398 }
1399
1400 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_lt_32) {
1401 TEST_REQUIRES_X86_FMA3;
1402 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1403 VUnOpMicrokernelTester()
1404 .batch_size(batch_size)
1405 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1406 }
1407 }
1408
1409 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_gt_32) {
1410 TEST_REQUIRES_X86_FMA3;
1411 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1412 VUnOpMicrokernelTester()
1413 .batch_size(batch_size)
1414 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1415 }
1416 }
1417
1418 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, inplace) {
1419 TEST_REQUIRES_X86_FMA3;
1420 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1421 VUnOpMicrokernelTester()
1422 .batch_size(batch_size)
1423 .inplace(true)
1424 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1425 }
1426 }
1427#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1428
1429
1430#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1431 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_eq_40) {
1432 TEST_REQUIRES_X86_FMA3;
1433 VUnOpMicrokernelTester()
1434 .batch_size(40)
1435 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1436 }
1437
1438 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_div_40) {
1439 TEST_REQUIRES_X86_FMA3;
1440 for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
1441 VUnOpMicrokernelTester()
1442 .batch_size(batch_size)
1443 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1444 }
1445 }
1446
1447 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_lt_40) {
1448 TEST_REQUIRES_X86_FMA3;
1449 for (size_t batch_size = 1; batch_size < 40; batch_size++) {
1450 VUnOpMicrokernelTester()
1451 .batch_size(batch_size)
1452 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1453 }
1454 }
1455
1456 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_gt_40) {
1457 TEST_REQUIRES_X86_FMA3;
1458 for (size_t batch_size = 41; batch_size < 80; batch_size++) {
1459 VUnOpMicrokernelTester()
1460 .batch_size(batch_size)
1461 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1462 }
1463 }
1464
1465 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, inplace) {
1466 TEST_REQUIRES_X86_FMA3;
1467 for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
1468 VUnOpMicrokernelTester()
1469 .batch_size(batch_size)
1470 .inplace(true)
1471 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40), VUnOpMicrokernelTester::OpType::SquareRoot);
1472 }
1473 }
1474#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1475
1476
1477#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1478 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_eq_48) {
1479 TEST_REQUIRES_X86_FMA3;
1480 VUnOpMicrokernelTester()
1481 .batch_size(48)
1482 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1483 }
1484
1485 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_div_48) {
1486 TEST_REQUIRES_X86_FMA3;
1487 for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
1488 VUnOpMicrokernelTester()
1489 .batch_size(batch_size)
1490 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1491 }
1492 }
1493
1494 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_lt_48) {
1495 TEST_REQUIRES_X86_FMA3;
1496 for (size_t batch_size = 1; batch_size < 48; batch_size++) {
1497 VUnOpMicrokernelTester()
1498 .batch_size(batch_size)
1499 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1500 }
1501 }
1502
1503 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_gt_48) {
1504 TEST_REQUIRES_X86_FMA3;
1505 for (size_t batch_size = 49; batch_size < 96; batch_size++) {
1506 VUnOpMicrokernelTester()
1507 .batch_size(batch_size)
1508 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1509 }
1510 }
1511
1512 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, inplace) {
1513 TEST_REQUIRES_X86_FMA3;
1514 for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
1515 VUnOpMicrokernelTester()
1516 .batch_size(batch_size)
1517 .inplace(true)
1518 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1519 }
1520 }
1521#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1522
1523
1524#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1525 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_eq_56) {
1526 TEST_REQUIRES_X86_FMA3;
1527 VUnOpMicrokernelTester()
1528 .batch_size(56)
1529 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56), VUnOpMicrokernelTester::OpType::SquareRoot);
1530 }
1531
1532 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_div_56) {
1533 TEST_REQUIRES_X86_FMA3;
1534 for (size_t batch_size = 112; batch_size < 560; batch_size += 56) {
1535 VUnOpMicrokernelTester()
1536 .batch_size(batch_size)
1537 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56), VUnOpMicrokernelTester::OpType::SquareRoot);
1538 }
1539 }
1540
1541 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_lt_56) {
1542 TEST_REQUIRES_X86_FMA3;
1543 for (size_t batch_size = 1; batch_size < 56; batch_size++) {
1544 VUnOpMicrokernelTester()
1545 .batch_size(batch_size)
1546 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56), VUnOpMicrokernelTester::OpType::SquareRoot);
1547 }
1548 }
1549
1550 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_gt_56) {
1551 TEST_REQUIRES_X86_FMA3;
1552 for (size_t batch_size = 57; batch_size < 112; batch_size++) {
1553 VUnOpMicrokernelTester()
1554 .batch_size(batch_size)
1555 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56), VUnOpMicrokernelTester::OpType::SquareRoot);
1556 }
1557 }
1558
1559 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, inplace) {
1560 TEST_REQUIRES_X86_FMA3;
1561 for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
1562 VUnOpMicrokernelTester()
1563 .batch_size(batch_size)
1564 .inplace(true)
1565 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56), VUnOpMicrokernelTester::OpType::SquareRoot);
1566 }
1567 }
1568#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1569
1570
1571#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1572 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_eq_64) {
1573 TEST_REQUIRES_X86_FMA3;
1574 VUnOpMicrokernelTester()
1575 .batch_size(64)
1576 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1577 }
1578
1579 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_div_64) {
1580 TEST_REQUIRES_X86_FMA3;
1581 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
1582 VUnOpMicrokernelTester()
1583 .batch_size(batch_size)
1584 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1585 }
1586 }
1587
1588 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_lt_64) {
1589 TEST_REQUIRES_X86_FMA3;
1590 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
1591 VUnOpMicrokernelTester()
1592 .batch_size(batch_size)
1593 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1594 }
1595 }
1596
1597 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_gt_64) {
1598 TEST_REQUIRES_X86_FMA3;
1599 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
1600 VUnOpMicrokernelTester()
1601 .batch_size(batch_size)
1602 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1603 }
1604 }
1605
1606 TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, inplace) {
1607 TEST_REQUIRES_X86_FMA3;
1608 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
1609 VUnOpMicrokernelTester()
1610 .batch_size(batch_size)
1611 .inplace(true)
1612 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1613 }
1614 }
1615#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1616
1617
1618#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1619 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_eq_16) {
1620 TEST_REQUIRES_X86_AVX512F;
1621 VUnOpMicrokernelTester()
1622 .batch_size(16)
1623 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1624 }
1625
1626 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_div_16) {
1627 TEST_REQUIRES_X86_AVX512F;
1628 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1629 VUnOpMicrokernelTester()
1630 .batch_size(batch_size)
1631 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1632 }
1633 }
1634
1635 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_lt_16) {
1636 TEST_REQUIRES_X86_AVX512F;
1637 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1638 VUnOpMicrokernelTester()
1639 .batch_size(batch_size)
1640 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1641 }
1642 }
1643
1644 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_gt_16) {
1645 TEST_REQUIRES_X86_AVX512F;
1646 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1647 VUnOpMicrokernelTester()
1648 .batch_size(batch_size)
1649 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1650 }
1651 }
1652
1653 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, inplace) {
1654 TEST_REQUIRES_X86_AVX512F;
1655 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1656 VUnOpMicrokernelTester()
1657 .batch_size(batch_size)
1658 .inplace(true)
1659 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16), VUnOpMicrokernelTester::OpType::SquareRoot);
1660 }
1661 }
1662#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1663
1664
1665#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1666 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_eq_32) {
1667 TEST_REQUIRES_X86_AVX512F;
1668 VUnOpMicrokernelTester()
1669 .batch_size(32)
1670 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1671 }
1672
1673 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_div_32) {
1674 TEST_REQUIRES_X86_AVX512F;
1675 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1676 VUnOpMicrokernelTester()
1677 .batch_size(batch_size)
1678 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1679 }
1680 }
1681
1682 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_lt_32) {
1683 TEST_REQUIRES_X86_AVX512F;
1684 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1685 VUnOpMicrokernelTester()
1686 .batch_size(batch_size)
1687 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1688 }
1689 }
1690
1691 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_gt_32) {
1692 TEST_REQUIRES_X86_AVX512F;
1693 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1694 VUnOpMicrokernelTester()
1695 .batch_size(batch_size)
1696 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1697 }
1698 }
1699
1700 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, inplace) {
1701 TEST_REQUIRES_X86_AVX512F;
1702 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1703 VUnOpMicrokernelTester()
1704 .batch_size(batch_size)
1705 .inplace(true)
1706 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32), VUnOpMicrokernelTester::OpType::SquareRoot);
1707 }
1708 }
1709#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1710
1711
1712#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1713 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_eq_48) {
1714 TEST_REQUIRES_X86_AVX512F;
1715 VUnOpMicrokernelTester()
1716 .batch_size(48)
1717 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1718 }
1719
1720 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_div_48) {
1721 TEST_REQUIRES_X86_AVX512F;
1722 for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
1723 VUnOpMicrokernelTester()
1724 .batch_size(batch_size)
1725 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1726 }
1727 }
1728
1729 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_lt_48) {
1730 TEST_REQUIRES_X86_AVX512F;
1731 for (size_t batch_size = 1; batch_size < 48; batch_size++) {
1732 VUnOpMicrokernelTester()
1733 .batch_size(batch_size)
1734 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1735 }
1736 }
1737
1738 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_gt_48) {
1739 TEST_REQUIRES_X86_AVX512F;
1740 for (size_t batch_size = 49; batch_size < 96; batch_size++) {
1741 VUnOpMicrokernelTester()
1742 .batch_size(batch_size)
1743 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1744 }
1745 }
1746
1747 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, inplace) {
1748 TEST_REQUIRES_X86_AVX512F;
1749 for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
1750 VUnOpMicrokernelTester()
1751 .batch_size(batch_size)
1752 .inplace(true)
1753 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48), VUnOpMicrokernelTester::OpType::SquareRoot);
1754 }
1755 }
1756#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1757
1758
1759#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1760 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_eq_64) {
1761 TEST_REQUIRES_X86_AVX512F;
1762 VUnOpMicrokernelTester()
1763 .batch_size(64)
1764 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1765 }
1766
1767 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_div_64) {
1768 TEST_REQUIRES_X86_AVX512F;
1769 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
1770 VUnOpMicrokernelTester()
1771 .batch_size(batch_size)
1772 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1773 }
1774 }
1775
1776 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_lt_64) {
1777 TEST_REQUIRES_X86_AVX512F;
1778 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
1779 VUnOpMicrokernelTester()
1780 .batch_size(batch_size)
1781 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1782 }
1783 }
1784
1785 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_gt_64) {
1786 TEST_REQUIRES_X86_AVX512F;
1787 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
1788 VUnOpMicrokernelTester()
1789 .batch_size(batch_size)
1790 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1791 }
1792 }
1793
1794 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, inplace) {
1795 TEST_REQUIRES_X86_AVX512F;
1796 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
1797 VUnOpMicrokernelTester()
1798 .batch_size(batch_size)
1799 .inplace(true)
1800 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64), VUnOpMicrokernelTester::OpType::SquareRoot);
1801 }
1802 }
1803#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1804
1805
1806#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1807 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_eq_80) {
1808 TEST_REQUIRES_X86_AVX512F;
1809 VUnOpMicrokernelTester()
1810 .batch_size(80)
1811 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80), VUnOpMicrokernelTester::OpType::SquareRoot);
1812 }
1813
1814 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_div_80) {
1815 TEST_REQUIRES_X86_AVX512F;
1816 for (size_t batch_size = 160; batch_size < 800; batch_size += 80) {
1817 VUnOpMicrokernelTester()
1818 .batch_size(batch_size)
1819 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80), VUnOpMicrokernelTester::OpType::SquareRoot);
1820 }
1821 }
1822
1823 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_lt_80) {
1824 TEST_REQUIRES_X86_AVX512F;
1825 for (size_t batch_size = 1; batch_size < 80; batch_size++) {
1826 VUnOpMicrokernelTester()
1827 .batch_size(batch_size)
1828 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80), VUnOpMicrokernelTester::OpType::SquareRoot);
1829 }
1830 }
1831
1832 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_gt_80) {
1833 TEST_REQUIRES_X86_AVX512F;
1834 for (size_t batch_size = 81; batch_size < 160; batch_size++) {
1835 VUnOpMicrokernelTester()
1836 .batch_size(batch_size)
1837 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80), VUnOpMicrokernelTester::OpType::SquareRoot);
1838 }
1839 }
1840
1841 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, inplace) {
1842 TEST_REQUIRES_X86_AVX512F;
1843 for (size_t batch_size = 1; batch_size <= 400; batch_size += 79) {
1844 VUnOpMicrokernelTester()
1845 .batch_size(batch_size)
1846 .inplace(true)
1847 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80), VUnOpMicrokernelTester::OpType::SquareRoot);
1848 }
1849 }
1850#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1851
1852
1853#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1854 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_eq_96) {
1855 TEST_REQUIRES_X86_AVX512F;
1856 VUnOpMicrokernelTester()
1857 .batch_size(96)
1858 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96), VUnOpMicrokernelTester::OpType::SquareRoot);
1859 }
1860
1861 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_div_96) {
1862 TEST_REQUIRES_X86_AVX512F;
1863 for (size_t batch_size = 192; batch_size < 960; batch_size += 96) {
1864 VUnOpMicrokernelTester()
1865 .batch_size(batch_size)
1866 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96), VUnOpMicrokernelTester::OpType::SquareRoot);
1867 }
1868 }
1869
1870 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_lt_96) {
1871 TEST_REQUIRES_X86_AVX512F;
1872 for (size_t batch_size = 1; batch_size < 96; batch_size++) {
1873 VUnOpMicrokernelTester()
1874 .batch_size(batch_size)
1875 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96), VUnOpMicrokernelTester::OpType::SquareRoot);
1876 }
1877 }
1878
1879 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_gt_96) {
1880 TEST_REQUIRES_X86_AVX512F;
1881 for (size_t batch_size = 97; batch_size < 192; batch_size++) {
1882 VUnOpMicrokernelTester()
1883 .batch_size(batch_size)
1884 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96), VUnOpMicrokernelTester::OpType::SquareRoot);
1885 }
1886 }
1887
1888 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, inplace) {
1889 TEST_REQUIRES_X86_AVX512F;
1890 for (size_t batch_size = 1; batch_size <= 480; batch_size += 95) {
1891 VUnOpMicrokernelTester()
1892 .batch_size(batch_size)
1893 .inplace(true)
1894 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96), VUnOpMicrokernelTester::OpType::SquareRoot);
1895 }
1896 }
1897#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1898
1899
1900#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1901 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_eq_112) {
1902 TEST_REQUIRES_X86_AVX512F;
1903 VUnOpMicrokernelTester()
1904 .batch_size(112)
1905 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112), VUnOpMicrokernelTester::OpType::SquareRoot);
1906 }
1907
1908 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_div_112) {
1909 TEST_REQUIRES_X86_AVX512F;
1910 for (size_t batch_size = 224; batch_size < 1120; batch_size += 112) {
1911 VUnOpMicrokernelTester()
1912 .batch_size(batch_size)
1913 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112), VUnOpMicrokernelTester::OpType::SquareRoot);
1914 }
1915 }
1916
1917 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_lt_112) {
1918 TEST_REQUIRES_X86_AVX512F;
1919 for (size_t batch_size = 1; batch_size < 112; batch_size++) {
1920 VUnOpMicrokernelTester()
1921 .batch_size(batch_size)
1922 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112), VUnOpMicrokernelTester::OpType::SquareRoot);
1923 }
1924 }
1925
1926 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_gt_112) {
1927 TEST_REQUIRES_X86_AVX512F;
1928 for (size_t batch_size = 113; batch_size < 224; batch_size++) {
1929 VUnOpMicrokernelTester()
1930 .batch_size(batch_size)
1931 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112), VUnOpMicrokernelTester::OpType::SquareRoot);
1932 }
1933 }
1934
1935 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, inplace) {
1936 TEST_REQUIRES_X86_AVX512F;
1937 for (size_t batch_size = 1; batch_size <= 560; batch_size += 111) {
1938 VUnOpMicrokernelTester()
1939 .batch_size(batch_size)
1940 .inplace(true)
1941 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112), VUnOpMicrokernelTester::OpType::SquareRoot);
1942 }
1943 }
1944#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1945
1946
1947#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1948 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_eq_128) {
1949 TEST_REQUIRES_X86_AVX512F;
1950 VUnOpMicrokernelTester()
1951 .batch_size(128)
1952 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128), VUnOpMicrokernelTester::OpType::SquareRoot);
1953 }
1954
1955 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_div_128) {
1956 TEST_REQUIRES_X86_AVX512F;
1957 for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
1958 VUnOpMicrokernelTester()
1959 .batch_size(batch_size)
1960 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128), VUnOpMicrokernelTester::OpType::SquareRoot);
1961 }
1962 }
1963
1964 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_lt_128) {
1965 TEST_REQUIRES_X86_AVX512F;
1966 for (size_t batch_size = 1; batch_size < 128; batch_size++) {
1967 VUnOpMicrokernelTester()
1968 .batch_size(batch_size)
1969 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128), VUnOpMicrokernelTester::OpType::SquareRoot);
1970 }
1971 }
1972
1973 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_gt_128) {
1974 TEST_REQUIRES_X86_AVX512F;
1975 for (size_t batch_size = 129; batch_size < 256; batch_size++) {
1976 VUnOpMicrokernelTester()
1977 .batch_size(batch_size)
1978 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128), VUnOpMicrokernelTester::OpType::SquareRoot);
1979 }
1980 }
1981
1982 TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, inplace) {
1983 TEST_REQUIRES_X86_AVX512F;
1984 for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
1985 VUnOpMicrokernelTester()
1986 .batch_size(batch_size)
1987 .inplace(true)
1988 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128), VUnOpMicrokernelTester::OpType::SquareRoot);
1989 }
1990 }
1991#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1992
1993
1994#if XNN_ARCH_WASMSIMD
1995 TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_eq_4) {
1996 VUnOpMicrokernelTester()
1997 .batch_size(4)
1998 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
1999 }
2000
2001 TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_div_4) {
2002 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
2003 VUnOpMicrokernelTester()
2004 .batch_size(batch_size)
2005 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
2006 }
2007 }
2008
2009 TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_lt_4) {
2010 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
2011 VUnOpMicrokernelTester()
2012 .batch_size(batch_size)
2013 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
2014 }
2015 }
2016
2017 TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_gt_4) {
2018 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
2019 VUnOpMicrokernelTester()
2020 .batch_size(batch_size)
2021 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
2022 }
2023 }
2024
2025 TEST(F32_VSQRT__WASMSIMD_SQRT_X4, inplace) {
2026 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2027 VUnOpMicrokernelTester()
2028 .batch_size(batch_size)
2029 .inplace(true)
2030 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot);
2031 }
2032 }
2033#endif // XNN_ARCH_WASMSIMD
2034
2035
2036#if XNN_ARCH_WASMSIMD
2037 TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_eq_8) {
2038 VUnOpMicrokernelTester()
2039 .batch_size(8)
2040 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
2041 }
2042
2043 TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_div_8) {
2044 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2045 VUnOpMicrokernelTester()
2046 .batch_size(batch_size)
2047 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
2048 }
2049 }
2050
2051 TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_lt_8) {
2052 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2053 VUnOpMicrokernelTester()
2054 .batch_size(batch_size)
2055 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
2056 }
2057 }
2058
2059 TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_gt_8) {
2060 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2061 VUnOpMicrokernelTester()
2062 .batch_size(batch_size)
2063 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
2064 }
2065 }
2066
2067 TEST(F32_VSQRT__WASMSIMD_SQRT_X8, inplace) {
2068 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2069 VUnOpMicrokernelTester()
2070 .batch_size(batch_size)
2071 .inplace(true)
2072 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8), VUnOpMicrokernelTester::OpType::SquareRoot);
2073 }
2074 }
2075#endif // XNN_ARCH_WASMSIMD
2076
2077
2078TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_eq_1) {
2079 VUnOpMicrokernelTester()
2080 .batch_size(1)
2081 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2082}
2083
2084TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_gt_1) {
2085 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
2086 VUnOpMicrokernelTester()
2087 .batch_size(batch_size)
2088 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2089 }
2090}
2091
2092TEST(F32_VSQRT__SCALAR_SQRT_X1, inplace) {
2093 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
2094 VUnOpMicrokernelTester()
2095 .batch_size(batch_size)
2096 .inplace(true)
2097 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2098 }
2099}
2100
2101
2102TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_eq_2) {
2103 VUnOpMicrokernelTester()
2104 .batch_size(2)
2105 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2106}
2107
2108TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_div_2) {
2109 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
2110 VUnOpMicrokernelTester()
2111 .batch_size(batch_size)
2112 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2113 }
2114}
2115
2116TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_lt_2) {
2117 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
2118 VUnOpMicrokernelTester()
2119 .batch_size(batch_size)
2120 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2121 }
2122}
2123
2124TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_gt_2) {
2125 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
2126 VUnOpMicrokernelTester()
2127 .batch_size(batch_size)
2128 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2129 }
2130}
2131
2132TEST(F32_VSQRT__SCALAR_SQRT_X2, inplace) {
2133 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
2134 VUnOpMicrokernelTester()
2135 .batch_size(batch_size)
2136 .inplace(true)
2137 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2138 }
2139}
2140
2141
2142TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_eq_4) {
2143 VUnOpMicrokernelTester()
2144 .batch_size(4)
2145 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2146}
2147
2148TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_div_4) {
2149 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
2150 VUnOpMicrokernelTester()
2151 .batch_size(batch_size)
2152 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2153 }
2154}
2155
2156TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_lt_4) {
2157 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
2158 VUnOpMicrokernelTester()
2159 .batch_size(batch_size)
2160 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2161 }
2162}
2163
2164TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_gt_4) {
2165 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
2166 VUnOpMicrokernelTester()
2167 .batch_size(batch_size)
2168 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2169 }
2170}
2171
2172TEST(F32_VSQRT__SCALAR_SQRT_X4, inplace) {
2173 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2174 VUnOpMicrokernelTester()
2175 .batch_size(batch_size)
2176 .inplace(true)
2177 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4), VUnOpMicrokernelTester::OpType::SquareRoot, VUnOpMicrokernelTester::Variant::Scalar);
2178 }
2179}