// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>

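// Tester for unary elementwise micro-kernels (abs, clamp, ELU, HardSwish,
// Leaky ReLU, negation, rounding, sigmoid, square, square root). Each Test()
// overload fills an input buffer with random values, computes a portable
// reference result, runs the micro-kernel under test, and verifies that the
// two agree, optionally operating in-place.
//
// A minimal usage sketch (the micro-kernel name below is illustrative;
// substitute the identifier of the kernel under test):
//
//   TEST(F32_VABS__SCALAR_X1, batch_eq_1) {
//     VUnaryMicrokernelTester()
//       .batch_size(1)
//       .Test(xnn_f32_vabs_ukernel__scalar_x1);
//   }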
class VUnaryMicrokernelTester {
 public:
  enum class OpType {
    ReLU,
    RoundToNearestEven,
    RoundTowardsZero,
    RoundUp,
    RoundDown,
  };

  enum class Variant {
    Native,
    Scalar,
  };

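  // Setters follow the builder pattern: each records a test parameter and
  // returns *this, so configuration calls can be chained before Test().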
  inline VUnaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VUnaryMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VUnaryMicrokernelTester& slope(float slope) {
    this->slope_ = slope;
    return *this;
  }

  inline float slope() const {
    return this->slope_;
  }

  inline VUnaryMicrokernelTester& prescale(float prescale) {
    this->prescale_ = prescale;
    return *this;
  }

  inline float prescale() const {
    return this->prescale_;
  }

  inline VUnaryMicrokernelTester& alpha(float alpha) {
    this->alpha_ = alpha;
    return *this;
  }

  inline float alpha() const {
    return this->alpha_;
  }

  inline VUnaryMicrokernelTester& beta(float beta) {
    this->beta_ = beta;
    return *this;
  }

  inline float beta() const {
    return this->beta_;
  }

  inline VUnaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VUnaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VUnaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

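  // Tests a parameter-free f32 micro-kernel against a double-precision
  // reference for the given OpType. Only ReLU is handled here; rounding
  // kernels use the xnn_f32_vround_ukernel_function overload below.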
  void Test(xnn_f32_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(x_data[i], 0.0f);
            break;
          default:
            GTEST_FAIL() << "Unexpected operation type";
            return;
        }
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(float), x_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

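  // Tests an f32 absolute-value micro-kernel; |x| is exactly representable,
  // so outputs are compared for exact equality.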
  void Test(xnn_f32_vabs_ukernel_function vabs, xnn_init_f32_abs_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::abs(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_abs_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vabs(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

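  // Tests an f32 clamp micro-kernel with qmin()/qmax() interpreted as float
  // bounds; clamping is exact, so outputs must match the reference.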
  void Test(xnn_f32_vclamp_ukernel_function vclamp, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f32_minmax_params params;
      init_params(&params, float(qmin()), float(qmax()));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

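  // Tests an f32 ELU micro-kernel: y = alpha * expm1(prescale * x) for inputs
  // with the sign bit set, beta * x otherwise. The reference is evaluated in
  // double precision and compared with a relative tolerance.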
  void Test(xnn_f32_velu_ukernel_function velu, xnn_init_f32_elu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? alpha() * std::expm1(double(x_data[i]) * prescale()) : double(x_data[i]) * beta();
      }

      // Prepare parameters.
      union xnn_f32_elu_params params;
      init_params(&params, prescale(), alpha(), beta());

      // Call optimized micro-kernel.
      velu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

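  // Tests an f32 HardSwish micro-kernel against the reference
  // y = x * clamp(x + 3, 0, 6) / 6, with a relative tolerance.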
  void Test(xnn_f32_vhswish_ukernel_function vhswish, xnn_init_f32_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = (x_data[i] / 6.0f) * std::max(std::min(x_data[i] + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f32_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

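  // Tests an f32 Leaky ReLU micro-kernel: inputs with the sign bit set are
  // scaled by slope(); results must match the reference exactly.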
  void Test(xnn_f32_vlrelu_ukernel_function vlrelu, xnn_init_f32_lrelu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-125.0f, 125.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? x_data[i] * slope() : x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_lrelu_params params;
      init_params(&params, slope());

      // Call optimized micro-kernel.
      vlrelu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

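  // Tests an f32 negation micro-kernel; results must match exactly.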
  void Test(xnn_f32_vneg_ukernel_function vneg, xnn_init_f32_neg_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = -x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_neg_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vneg(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

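  // Tests an f32 rounding micro-kernel in one of four modes (to nearest-even,
  // towards zero, up, down). The nearest-even reference relies on the default
  // floating-point rounding mode; results must match exactly.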
  void Test(xnn_f32_vround_ukernel_function vrnd, OpType op_type, xnn_init_f32_rnd_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-5.0f, 5.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::RoundToNearestEven:
            y_ref[i] = std::nearbyint(double(x_data[i]));
            break;
          case OpType::RoundTowardsZero:
            y_ref[i] = std::trunc(double(x_data[i]));
            break;
          case OpType::RoundUp:
            y_ref[i] = std::ceil(double(x_data[i]));
            break;
          case OpType::RoundDown:
            y_ref[i] = std::floor(double(x_data[i]));
            break;
          default:
            GTEST_FAIL() << "Unexpected operation type";
            return;
        }
      }

      // Prepare parameters.
      union xnn_f32_rnd_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vrnd(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

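  // Tests an f32 sigmoid micro-kernel against the double-precision reference
  // e^x / (1 + e^x), with a relative tolerance.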
  void Test(xnn_f32_vsigmoid_ukernel_function vsigmoid, xnn_init_f32_sigmoid_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const double e = std::exp(double(x_data[i]));
        y_ref[i] = e / (1.0 + e);
      }

      // Prepare parameters.
      union xnn_f32_sigmoid_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vsigmoid(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

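  // Tests an f32 squaring micro-kernel (y = x * x); results must match exactly.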
  void Test(xnn_f32_vsqr_ukernel_function vsqr, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = x_data[i] * x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_default_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqr(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

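  // Tests an f32 square-root micro-kernel on non-negative inputs; results
  // must match std::sqrt exactly.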
  void Test(xnn_f32_vsqrt_ukernel_function vsqrt, xnn_init_f32_sqrt_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::sqrt(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_sqrt_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqrt(batch_size() * sizeof(float), x_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

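  // Adapters that cast typed micro-kernel pointers to the generic unary
  // signature so they can be tested through the OpType-based overload above.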
  inline void Test(xnn_f32_vabs_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_velu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vneg_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vrelu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

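  // Tests an f16 clamp micro-kernel. Inputs and outputs are IEEE FP16 values
  // stored as uint16_t and converted through the fp16 library; verification
  // uses a loose tolerance to absorb half-precision rounding.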
  void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]), float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f16_minmax_params params;
      init_params(&params, fp16_ieee_from_fp32_value(float(qmin())), fp16_ieee_from_fp32_value(float(qmax())));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

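  // Tests an f16 HardSwish micro-kernel against a single-precision reference,
  // with the same loose tolerance as the f16 clamp test.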
  void Test(xnn_f16_vhswish_ukernel_function vhswish, xnn_init_f16_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const float x_value = fp16_ieee_to_fp32_value(x_data[i]);
        y_ref[i] = (x_value / 6.0f) * std::max(std::min(x_value + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f16_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

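  // Tests a signed 8-bit clamp micro-kernel; qmin()/qmax() are shifted by
  // -0x80 from the unsigned [0, 255] range into [-128, 127]. Exact match.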
  void Test(xnn_s8_vclamp_ukernel_function vclamp, xnn_init_s8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(i8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), INT8_C(0xA5));
      }
      const int8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], int8_t(qmin() - 0x80)), int8_t(qmax() - 0x80));
      }

      // Prepare parameters.
      union xnn_s8_minmax_params params;
      init_params(&params, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(int8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << int32_t(x[i]);
      }
    }
  }

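  // Tests an unsigned 8-bit clamp micro-kernel with bounds [qmin(), qmax()];
  // results must match exactly.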
  void Test(xnn_u8_vclamp_ukernel_function vclamp, xnn_init_u8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<int32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

    std::vector<uint8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(u8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), UINT8_C(0xA5));
      }
      const uint8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], qmin()), qmax());
      }

      // Prepare parameters.
      union xnn_u8_minmax_params params;
      init_params(&params, qmin(), qmax());

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << uint32_t(x[i]);
      }
    }
  }

 private:
  size_t batch_size_ = 1;
  bool inplace_ = false;
  float slope_ = 0.5f;
  float prescale_ = 1.0f;
  float alpha_ = 1.0f;
  float beta_ = 1.0f;
  uint8_t qmin_ = 0;
  uint8_t qmax_ = 255;
  size_t iterations_ = 15;
};