// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>

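// Tester for unary elementwise microkernels (Abs, Clamp, ELU, HardSwish,
// Leaky ReLU, Negate, rounding, Sigmoid, Square, Square Root): it fills an
// input buffer with random values, invokes the microkernel under test, and
// compares its output against a scalar reference computation.
//
// Typical usage chains the fluent setters and ends in a Test() overload.
// A minimal sketch (the kernel and init-function names are illustrative):
//
//   VUnaryMicrokernelTester()
//     .batch_size(17)
//     .inplace(true)
//     .Test(xnn_f32_vabs_ukernel__scalar_x4, xnn_init_f32_abs_params_scalar);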
class VUnaryMicrokernelTester {
 public:
  enum class OpType {
    ReLU,
    RoundToNearestEven,
    RoundTowardsZero,
    RoundUp,
    RoundDown,
  };

  enum class Variant {
    Native,
    Scalar,
  };

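  // Fluent setters: each records one aspect of the test case and returns
  // *this so that a configuration can be built up in a single chained
  // expression.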
  inline VUnaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VUnaryMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VUnaryMicrokernelTester& slope(float slope) {
    this->slope_ = slope;
    return *this;
  }

  inline float slope() const {
    return this->slope_;
  }

  inline VUnaryMicrokernelTester& prescale(float prescale) {
    this->prescale_ = prescale;
    return *this;
  }

  inline float prescale() const {
    return this->prescale_;
  }

  inline VUnaryMicrokernelTester& alpha(float alpha) {
    this->alpha_ = alpha;
    return *this;
  }

  inline float alpha() const {
    return this->alpha_;
  }

  inline VUnaryMicrokernelTester& beta(float beta) {
    this->beta_ = beta;
    return *this;
  }

  inline float beta() const {
    return this->beta_;
  }

  inline VUnaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VUnaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VUnaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

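  // Tests an f32 microkernel selected by op_type: computes the reference
  // result in double precision, then checks the kernel output within a
  // tolerance of max(5.0e-6, 1.0e-5 * |y_ref|).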
  void Test(xnn_f32_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(x_data[i], 0.0f);
            break;
          case OpType::RoundToNearestEven:
            y_ref[i] = std::nearbyint(double(x_data[i]));
            break;
          case OpType::RoundTowardsZero:
            y_ref[i] = std::trunc(double(x_data[i]));
            break;
          case OpType::RoundUp:
            y_ref[i] = std::ceil(double(x_data[i]));
            break;
          case OpType::RoundDown:
            y_ref[i] = std::floor(double(x_data[i]));
            break;
        }
      }

      // Prepare parameters.
      union {
        union xnn_f32_relu_params relu;
        union xnn_f32_rnd_params rnd;
      } params;
      switch (op_type) {
        case OpType::RoundToNearestEven:
        case OpType::RoundTowardsZero:
        case OpType::RoundUp:
        case OpType::RoundDown:
          switch (variant) {
            case Variant::Native:
              xnn_init_f32_rnd_params(&params.rnd);
              break;
            case Variant::Scalar:
              xnn_init_scalar_f32_rnd_params(&params.rnd);
              break;
          }
          break;
        case OpType::ReLU:
          break;
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

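  // Tests an f32 Abs microkernel. Absolute value is exact in floating
  // point, so outputs are compared with ASSERT_EQ rather than a tolerance.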
  void Test(xnn_f32_vabs_ukernel_function vabs, xnn_init_f32_abs_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::abs(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_abs_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vabs(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

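  // Tests an f32 Clamp microkernel against the reference
  // y = max(min(x, qmax), qmin), with thresholds taken from qmin()/qmax().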
  void Test(xnn_f32_vclamp_ukernel_function vclamp, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f32_minmax_params params;
      init_params(&params, float(qmin()), float(qmax()));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

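  // Tests an f32 ELU microkernel. The reference is
  // y = alpha * expm1(prescale * x) when the sign bit of x is set, and
  // y = beta * x otherwise.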
  void Test(xnn_f32_velu_ukernel_function velu, xnn_init_f32_elu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? alpha() * std::expm1(double(x_data[i]) * prescale()) : double(x_data[i]) * beta();
      }

      // Prepare parameters.
      union xnn_f32_elu_params params;
      init_params(&params, prescale(), alpha(), beta());

      // Call optimized micro-kernel.
      velu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

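  // Tests an f32 HardSwish microkernel against the reference
  // y = x * min(max(x + 3, 0), 6) / 6.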
  void Test(xnn_f32_vhswish_ukernel_function vhswish, xnn_init_f32_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = (x_data[i] / 6.0f) * std::max(std::min(x_data[i] + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f32_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

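  // Tests an f32 Leaky ReLU microkernel. The reference scales inputs with a
  // set sign bit (including -0.0f) by slope() and passes the rest through.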
  void Test(xnn_f32_vlrelu_ukernel_function vlrelu, xnn_init_f32_lrelu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-125.0f, 125.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? x_data[i] * slope() : x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_lrelu_params params;
      init_params(&params, slope());

      // Call optimized micro-kernel.
      vlrelu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

  void Test(xnn_f32_vneg_ukernel_function vneg, xnn_init_f32_neg_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = -x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_neg_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vneg(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

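  // Tests an f32 Sigmoid microkernel. The reference is evaluated in double
  // precision in the form e / (1 + e) with e = exp(x).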
  void Test(xnn_f32_vsigmoid_ukernel_function vsigmoid, xnn_init_f32_sigmoid_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const double e = std::exp(double(x_data[i]));
        y_ref[i] = e / (1.0 + e);
      }

      // Prepare parameters.
      union xnn_f32_sigmoid_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vsigmoid(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

  void Test(xnn_f32_vsqr_ukernel_function vsqr, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = x_data[i] * x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_default_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqr(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

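  // Tests an f32 Square Root microkernel. init_params may be nullptr, in
  // which case the kernel is invoked with a null params pointer.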
  void Test(xnn_f32_vsqrt_ukernel_function vsqrt, xnn_init_f32_sqrt_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::sqrt(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_sqrt_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqrt(batch_size() * sizeof(float), x_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

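  // The following f32 microkernel function types share the signature of
  // xnn_f32_vunary_ukernel_function, so these overloads simply forward to
  // the OpType-based Test() above.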
  inline void Test(xnn_f32_vabs_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_velu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vneg_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vrelu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vround_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

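  // Tests an f16 microkernel selected by op_type. Half-precision values are
  // stored as uint16_t bit patterns and converted via fp16_ieee_to_fp32_value
  // / fp16_ieee_from_fp32_value; the tolerance, max(1.0e-3, 1.0e-2 * |y_ref|),
  // is looser than in the f32 tests to allow for half-precision rounding.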
  void Test(xnn_f16_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(fp16_ieee_to_fp32_value(x_data[i]), 0.0f);
            break;
          default:
            GTEST_FAIL() << "Unexpected op type";
        }
      }

      // Prepare parameters.
      union {
        struct xnn_f16_minmax_params minmax;
      } params;
      switch (op_type) {
        case OpType::ReLU:
          break;
        default:
          GTEST_FAIL() << "Unexpected op type";
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

  void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]), float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      struct xnn_f16_minmax_params params;
      init_params(&params, fp16_ieee_from_fp32_value(float(qmin())), fp16_ieee_from_fp32_value(float(qmax())));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

  void Test(xnn_f16_vhswish_ukernel_function vhswish, xnn_init_f16_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const float x_value = fp16_ieee_to_fp32_value(x_data[i]);
        y_ref[i] = (x_value / 6.0f) * std::max(std::min(x_value + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      struct xnn_f16_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

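  // Tests an s8 Clamp microkernel. qmin()/qmax() are stored as uint8_t, so
  // they are offset by 0x80 to map onto the signed [-128, 127] range.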
  void Test(xnn_s8_vclamp_ukernel_function vclamp, xnn_init_s8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(i8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), INT8_C(0xA5));
      }
      const int8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], int8_t(qmin() - 0x80)), int8_t(qmax() - 0x80));
      }

      // Prepare parameters.
      union xnn_s8_minmax_params params;
      init_params(&params, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(int8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << int32_t(x[i]);
      }
    }
  }

  void Test(xnn_u8_vclamp_ukernel_function vclamp, xnn_init_u8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<int32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

    std::vector<uint8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(u8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), UINT8_C(0xA5));
      }
      const uint8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], qmin()), qmax());
      }

      // Prepare parameters.
      union xnn_u8_minmax_params params;
      init_params(&params, qmin(), qmax());

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << uint32_t(x[i]);
      }
    }
  }

 private:
  size_t batch_size_ = 1;
  bool inplace_ = false;
  float slope_ = 0.5f;
  float prescale_ = 1.0f;
  float alpha_ = 1.0f;
  float beta_ = 1.0f;
  uint8_t qmin_ = 0;
  uint8_t qmax_ = 255;
  size_t iterations_ = 15;
};