// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


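// Tester for unary elementwise microkernels (Abs, Clamp, ELU, HardSwish,
// Leaky ReLU, Negate, ReLU, rounding, Sigmoid, Square, Square Root).
// Each Test() overload fills a random input buffer, computes reference
// results at higher precision, invokes the microkernel under test, and
// verifies its output elementwise, exactly or within a small tolerance
// depending on the operation. Setters return *this, so test parameters
// can be chained fluently.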
class VUnaryMicrokernelTester {
 public:
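  // Operations supported by the generic f32/f16 Test() overloads below.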
  enum class OpType {
    ReLU,
    RoundToNearestEven,
    RoundTowardsZero,
    RoundUp,
    RoundDown,
    Square,
    Sigmoid,
  };

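  // Selects the parameter-initialization function for operations that need
  // one: Variant::Native uses the default initializer, Variant::Scalar the
  // scalar (portable) initializer.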
  enum class Variant {
    Native,
    Scalar,
  };

  inline VUnaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VUnaryMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VUnaryMicrokernelTester& slope(float slope) {
    this->slope_ = slope;
    return *this;
  }

  inline float slope() const {
    return this->slope_;
  }

  inline VUnaryMicrokernelTester& prescale(float prescale) {
    this->prescale_ = prescale;
    return *this;
  }

  inline float prescale() const {
    return this->prescale_;
  }

  inline VUnaryMicrokernelTester& alpha(float alpha) {
    this->alpha_ = alpha;
    return *this;
  }

  inline float alpha() const {
    return this->alpha_;
  }

  inline VUnaryMicrokernelTester& beta(float beta) {
    this->beta_ = beta;
    return *this;
  }

  inline float beta() const {
    return this->beta_;
  }

  inline VUnaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VUnaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VUnaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

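  // Tests a microkernel with the generic f32 vunary signature against a
  // double-precision reference implementation of op_type.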
  void Test(xnn_f32_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

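    // The input is over-allocated by XNN_EXTRA_BYTES because microkernels
    // may read (but never process) elements beyond the end of the batch.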
    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(x_data[i], 0.0f);
            break;
          case OpType::RoundToNearestEven:
            y_ref[i] = std::nearbyint(double(x_data[i]));
            break;
          case OpType::RoundTowardsZero:
            y_ref[i] = std::trunc(double(x_data[i]));
            break;
          case OpType::RoundUp:
            y_ref[i] = std::ceil(double(x_data[i]));
            break;
          case OpType::RoundDown:
            y_ref[i] = std::floor(double(x_data[i]));
            break;
          case OpType::Square:
            y_ref[i] = double(x_data[i]) * double(x_data[i]);
            break;
          case OpType::Sigmoid:
          {
            const double e = std::exp(double(x_data[i]));
            y_ref[i] = e / (1.0 + e);
            break;
          }
        }
      }

      // Prepare parameters.
      union {
        union xnn_f32_elu_params elu;
        union xnn_f32_neg_params neg;
        union xnn_f32_relu_params relu;
        union xnn_f32_rnd_params rnd;
      } params;
      switch (op_type) {
        case OpType::RoundToNearestEven:
        case OpType::RoundTowardsZero:
        case OpType::RoundUp:
        case OpType::RoundDown:
          switch (variant) {
            case Variant::Native:
              xnn_init_f32_rnd_params(&params.rnd);
              break;
            case Variant::Scalar:
              xnn_init_scalar_f32_rnd_params(&params.rnd);
              break;
          }
          break;
        case OpType::ReLU:
        case OpType::Sigmoid:
        case OpType::Square:
          break;
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
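      // Tolerance is the larger of 5.0e-6 absolute and 1.0e-5 relative
      // error, to accommodate approximation-based kernels such as Sigmoid.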
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

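  // Tests an f32 Abs microkernel against exact reference results.
  // init_params may be null for kernels that take no parameters.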
  void Test(xnn_f32_vabs_ukernel_function vabs, xnn_init_f32_abs_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::abs(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_abs_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vabs(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

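  // Tests an f32 Clamp microkernel: y = min(max(x, qmin()), qmax()).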
  void Test(xnn_f32_vclamp_ukernel_function vclamp, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f32_minmax_params params;
      init_params(&params, float(qmin()), float(qmax()));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

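  // Tests an f32 ELU microkernel against a double-precision reference:
  // y = signbit(x) ? alpha() * expm1(prescale() * x) : beta() * x.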
  void Test(xnn_f32_velu_ukernel_function velu, xnn_init_f32_elu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? alpha() * std::expm1(double(x_data[i]) * prescale()) : double(x_data[i]) * beta();
      }

      // Prepare parameters.
      union xnn_f32_elu_params params;
      init_params(&params, prescale(), alpha(), beta());

      // Call optimized micro-kernel.
      velu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

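  // Tests an f32 HardSwish microkernel: y = x * min(max(x + 3, 0), 6) / 6.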
  void Test(xnn_f32_vhswish_ukernel_function vhswish, xnn_init_f32_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = (x_data[i] / 6.0f) * std::max(std::min(x_data[i] + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f32_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

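  // Tests an f32 Leaky ReLU microkernel: negative inputs are scaled by
  // slope(), non-negative inputs pass through unchanged.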
  void Test(xnn_f32_vlrelu_ukernel_function vlrelu, xnn_init_f32_lrelu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-125.0f, 125.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? x_data[i] * slope() : x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_lrelu_params params;
      init_params(&params, slope());

      // Call optimized micro-kernel.
      vlrelu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

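  // Tests an f32 Negate microkernel against exact reference results.
  // init_params may be null for kernels that take no parameters.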
  void Test(xnn_f32_vneg_ukernel_function vneg, xnn_init_f32_neg_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = -x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_neg_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vneg(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

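  // Tests an f32 Square microkernel against exact reference results.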
  void Test(xnn_f32_vsqr_ukernel_function vsqr, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = x_data[i] * x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_default_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqr(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

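  // Tests an f32 Square Root microkernel; when init_params is null, the
  // kernel is passed null parameters.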
  void Test(xnn_f32_vsqrt_ukernel_function vsqrt, xnn_init_f32_sqrt_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::sqrt(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_sqrt_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqrt(batch_size() * sizeof(float), x_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

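  // The overloads below forward typed microkernel pointers to the generic
  // f32 Test() above by casting them to the common vunary signature.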
  inline void Test(xnn_f32_vabs_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_velu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vneg_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vrelu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vround_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

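  // Tests a microkernel with the generic f16 vunary signature (currently
  // ReLU only). Half-precision values are stored as uint16_t bit patterns
  // and converted with the fp16 library.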
  void Test(xnn_f16_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(fp16_ieee_to_fp32_value(x_data[i]), 0.0f);
            break;
          default:
            GTEST_FAIL() << "Unexpected op type";
        }
      }

      // Prepare parameters.
      union {
        struct xnn_f16_minmax_params minmax;
      } params;
      switch (op_type) {
        case OpType::ReLU:
          break;
        default:
          GTEST_FAIL() << "Unexpected op type";
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

  void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]), float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      struct xnn_f16_minmax_params params;
      init_params(&params, fp16_ieee_from_fp32_value(float(qmin())), fp16_ieee_from_fp32_value(float(qmax())));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

  void Test(xnn_f16_vhswish_ukernel_function vhswish, xnn_init_f16_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const float x_value = fp16_ieee_to_fp32_value(x_data[i]);
        y_ref[i] = (x_value / 6.0f) * std::max(std::min(x_value + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      struct xnn_f16_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

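  // Tests an s8 Clamp microkernel. qmin()/qmax() are stored as uint8_t,
  // so they are shifted by 0x80 into the signed int8 range.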
  void Test(xnn_s8_vclamp_ukernel_function vclamp, xnn_init_s8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(i8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), INT8_C(0xA5));
      }
      const int8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], int8_t(qmin() - 0x80)), int8_t(qmax() - 0x80));
      }

      // Prepare parameters.
      union xnn_s8_minmax_params params;
      init_params(&params, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(int8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << int32_t(x[i]);
      }
    }
  }

  void Test(xnn_u8_vclamp_ukernel_function vclamp, xnn_init_u8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<int32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

    std::vector<uint8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(u8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), UINT8_C(0xA5));
      }
      const uint8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], qmin()), qmax());
      }

      // Prepare parameters.
      union xnn_u8_minmax_params params;
      init_params(&params, qmin(), qmax());

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << uint32_t(x[i]);
      }
    }
  }

 private:
  size_t batch_size_ = 1;
  bool inplace_ = false;
  float slope_ = 0.5f;
  float prescale_ = 1.0f;
  float alpha_ = 1.0f;
  float beta_ = 1.0f;
  uint8_t qmin_ = 0;
  uint8_t qmax_ = 255;
  size_t iterations_ = 15;
};
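
// Example usage from a gtest test case (a minimal sketch; the microkernel
// name xnn_f32_vrelu_ukernel__scalar_x4 is illustrative -- substitute the
// kernel under test):
//
//   TEST(F32_VRELU__SCALAR_X4, batch_div_4) {
//     VUnaryMicrokernelTester()
//       .batch_size(8)
//       .inplace(true)
//       .Test(xnn_f32_vrelu_ukernel__scalar_x4,
//             VUnaryMicrokernelTester::OpType::ReLU);
//   }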