blob: 5610aa4972dbac6ac534e0e07774fb7ef2144779 [file] [log] [blame]
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#pragma once
7
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
23
24
Marat Dukhan10f1fe02021-05-13 12:51:12 -070025class VBinaryCMicrokernelTester {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080026 public:
27 enum class OpType {
28 AddC,
Marat Dukhan77ca6302019-12-06 12:48:15 -080029 DivC,
30 RDivC,
Marat Dukhan403b7d42019-12-05 12:49:11 -080031 MaxC,
32 MinC,
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080033 MulC,
Marat Dukhan13bafb02020-06-05 00:43:11 -070034 SqrDiffC,
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080035 SubC,
36 RSubC,
37 };
38
39 enum class Variant {
40 Native,
41 Scalar,
42 };
43
Marat Dukhan10f1fe02021-05-13 12:51:12 -070044 inline VBinaryCMicrokernelTester& batch_size(size_t batch_size) {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080045 assert(batch_size != 0);
46 this->batch_size_ = batch_size;
47 return *this;
48 }
49
50 inline size_t batch_size() const {
51 return this->batch_size_;
52 }
53
Marat Dukhan10f1fe02021-05-13 12:51:12 -070054 inline VBinaryCMicrokernelTester& inplace(bool inplace) {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080055 this->inplace_ = inplace;
56 return *this;
57 }
58
59 inline bool inplace() const {
60 return this->inplace_;
61 }
62
Marat Dukhan10f1fe02021-05-13 12:51:12 -070063 inline VBinaryCMicrokernelTester& qmin(uint8_t qmin) {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080064 this->qmin_ = qmin;
65 return *this;
66 }
67
68 inline uint8_t qmin() const {
69 return this->qmin_;
70 }
71
Marat Dukhan10f1fe02021-05-13 12:51:12 -070072 inline VBinaryCMicrokernelTester& qmax(uint8_t qmax) {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080073 this->qmax_ = qmax;
74 return *this;
75 }
76
77 inline uint8_t qmax() const {
78 return this->qmax_;
79 }
80
Marat Dukhan10f1fe02021-05-13 12:51:12 -070081 inline VBinaryCMicrokernelTester& iterations(size_t iterations) {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080082 this->iterations_ = iterations;
83 return *this;
84 }
85
86 inline size_t iterations() const {
87 return this->iterations_;
88 }
89
Frank Barchardbf31e3f2020-05-12 14:00:07 -070090 void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const {
Frank Barchardd793f6c2020-05-08 13:37:43 -070091 std::random_device random_device;
92 auto rng = std::mt19937(random_device());
Frank Barchard967712d2021-03-22 12:01:44 -070093 auto f32rng = std::bind(std::uniform_real_distribution<float>(1.0e-3f, 1.0f), rng);
Frank Barchardd793f6c2020-05-08 13:37:43 -070094 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
95
96 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
97 const uint16_t b = f16rng();
98 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
99 std::vector<float> y_ref(batch_size());
100 for (size_t iteration = 0; iteration < iterations(); iteration++) {
101 std::generate(a.begin(), a.end(), std::ref(f16rng));
102 if (inplace()) {
103 std::generate(y.begin(), y.end(), std::ref(f16rng));
104 } else {
105 std::fill(y.begin(), y.end(), nanf(""));
106 }
107 const uint16_t* a_data = inplace() ? y.data() : a.data();
108
109 // Compute reference results.
110 for (size_t i = 0; i < batch_size(); i++) {
111 switch (op_type) {
112 case OpType::AddC:
113 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
114 break;
115 case OpType::DivC:
116 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
117 break;
118 case OpType::RDivC:
119 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
120 break;
121 case OpType::MaxC:
122 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
123 break;
124 case OpType::MinC:
125 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
126 break;
127 case OpType::MulC:
128 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
129 break;
Marat Dukhan13bafb02020-06-05 00:43:11 -0700130 case OpType::SqrDiffC:
131 {
132 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
133 y_ref[i] = diff * diff;
134 break;
135 }
Frank Barchardd793f6c2020-05-08 13:37:43 -0700136 case OpType::SubC:
137 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
138 break;
139 case OpType::RSubC:
140 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
141 break;
142 }
143 }
144 // Call optimized micro-kernel.
145 vbinaryc(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), nullptr);
146
147 // Verify results.
148 for (size_t i = 0; i < batch_size(); i++) {
Frank Barchard2b9d29b2020-09-17 12:03:39 -0700149 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
Frank Barchardd793f6c2020-05-08 13:37:43 -0700150 << "at " << i << " / " << batch_size();
151 }
152 }
153 }
154
Frank Barchardbf31e3f2020-05-12 14:00:07 -0700155 void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type) const {
Frank Barchardd793f6c2020-05-08 13:37:43 -0700156 std::random_device random_device;
157 auto rng = std::mt19937(random_device());
Frank Barchard967712d2021-03-22 12:01:44 -0700158 auto f32rng = std::bind(std::uniform_real_distribution<float>(1.0e-3f, 1.0f), rng);
Frank Barchardd793f6c2020-05-08 13:37:43 -0700159 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
160
161 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
162 const uint16_t b = f16rng();
163 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
164 std::vector<float> y_ref(batch_size());
165 for (size_t iteration = 0; iteration < iterations(); iteration++) {
166 std::generate(a.begin(), a.end(), std::ref(f16rng));
167 if (inplace()) {
168 std::generate(y.begin(), y.end(), std::ref(f16rng));
169 } else {
170 std::fill(y.begin(), y.end(), nanf(""));
171 }
172 const uint16_t* a_data = inplace() ? y.data() : a.data();
173
174 // Compute reference results.
175 for (size_t i = 0; i < batch_size(); i++) {
176 switch (op_type) {
177 case OpType::AddC:
178 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
179 break;
180 case OpType::DivC:
181 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
182 break;
183 case OpType::RDivC:
184 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
185 break;
186 case OpType::MaxC:
187 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
188 break;
189 case OpType::MinC:
190 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
191 break;
192 case OpType::MulC:
193 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
194 break;
Marat Dukhan13bafb02020-06-05 00:43:11 -0700195 case OpType::SqrDiffC:
196 {
197 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
198 y_ref[i] = diff * diff;
199 break;
200 }
Frank Barchardd793f6c2020-05-08 13:37:43 -0700201 case OpType::SubC:
202 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
203 break;
204 case OpType::RSubC:
205 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
206 break;
207 }
208 }
209 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
210 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
211 const float accumulated_range = accumulated_max - accumulated_min;
212 const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
213 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
214 +std::numeric_limits<float>::infinity()));
215 const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
216 (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
217 -std::numeric_limits<float>::infinity()));
218 for (size_t i = 0; i < batch_size(); i++) {
219 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
220 }
221
Frank Barchard9f3a8432020-06-02 13:59:35 -0700222 // Prepare parameters.
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700223 xnn_f16_minmax_params params;
224 xnn_init_f16_minmax_params(
225 &params,
226 fp16_ieee_from_fp32_value(y_min),
227 fp16_ieee_from_fp32_value(y_max));
Frank Barchardd793f6c2020-05-08 13:37:43 -0700228
229 // Call optimized micro-kernel.
230 vbinaryc_minmax(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), &params);
231
232 // Verify results.
233 for (size_t i = 0; i < batch_size(); i++) {
Frank Barchard2b9d29b2020-09-17 12:03:39 -0700234 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
Frank Barchardd793f6c2020-05-08 13:37:43 -0700235 << "at " << i << " / " << batch_size();
236 }
237 }
238 }
239
Marat Dukhan1e782c42019-11-21 17:02:40 -0800240 void Test(xnn_f32_vbinary_ukernel_function vbinaryc, OpType op_type, Variant variant = Variant::Native) const {
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800241 std::random_device random_device;
242 auto rng = std::mt19937(random_device());
243 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
244
245 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
246 const float b = f32rng();
247 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
248 std::vector<float> y_ref(batch_size());
249 for (size_t iteration = 0; iteration < iterations(); iteration++) {
250 std::generate(a.begin(), a.end(), std::ref(f32rng));
251 if (inplace()) {
252 std::generate(y.begin(), y.end(), std::ref(f32rng));
253 } else {
254 std::fill(y.begin(), y.end(), nanf(""));
255 }
256 const float* a_data = inplace() ? y.data() : a.data();
257
258 // Compute reference results.
259 for (size_t i = 0; i < batch_size(); i++) {
260 switch (op_type) {
261 case OpType::AddC:
262 y_ref[i] = a_data[i] + b;
263 break;
Marat Dukhan77ca6302019-12-06 12:48:15 -0800264 case OpType::DivC:
265 y_ref[i] = a_data[i] / b;
266 break;
267 case OpType::RDivC:
268 y_ref[i] = b / a_data[i];
269 break;
Marat Dukhan403b7d42019-12-05 12:49:11 -0800270 case OpType::MaxC:
271 y_ref[i] = std::max<float>(a_data[i], b);
272 break;
273 case OpType::MinC:
274 y_ref[i] = std::min<float>(a_data[i], b);
275 break;
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800276 case OpType::MulC:
277 y_ref[i] = a_data[i] * b;
278 break;
Marat Dukhan13bafb02020-06-05 00:43:11 -0700279 case OpType::SqrDiffC:
280 {
281 const float diff = a_data[i] - b;
282 y_ref[i] = diff * diff;
283 break;
284 }
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800285 case OpType::SubC:
286 y_ref[i] = a_data[i] - b;
287 break;
288 case OpType::RSubC:
289 y_ref[i] = b - a_data[i];
290 break;
291 }
292 }
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700293 // Call optimized micro-kernel.
294 vbinaryc(batch_size() * sizeof(float), a_data, &b, y.data(), nullptr);
295
296 // Verify results.
297 for (size_t i = 0; i < batch_size(); i++) {
298 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
299 << "at " << i << " / " << batch_size();
300 }
301 }
302 }
303
304 void Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, Variant variant = Variant::Native) const {
305 std::random_device random_device;
306 auto rng = std::mt19937(random_device());
307 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
308
309 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
310 const float b = f32rng();
311 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
312 std::vector<float> y_ref(batch_size());
313 for (size_t iteration = 0; iteration < iterations(); iteration++) {
314 std::generate(a.begin(), a.end(), std::ref(f32rng));
315 if (inplace()) {
316 std::generate(y.begin(), y.end(), std::ref(f32rng));
317 } else {
318 std::fill(y.begin(), y.end(), nanf(""));
319 }
320 const float* a_data = inplace() ? y.data() : a.data();
321
322 // Compute reference results.
323 for (size_t i = 0; i < batch_size(); i++) {
324 switch (op_type) {
325 case OpType::AddC:
326 y_ref[i] = a_data[i] + b;
327 break;
328 case OpType::DivC:
329 y_ref[i] = a_data[i] / b;
330 break;
331 case OpType::RDivC:
332 y_ref[i] = b / a_data[i];
333 break;
334 case OpType::MaxC:
335 y_ref[i] = std::max<float>(a_data[i], b);
336 break;
337 case OpType::MinC:
338 y_ref[i] = std::min<float>(a_data[i], b);
339 break;
340 case OpType::MulC:
341 y_ref[i] = a_data[i] * b;
342 break;
Marat Dukhan13bafb02020-06-05 00:43:11 -0700343 case OpType::SqrDiffC:
344 {
345 const float diff = a_data[i] - b;
346 y_ref[i] = diff * diff;
347 break;
348 }
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700349 case OpType::SubC:
350 y_ref[i] = a_data[i] - b;
351 break;
352 case OpType::RSubC:
353 y_ref[i] = b - a_data[i];
354 break;
355 }
356 }
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800357 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
358 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
359 const float accumulated_range = accumulated_max - accumulated_min;
360 const float y_max = accumulated_range > 0.0f ?
361 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
362 +std::numeric_limits<float>::infinity();
363 const float y_min = accumulated_range > 0.0f ?
364 (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
365 -std::numeric_limits<float>::infinity();
366 for (size_t i = 0; i < batch_size(); i++) {
367 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
368 }
369
Frank Barchard9f3a8432020-06-02 13:59:35 -0700370 // Prepare parameters.
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700371 xnn_f32_minmax_params params;
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800372 switch (variant) {
373 case Variant::Native:
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700374 xnn_init_f32_minmax_params(&params, y_min, y_max);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800375 break;
376 case Variant::Scalar:
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700377 xnn_init_f32_minmax_scalar_params(&params, y_min, y_max);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800378 break;
379 }
380
381 // Call optimized micro-kernel.
Frank Barcharde70dbeb2020-05-01 15:46:41 -0700382 vbinaryc_minmax(batch_size() * sizeof(float), a_data, &b, y.data(), &params);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800383
384 // Verify results.
385 for (size_t i = 0; i < batch_size(); i++) {
386 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
387 << "at " << i << " / " << batch_size();
388 }
389 }
390 }
391
Frank Barchard674778d2020-08-08 10:17:25 -0700392 void Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu, OpType op_type, Variant variant = Variant::Native) const {
393 std::random_device random_device;
394 auto rng = std::mt19937(random_device());
395 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
396
397 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
398 const float b = f32rng();
399 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
400 std::vector<float> y_ref(batch_size());
401 for (size_t iteration = 0; iteration < iterations(); iteration++) {
402 std::generate(a.begin(), a.end(), std::ref(f32rng));
403 if (inplace()) {
404 std::generate(y.begin(), y.end(), std::ref(f32rng));
405 } else {
406 std::fill(y.begin(), y.end(), nanf(""));
407 }
408 const float* a_data = inplace() ? y.data() : a.data();
409
410 // Compute reference results.
411 for (size_t i = 0; i < batch_size(); i++) {
412 switch (op_type) {
413 case OpType::AddC:
414 y_ref[i] = a_data[i] + b;
415 break;
416 case OpType::DivC:
417 y_ref[i] = a_data[i] / b;
418 break;
419 case OpType::RDivC:
420 y_ref[i] = b / a_data[i];
421 break;
422 case OpType::MaxC:
423 y_ref[i] = std::max<float>(a_data[i], b);
424 break;
425 case OpType::MinC:
426 y_ref[i] = std::min<float>(a_data[i], b);
427 break;
428 case OpType::MulC:
429 y_ref[i] = a_data[i] * b;
430 break;
431 case OpType::SqrDiffC:
432 {
433 const float diff = a_data[i] - b;
434 y_ref[i] = diff * diff;
435 break;
436 }
437 case OpType::SubC:
438 y_ref[i] = a_data[i] - b;
439 break;
440 case OpType::RSubC:
441 y_ref[i] = b - a_data[i];
442 break;
443 }
444 }
445 for (size_t i = 0; i < batch_size(); i++) {
446 y_ref[i] = std::max(y_ref[i], 0.0f);
447 }
448
449 // Prepare parameters.
450 xnn_f32_relu_params params = { };
451
452 // Call optimized micro-kernel.
453 vbinaryc_relu(batch_size() * sizeof(float), a_data, &b, y.data(), &params);
454
455 // Verify results.
456 for (size_t i = 0; i < batch_size(); i++) {
457 ASSERT_GE(y[i], 0.0f)
458 << "at " << i << " / " << batch_size();
459 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
460 << "at " << i << " / " << batch_size();
461 }
462 }
463 }
464
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800465 private:
466 size_t batch_size_{1};
467 bool inplace_{false};
468 uint8_t qmin_{0};
469 uint8_t qmax_{255};
470 size_t iterations_{15};
471};