// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>

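// VBinOpMicrokernelTester exercises XNNPACK elementwise binary ("vbinary")
// microkernels: it fills random inputs, computes a scalar reference result
// for the selected OpType, runs the microkernel under test, and checks the
// outputs with gtest assertions. A typical invocation is sketched below; the
// kernel name is illustrative and stands in for any microkernel whose
// signature matches the Test() overload being called:
//
//   VBinOpMicrokernelTester()
//     .batch_size(7)
//     .inplace_a(true)
//     .Test(xnn_f32_vadd_minmax_ukernel__scalar_x1,
//           VBinOpMicrokernelTester::OpType::Add,
//           VBinOpMicrokernelTester::Variant::Scalar);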
class VBinOpMicrokernelTester {
 public:
  enum class OpType {
    Add,
    Div,
    Max,
    Min,
    Mul,
    Sub,
  };

  enum class Variant {
    Native,
    Scalar,
  };

  inline VBinOpMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VBinOpMicrokernelTester& inplace_a(bool inplace_a) {
    this->inplace_a_ = inplace_a;
    return *this;
  }

  inline bool inplace_a() const {
    return this->inplace_a_;
  }

  inline VBinOpMicrokernelTester& inplace_b(bool inplace_b) {
    this->inplace_b_ = inplace_b;
    return *this;
  }

  inline bool inplace_b() const {
    return this->inplace_b_;
  }

  inline VBinOpMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VBinOpMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VBinOpMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

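  // Tests a f16 vbinary microkernel without output clamping: results are
  // compared against an fp32 reference with 1% relative tolerance.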
  void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

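    // Buffers are padded with XNN_EXTRA_BYTES because microkernels are
    // permitted to read (though not write) slightly past the last element.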
    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
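      // For in-place tests the input pointers alias the output buffer, so the
      // kernel must tolerate its input being overwritten as it runs.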
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

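  // Tests a f16 vbinary microkernel with minmax output clamping applied on
  // top of the binary operation.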
  void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

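      // Derive clamping bounds from the observed output range: qmin()/qmax()
      // select cut-off points on a 0-255 scale within [min, max] of the
      // reference outputs, so some (but not all) outputs get clamped. The
      // bounds are round-tripped through fp16 so that the reference clamp
      // matches the half-precision parameters the kernel receives.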
      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity()));
      const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity()));
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare output parameters.
      xnn_f16_minmax_params params = xnn_init_f16_minmax_params(
        fp16_ieee_from_fp32_value(y_min),
        fp16_ieee_from_fp32_value(y_max));

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

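  // Tests a f32 vbinary microkernel without output clamping: results are
  // compared against a scalar reference with 1e-6 relative tolerance.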
  void Test(xnn_f32_vbinary_ukernel_function vbinary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(float), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

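  // Tests a f32 vbinary microkernel with minmax output clamping; variant
  // selects which parameter-initialization path is exercised.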
  void Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
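      // Derive clamping bounds from the observed output range, following the
      // same qmin()/qmax() scheme as the f16 minmax overload above, but
      // without the fp16 round-trip.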
      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity();
      const float y_min = accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity();
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare output parameters.
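      // Variant::Scalar uses the portable scalar parameter initializer;
      // Variant::Native uses the default (possibly architecture-specific)
      // parameter layout.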
      xnn_f32_minmax_params params = { };
      switch (variant) {
        case Variant::Native:
          params = xnn_init_f32_minmax_params(y_min, y_max);
          break;
        case Variant::Scalar:
          params = xnn_init_scalar_f32_minmax_params(y_min, y_max);
          break;
      }

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(float), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_a_{false};
  bool inplace_b_{false};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};