// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/math.h>
#include <xnnpack/params.h>


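// Tester for bilinear-interpolation (IBILINEAR) micro-kernels. It fills a
// randomized indirection buffer and packed weights, invokes the micro-kernel
// under test, and verifies the output against a scalar reference computation.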
class IBilinearMicrokernelTester {
 public:
  inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
    assert(pixels >= 1);
    this->pixels_ = pixels;
    return *this;
  }

  inline uint32_t pixels() const {
    return this->pixels_;
  }

  inline IBilinearMicrokernelTester& channels(uint32_t channels) {
    assert(channels >= 1);
    this->channels_ = channels;
    return *this;
  }

  inline uint32_t channels() const {
    return this->channels_;
  }

  inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
    this->input_offset_ = input_offset;
    return *this;
  }

  inline uint32_t input_offset() const {
    return this->input_offset_;
  }

  inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline uint32_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline IBilinearMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline uint32_t input_stride() const {
    if (this->input_stride_ == 0) {
      return 4 * pixels();
    } else {
      assert(this->input_stride_ >= 4 * pixels());
      return this->input_stride_;
    }
  }

  void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

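    // The input is over-allocated by XNN_EXTRA_BYTES because micro-kernels
    // are allowed to read (but not use) a few bytes past the last element.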
    std::vector<const float*> indirection(pixels() * 4);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output((pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
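      // Pre-fill the output with NaNs: any element the micro-kernel fails to
      // write will be caught by ASSERT_NEAR below.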
      std::fill(output.begin(), output.end(), nanf(""));

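      // Build the indirection buffer: 4 input pointers per output pixel, each
      // biased by -input_offset(); the micro-kernel adds the byte offset back.
      // Shuffling verifies that the kernel assumes nothing about pointer order.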
      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          output_ref[i * channels() + c] =
            indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
            indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
            indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
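      // The final argument is the increment, in bytes, from the end of one
      // output pixel to the start of the next.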
      ibilinear(
        pixels(), channels() * sizeof(float),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(float));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
            output_ref[i * channels() + c],
            output[i * output_stride() + c],
            std::abs(output_ref[i * channels() + c]) * 1.0e-4)
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_s8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int16_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));
    auto w11rng = std::bind(std::uniform_int_distribution<int16_t>(0, 2047), std::ref(rng));

    std::vector<const int8_t*> indirection(pixels() * 4);
    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<int8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<int8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
      std::fill(output.begin(), output.end(), INT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
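      // Weights are 11-bit fixed-point values in [0, 2048); each product of
      // two weights is in Q22. Adding 2^21 = 2097152 before the arithmetic
      // shift right by 22 rounds the accumulator to nearest.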
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const int32_t alpha_h = packed_weights[i * 2 + 0];
          const int32_t alpha_v = packed_weights[i * 2 + 1];
          const int32_t acc = asr_s32(
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v +
            2097152, 22);
          ASSERT_GE(acc, std::numeric_limits<int8_t>::min());
          ASSERT_LE(acc, std::numeric_limits<int8_t>::max());
          output_ref[i * channels() + c] = (int8_t) acc;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(int8_t),
        indirection.data(), input_offset() * sizeof(int8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(int8_t));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_u8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<uint16_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
    auto w11rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 2047), std::ref(rng));

    std::vector<const uint8_t*> indirection(pixels() * 4);
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<uint8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<uint8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
      std::fill(output.begin(), output.end(), UINT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
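      // Same Q11 fixed-point scheme as the s8 path; the accumulator is
      // non-negative here, so a plain logical shift replaces asr_s32.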
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const uint32_t alpha_h = uint32_t(int32_t(packed_weights[i * 2 + 0]));
          const uint32_t alpha_v = uint32_t(int32_t(packed_weights[i * 2 + 1]));
          const uint32_t acc = (2097152 +
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v) >> 22;
          ASSERT_LE(acc, std::numeric_limits<uint8_t>::max());
          output_ref[i * channels() + c] = (uint8_t) acc;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(uint8_t),
        indirection.data(), input_offset() * sizeof(uint8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(uint8_t));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection(pixels() * 2);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output(pixels() * channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // The indirection buffer points to the even ("left") pixels of the input.
      // The kernels expect each "right" pixel to be stored immediately after
      // its "left" pixel.
      for (size_t i = 0; i < indirection.size(); i++) {
        const float* left_corner = input.data() + 2 * i - input_offset();
        indirection[i] = left_corner;
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
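      // The input is CHW: channel planes are input_stride() elements apart,
      // so channel c of a pixel pair is read at `c * input_stride()` from the
      // indirection pointer.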
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          // `c * pixels() + i` because the output is NCHW.
          output_ref[c * pixels() + i] =
            // `c * input_stride()` because the input is NCHW.
            (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
            (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
            (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels(),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(), input_stride() * sizeof(float));

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        for (size_t i = 0; i < pixels(); i++) {
          ASSERT_NEAR(
            output_ref[c * pixels() + i],
            output[c * pixels() + i],
            std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

 private:
  uint32_t channels_{1};
  uint32_t pixels_{1};
  uint32_t output_stride_{0};
  uint32_t input_stride_{0};
  uint32_t input_offset_{0};
  size_t iterations_{3};
};
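
// A minimal usage sketch (the test name and kernel symbol below are
// hypothetical; actual kernel symbols are declared in <xnnpack/params.h>):
//
//   TEST(F32_IBILINEAR__SCALAR_C2, pixels_gt_1) {
//     IBilinearMicrokernelTester()
//       .pixels(7)
//       .channels(2)
//       .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
//   }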