// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/math.h>
#include <xnnpack/pack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


Marat Dukhan1f29b802020-05-15 23:46:39 -070030class DWConvCHWMicrokernelTester {
XNNPACK Teamb455b122019-09-27 18:10:33 -070031 public:
32 enum class Variant {
33 Native,
34 Scalar,
35 };
36
Marat Dukhan1f29b802020-05-15 23:46:39 -070037 inline DWConvCHWMicrokernelTester& input_tuple_size(uint32_t input_tuple_size) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070038 this->input_tuple_size_ = input_tuple_size;
39 return *this;
40 }
41
42 inline uint32_t input_tuple_size() const {
43 return this->input_tuple_size_;
44 }
45
Marat Dukhan1f29b802020-05-15 23:46:39 -070046 inline DWConvCHWMicrokernelTester& output_tuple_size(uint32_t output_tuple_size) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070047 this->output_tuple_size_ = output_tuple_size;
48 return *this;
49 }
50
51 inline uint32_t output_tuple_size() const {
52 return this->output_tuple_size_;
53 }
54
Marat Dukhan1f29b802020-05-15 23:46:39 -070055 inline DWConvCHWMicrokernelTester& padding_left(uint32_t padding_left) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070056 this->padding_left_ = padding_left;
57 return *this;
58 }
59
60 inline uint32_t padding_left() const {
61 return this->padding_left_;
62 }
63
Marat Dukhan1f29b802020-05-15 23:46:39 -070064 inline DWConvCHWMicrokernelTester& padding_right(uint32_t padding_right) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 this->padding_right_ = padding_right;
66 return *this;
67 }
68
69 inline uint32_t padding_right() const {
70 return this->padding_right_;
71 }
72
Marat Dukhan1f29b802020-05-15 23:46:39 -070073 inline DWConvCHWMicrokernelTester& padding_top(uint32_t padding_top) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070074 this->padding_top_ = padding_top;
75 return *this;
76 }
77
78 inline uint32_t padding_top() const {
79 return this->padding_top_;
80 }
81
82
Marat Dukhan1f29b802020-05-15 23:46:39 -070083 inline DWConvCHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070084 this->padding_bottom_ = padding_bottom;
85 return *this;
86 }
87 inline uint32_t padding_bottom() const {
88 return this->padding_bottom_;
89 }
90
XNNPACK Teamb455b122019-09-27 18:10:33 -070091 inline uint32_t input_height() const {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070092 return (output_height() - 1) * subsampling() + kernel_height() - padding_top() - padding_bottom();
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 }
94
Marat Dukhan1f29b802020-05-15 23:46:39 -070095 inline DWConvCHWMicrokernelTester& input_width(uint32_t input_width) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070096 assert(input_width >= 1);
97 this->input_width_ = input_width;
98 return *this;
99 }
100
101 inline uint32_t input_width() const {
102 return this->input_width_;
103 }
104
Marat Dukhan1f29b802020-05-15 23:46:39 -0700105 inline DWConvCHWMicrokernelTester& subsampling(uint32_t subsampling) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 assert(subsampling >= 1);
107 this->subsampling_ = subsampling;
108 return *this;
109 }
110
111 inline uint32_t subsampling() const {
112 return this->subsampling_;
113 }
114
Marat Dukhan1f29b802020-05-15 23:46:39 -0700115 inline DWConvCHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700116 assert(kernel_height != 0);
117 this->kernel_height_ = kernel_height;
118 return *this;
119 }
120
121 inline uint32_t kernel_height() const {
122 return this->kernel_height_;
123 }
124
Marat Dukhan1f29b802020-05-15 23:46:39 -0700125 inline DWConvCHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126 assert(kernel_width != 0);
127 this->kernel_width_ = kernel_width;
128 return *this;
129 }
130
131 inline uint32_t kernel_width() const {
132 return this->kernel_width_;
133 }
134
135 inline uint32_t kernel_size() const {
136 return kernel_height() * kernel_width();
137 }
138
Marat Dukhan1f29b802020-05-15 23:46:39 -0700139 inline DWConvCHWMicrokernelTester& output_height(uint32_t output_height) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700140 assert(output_height >= 1);
141 this->output_height_ = output_height;
142 return *this;
143 }
144
145 inline uint32_t output_height() const {
146 return this->output_height_;
147 }
148
149 inline uint32_t output_width() const {
150 const uint32_t padded_input_width = padding_left() + input_width() + padding_right();
151 if (padded_input_width <= kernel_width()) {
152 return 1;
153 } else {
154 return (padded_input_width - kernel_width()) / subsampling() + 1;
155 }
156 }
157
Marat Dukhan1f29b802020-05-15 23:46:39 -0700158 inline DWConvCHWMicrokernelTester& input_tuple_stride(uint32_t input_tuple_stride) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700159 assert(input_tuple_stride != 0);
160 this->input_tuple_stride_ = input_tuple_stride;
161 return *this;
162 }
163
164 inline uint32_t input_tuple_stride() const {
165 if (this->input_tuple_stride_ == 0) {
166 return this->input_tuple_size();
167 } else {
168 return this->input_tuple_stride_;
169 }
170 }
171
Marat Dukhan1f29b802020-05-15 23:46:39 -0700172 inline DWConvCHWMicrokernelTester& output_tuple_stride(uint32_t output_tuple_stride) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700173 assert(output_tuple_stride != 0);
174 this->output_tuple_stride_ = output_tuple_stride;
175 return *this;
176 }
177
178 inline uint32_t output_tuple_stride() const {
179 if (this->output_tuple_stride_ == 0) {
180 return this->output_tuple_size();
181 } else {
182 return this->output_tuple_stride_;
183 }
184 }
185
Marat Dukhan1f29b802020-05-15 23:46:39 -0700186 inline DWConvCHWMicrokernelTester& input_width_stride(uint32_t input_width_stride) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700187 assert(input_width_stride != 0);
188 this->input_width_stride_ = input_width_stride;
189 return *this;
190 }
191
192 inline uint32_t input_width_stride() const {
193 if (this->input_width_stride_ == 0) {
194 return (this->input_width() + input_tuple_size() - 1) / input_tuple_size() * input_tuple_size();
195 } else {
196 return this->input_width_stride_;
197 }
198 }
199
Marat Dukhan1f29b802020-05-15 23:46:39 -0700200 inline DWConvCHWMicrokernelTester& output_width_stride(uint32_t output_width_stride) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700201 assert(output_width_stride != 0);
202 this->output_width_stride_ = output_width_stride;
203 return *this;
204 }
205
206 inline uint32_t output_width_stride() const {
207 if (this->output_width_stride_ == 0) {
208 return (this->output_width() + output_tuple_size() - 1) / output_tuple_size() * output_tuple_size();
209 } else {
210 return this->output_width_stride_;
211 }
212 }
213
Marat Dukhan1f29b802020-05-15 23:46:39 -0700214 inline DWConvCHWMicrokernelTester& qmin(uint8_t qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700215 this->qmin_ = qmin;
216 return *this;
217 }
218
219 inline uint8_t qmin() const {
220 return this->qmin_;
221 }
222
Marat Dukhan1f29b802020-05-15 23:46:39 -0700223 inline DWConvCHWMicrokernelTester& qmax(uint8_t qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700224 this->qmax_ = qmax;
225 return *this;
226 }
227
228 inline uint8_t qmax() const {
229 return this->qmax_;
230 }
231
Marat Dukhan1f29b802020-05-15 23:46:39 -0700232 inline DWConvCHWMicrokernelTester& iterations(size_t iterations) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700233 this->iterations_ = iterations;
234 return *this;
235 }
236
237 inline size_t iterations() const {
238 return this->iterations_;
239 }
240
Marat Dukhan1f29b802020-05-15 23:46:39 -0700241 void Test(xnn_f32_dwconv_chw_ukernel_function dwconv, Variant variant = Variant::Native) const {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700242 ASSERT_EQ(0, input_tuple_stride() % input_tuple_size());
243 ASSERT_EQ(0, output_tuple_stride() % output_tuple_size());
244
245 std::random_device random_device;
246 auto rng = std::mt19937(random_device());
247 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
248
Marat Dukhan9594db02019-12-05 14:32:37 -0800249 std::vector<float, AlignedAllocator<float, 64>> input((input_height() - 1) * input_width_stride() +
XNNPACK Teamb455b122019-09-27 18:10:33 -0700250 (input_width() - 1) / input_tuple_size() * input_tuple_stride() + input_tuple_stride() + input_tuple_size());
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700251 std::vector<float> zero((input_width() - 1) / input_tuple_size() * input_tuple_stride() + input_tuple_stride() + input_tuple_size());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700252 std::vector<float> packed_weights(kernel_size() + 1);
Marat Dukhan9594db02019-12-05 14:32:37 -0800253 std::vector<float, AlignedAllocator<float, 64>> output((output_height() - 1) * output_width_stride() +
XNNPACK Teamb455b122019-09-27 18:10:33 -0700254 (output_width() - 1) / output_tuple_size() * output_tuple_stride() + output_tuple_size());
255 std::vector<float> output_ref(output_height() * output_width());
256
257 for (size_t iteration = 0; iteration < iterations(); iteration++) {
258 std::generate(input.begin(), input.end(), std::ref(f32rng));
259 std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
260 std::fill(output.begin(), output.end(), nanf(""));
261
262 for (size_t oy = 0; oy < output_height(); oy++) {
263 for (size_t ox = 0; ox < output_width(); ox++) {
264 float acc = packed_weights[0];
265 for (size_t ky = 0; ky < kernel_height(); ky++) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700266 const size_t iy = oy * subsampling() + ky - padding_top();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700267 for (size_t kx = 0; kx < kernel_width(); kx++) {
268 const size_t ix = ox * subsampling() + kx - padding_left();
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700269 if (ix < input_width() && iy <= input_height() - 1) {
270 float input_val = input[ iy * input_width_stride() + ix / input_tuple_size() * input_tuple_stride() + ix % input_tuple_size()];
271 float kernel_val = packed_weights[1 + ky * kernel_width() + kx];
272 acc += input_val * kernel_val;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700273 }
274 }
275 }
276 output_ref[oy * output_width() + ox] = acc;
277 }
278 }
279
280 // Compute clamping parameters.
281 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
282 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
283 const float accumulated_range = accumulated_max - accumulated_min;
284 const float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
285 const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
286
Frank Barchard9f3a8432020-06-02 13:59:35 -0700287 // Prepare parameters.
Marat Dukhan1f29b802020-05-15 23:46:39 -0700288 xnn_f32_chw_params chw_params = { };
XNNPACK Teamb455b122019-09-27 18:10:33 -0700289 switch (variant) {
290 case Variant::Native:
Marat Dukhan1f29b802020-05-15 23:46:39 -0700291 chw_params = xnn_init_f32_chw_params(input_width(), output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700292 break;
293 case Variant::Scalar:
Marat Dukhan1f29b802020-05-15 23:46:39 -0700294 chw_params = xnn_init_scalar_f32_chw_params(input_width(), output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700295 break;
296 }
297
298 // Clamp reference results.
299 for (float& output_val : output_ref) {
300 output_val = std::max(std::min(output_val, output_max), output_min);
301 }
302
303 // Call optimized micro-kernel.
304 dwconv(
Erich Elseneda9c112020-05-11 04:40:25 -0700305 input_height(), input_width(),
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700306 input.data(), packed_weights.data(), zero.data(), output.data(),
307 padding_top(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700308 input_tuple_stride() * sizeof(float), output_tuple_stride() * sizeof(float),
309 input_width_stride() * sizeof(float), output_width_stride() * sizeof(float),
Marat Dukhan1f29b802020-05-15 23:46:39 -0700310 &chw_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700311
312 // Verify results.
313 for (size_t y = 0; y < output_height(); y++) {
314 for (size_t x = 0; x < output_width(); x++) {
315 ASSERT_NEAR(
316 output_ref[y * output_width() + x],
317 output[y * output_width_stride() + x / output_tuple_size() * output_tuple_stride() + x % output_tuple_size()],
318 std::abs(output_ref[y * output_width() + x]) * 1.0e-5)
319 << "x = " << x << ", y = " << y;
320 }
321 }
322
323 // Verify that remainder of the last tile left unchanged.
324 if (output_width() % output_tuple_size() != 0) {
325 for (size_t i = output.size() - output_tuple_size() + output_width() % output_tuple_size(); i < output.size(); i++) {
Marat Dukhan629a33e2019-10-01 10:39:14 -0700326 ASSERT_TRUE(std::isnan(output[i]))
XNNPACK Teamb455b122019-09-27 18:10:33 -0700327 << "i = " << i << ", output = " << output[i];
328 }
329 }
330 }
331 }
332
333 private:
334 uint32_t input_tuple_size_{1};
335 uint32_t output_tuple_size_{1};
336 uint32_t padding_left_{0};
337 uint32_t padding_right_{0};
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700338 uint32_t padding_top_{0};
339 uint32_t padding_bottom_{0};
XNNPACK Teamb455b122019-09-27 18:10:33 -0700340 uint32_t output_height_{1};
341 uint32_t input_width_{1};
342 uint32_t subsampling_{1};
343 uint32_t kernel_height_{1};
344 uint32_t kernel_width_{1};
345 uint32_t input_tuple_stride_{0};
346 uint32_t output_tuple_stride_{0};
347 uint32_t input_width_stride_{0};
348 uint32_t output_width_stride_{0};
349 uint8_t qmin_{0};
350 uint8_t qmax_{255};
351 size_t iterations_{1};
352};