// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include <gtest/gtest.h> |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <cassert> |
| 15 | #include <cmath> |
| 16 | #include <cstddef> |
| 17 | #include <cstdlib> |
| 18 | #include <functional> |
| 19 | #include <random> |
| 20 | #include <vector> |
| 21 | |
| 22 | #include <xnnpack.h> |
| 23 | #include <xnnpack/AlignedAllocator.h> |
| 24 | #include <xnnpack/math.h> |
| 25 | #include <xnnpack/pack.h> |
Marat Dukhan | eeaa7bd | 2019-10-25 17:31:25 -0700 | [diff] [blame] | 26 | #include <xnnpack/params-init.h> |
Frank Barchard | e0601b5 | 2019-10-25 17:43:34 -0700 | [diff] [blame] | 27 | #include <xnnpack/params.h> |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 28 | |
| 29 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 30 | class DWConv2DMicrokernelTester { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 31 | public: |
| 32 | enum class Variant { |
| 33 | Native, |
| 34 | Scalar, |
| 35 | }; |
| 36 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 37 | inline DWConv2DMicrokernelTester& padding_left(uint32_t padding_left) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 38 | this->padding_left_ = padding_left; |
| 39 | return *this; |
| 40 | } |
| 41 | |
| 42 | inline uint32_t padding_left() const { |
| 43 | return this->padding_left_; |
| 44 | } |
| 45 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 46 | inline DWConv2DMicrokernelTester& padding_right(uint32_t padding_right) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 47 | this->padding_right_ = padding_right; |
| 48 | return *this; |
| 49 | } |
| 50 | |
| 51 | inline uint32_t padding_right() const { |
| 52 | return this->padding_right_; |
| 53 | } |
| 54 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 55 | inline DWConv2DMicrokernelTester& padding_top(uint32_t padding_top) { |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 56 | this->padding_top_ = padding_top; |
| 57 | return *this; |
| 58 | } |
| 59 | |
| 60 | inline uint32_t padding_top() const { |
| 61 | return this->padding_top_; |
| 62 | } |
| 63 | |
| 64 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 65 | inline DWConv2DMicrokernelTester& padding_bottom(uint32_t padding_bottom) { |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 66 | this->padding_bottom_ = padding_bottom; |
| 67 | return *this; |
| 68 | } |
| 69 | inline uint32_t padding_bottom() const { |
| 70 | return this->padding_bottom_; |
| 71 | } |
| 72 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 73 | inline DWConv2DMicrokernelTester& input_height(uint32_t input_height) { |
Marat Dukhan | dc6c77f | 2020-10-23 19:09:10 -0700 | [diff] [blame] | 74 | assert(input_height >= 1); |
| 75 | this->input_height_ = input_height; |
| 76 | return *this; |
| 77 | } |
| 78 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 79 | inline uint32_t input_height() const { |
Marat Dukhan | dc6c77f | 2020-10-23 19:09:10 -0700 | [diff] [blame] | 80 | return this->input_height_; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 81 | } |
| 82 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 83 | inline DWConv2DMicrokernelTester& input_width(uint32_t input_width) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 84 | assert(input_width >= 1); |
| 85 | this->input_width_ = input_width; |
| 86 | return *this; |
| 87 | } |
| 88 | |
| 89 | inline uint32_t input_width() const { |
| 90 | return this->input_width_; |
| 91 | } |
| 92 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 93 | inline DWConv2DMicrokernelTester& subsampling(uint32_t subsampling) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 94 | assert(subsampling >= 1); |
| 95 | this->subsampling_ = subsampling; |
| 96 | return *this; |
| 97 | } |
| 98 | |
| 99 | inline uint32_t subsampling() const { |
| 100 | return this->subsampling_; |
| 101 | } |
| 102 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 103 | inline DWConv2DMicrokernelTester& kernel_height(uint32_t kernel_height) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 104 | assert(kernel_height != 0); |
| 105 | this->kernel_height_ = kernel_height; |
| 106 | return *this; |
| 107 | } |
| 108 | |
| 109 | inline uint32_t kernel_height() const { |
| 110 | return this->kernel_height_; |
| 111 | } |
| 112 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 113 | inline DWConv2DMicrokernelTester& kernel_width(uint32_t kernel_width) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 114 | assert(kernel_width != 0); |
| 115 | this->kernel_width_ = kernel_width; |
| 116 | return *this; |
| 117 | } |
| 118 | |
| 119 | inline uint32_t kernel_width() const { |
| 120 | return this->kernel_width_; |
| 121 | } |
| 122 | |
| 123 | inline uint32_t kernel_size() const { |
| 124 | return kernel_height() * kernel_width(); |
| 125 | } |
| 126 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 127 | inline uint32_t output_height() const { |
Marat Dukhan | dc6c77f | 2020-10-23 19:09:10 -0700 | [diff] [blame] | 128 | const uint32_t padded_input_height = padding_top() + input_height() + padding_bottom(); |
| 129 | if (padded_input_height <= kernel_height()) { |
| 130 | return 1; |
| 131 | } else { |
| 132 | return (padded_input_height - kernel_height()) / subsampling() + 1; |
| 133 | } |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 134 | } |
| 135 | |
| 136 | inline uint32_t output_width() const { |
| 137 | const uint32_t padded_input_width = padding_left() + input_width() + padding_right(); |
| 138 | if (padded_input_width <= kernel_width()) { |
| 139 | return 1; |
| 140 | } else { |
| 141 | return (padded_input_width - kernel_width()) / subsampling() + 1; |
| 142 | } |
| 143 | } |
| 144 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 145 | inline DWConv2DMicrokernelTester& qmin(uint8_t qmin) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 146 | this->qmin_ = qmin; |
| 147 | return *this; |
| 148 | } |
| 149 | |
| 150 | inline uint8_t qmin() const { |
| 151 | return this->qmin_; |
| 152 | } |
| 153 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 154 | inline DWConv2DMicrokernelTester& qmax(uint8_t qmax) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 155 | this->qmax_ = qmax; |
| 156 | return *this; |
| 157 | } |
| 158 | |
| 159 | inline uint8_t qmax() const { |
| 160 | return this->qmax_; |
| 161 | } |
| 162 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 163 | inline DWConv2DMicrokernelTester& iterations(size_t iterations) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 164 | this->iterations_ = iterations; |
| 165 | return *this; |
| 166 | } |
| 167 | |
| 168 | inline size_t iterations() const { |
| 169 | return this->iterations_; |
| 170 | } |
| 171 | |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 172 | void Test(xnn_f32_dwconv2d_chw_ukernel_function dwconv, Variant variant = Variant::Native) const { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 173 | std::random_device random_device; |
| 174 | auto rng = std::mt19937(random_device()); |
| 175 | auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); |
| 176 | |
Marat Dukhan | ae7e8b2 | 2020-10-20 17:51:51 -0700 | [diff] [blame] | 177 | std::vector<float, AlignedAllocator<float, 64>> input(input_height() * input_width() + 2 * XNN_EXTRA_BYTES); |
| 178 | std::vector<float> zero(input_width() + 2 * XNN_EXTRA_BYTES); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 179 | std::vector<float> packed_weights(kernel_size() + 1); |
Marat Dukhan | ae7e8b2 | 2020-10-20 17:51:51 -0700 | [diff] [blame] | 180 | std::vector<float, AlignedAllocator<float, 64>> output(output_height() * output_width()); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 181 | std::vector<float> output_ref(output_height() * output_width()); |
| 182 | |
| 183 | for (size_t iteration = 0; iteration < iterations(); iteration++) { |
| 184 | std::generate(input.begin(), input.end(), std::ref(f32rng)); |
| 185 | std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng)); |
| 186 | std::fill(output.begin(), output.end(), nanf("")); |
| 187 | |
| 188 | for (size_t oy = 0; oy < output_height(); oy++) { |
| 189 | for (size_t ox = 0; ox < output_width(); ox++) { |
| 190 | float acc = packed_weights[0]; |
| 191 | for (size_t ky = 0; ky < kernel_height(); ky++) { |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 192 | const size_t iy = oy * subsampling() + ky - padding_top(); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 193 | for (size_t kx = 0; kx < kernel_width(); kx++) { |
| 194 | const size_t ix = ox * subsampling() + kx - padding_left(); |
Marat Dukhan | ae7e8b2 | 2020-10-20 17:51:51 -0700 | [diff] [blame] | 195 | if (ix < input_width() && iy < input_height()) { |
| 196 | const float input_val = input[iy * input_width() + ix]; |
| 197 | const float kernel_val = packed_weights[1 + ky * kernel_width() + kx]; |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 198 | acc += input_val * kernel_val; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 199 | } |
| 200 | } |
| 201 | } |
| 202 | output_ref[oy * output_width() + ox] = acc; |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | // Compute clamping parameters. |
| 207 | const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); |
| 208 | const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); |
| 209 | const float accumulated_range = accumulated_max - accumulated_min; |
| 210 | const float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin()); |
| 211 | const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); |
| 212 | |
Frank Barchard | 9f3a843 | 2020-06-02 13:59:35 -0700 | [diff] [blame] | 213 | // Prepare parameters. |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 214 | xnn_f32_chw_params chw_params = { }; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 215 | switch (variant) { |
| 216 | case Variant::Native: |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 217 | chw_params = xnn_init_f32_chw_params(input_width(), output_min, output_max); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 218 | break; |
| 219 | case Variant::Scalar: |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 220 | chw_params = xnn_init_scalar_f32_chw_params(input_width(), output_min, output_max); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 221 | break; |
| 222 | } |
| 223 | |
| 224 | // Clamp reference results. |
| 225 | for (float& output_val : output_ref) { |
| 226 | output_val = std::max(std::min(output_val, output_max), output_min); |
| 227 | } |
| 228 | |
| 229 | // Call optimized micro-kernel. |
| 230 | dwconv( |
Marat Dukhan | 7515777 | 2020-10-21 01:46:28 -0700 | [diff] [blame] | 231 | input_height(), input_width() * sizeof(float), |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 232 | input.data(), packed_weights.data(), zero.data(), output.data(), |
| 233 | padding_top(), |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 234 | &chw_params); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 235 | |
| 236 | // Verify results. |
| 237 | for (size_t y = 0; y < output_height(); y++) { |
| 238 | for (size_t x = 0; x < output_width(); x++) { |
| 239 | ASSERT_NEAR( |
| 240 | output_ref[y * output_width() + x], |
Marat Dukhan | ae7e8b2 | 2020-10-20 17:51:51 -0700 | [diff] [blame] | 241 | output[y * output_width() + x], |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 242 | std::abs(output_ref[y * output_width() + x]) * 1.0e-5) |
| 243 | << "x = " << x << ", y = " << y; |
| 244 | } |
| 245 | } |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 246 | } |
| 247 | } |
| 248 | |
| 249 | private: |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 250 | uint32_t padding_left_{0}; |
| 251 | uint32_t padding_right_{0}; |
Erich Elsen | 4e5db3d | 2020-05-07 08:57:47 -0700 | [diff] [blame] | 252 | uint32_t padding_top_{0}; |
| 253 | uint32_t padding_bottom_{0}; |
Marat Dukhan | dc6c77f | 2020-10-23 19:09:10 -0700 | [diff] [blame] | 254 | uint32_t input_height_{1}; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 255 | uint32_t input_width_{1}; |
| 256 | uint32_t subsampling_{1}; |
| 257 | uint32_t kernel_height_{1}; |
| 258 | uint32_t kernel_width_{1}; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 259 | uint8_t qmin_{0}; |
| 260 | uint8_t qmax_{255}; |
| 261 | size_t iterations_{1}; |
| 262 | }; |