// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| 5 | |
| 6 | #pragma once |
| 7 | |
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
#include <xnnpack/requantization.h>
| 23 | |
| 24 | |
| 25 | class VMulCMicrokernelTester { |
| 26 | public: |
| 27 | inline VMulCMicrokernelTester& batch_size(size_t batch_size) { |
| 28 | assert(batch_size != 0); |
| 29 | this->batch_size_ = batch_size; |
| 30 | return *this; |
| 31 | } |
| 32 | |
| 33 | inline size_t batch_size() const { |
| 34 | return this->batch_size_; |
| 35 | } |
| 36 | |
| 37 | inline VMulCMicrokernelTester& inplace(bool inplace) { |
| 38 | this->inplace_ = inplace; |
| 39 | return *this; |
| 40 | } |
| 41 | |
| 42 | inline bool inplace() const { |
| 43 | return this->inplace_; |
| 44 | } |
| 45 | |
| 46 | inline VMulCMicrokernelTester& a_scale(float a_scale) { |
| 47 | assert(a_scale > 0.0f); |
| 48 | assert(std::isnormal(a_scale)); |
| 49 | this->a_scale_ = a_scale; |
| 50 | return *this; |
| 51 | } |
| 52 | |
| 53 | inline float a_scale() const { |
| 54 | return this->a_scale_; |
| 55 | } |
| 56 | |
| 57 | inline VMulCMicrokernelTester& a_zero_point(uint8_t a_zero_point) { |
| 58 | this->a_zero_point_ = a_zero_point; |
| 59 | return *this; |
| 60 | } |
| 61 | |
| 62 | inline uint8_t a_zero_point() const { |
| 63 | return this->a_zero_point_; |
| 64 | } |
| 65 | |
| 66 | inline VMulCMicrokernelTester& b_scale(float b_scale) { |
| 67 | assert(b_scale > 0.0f); |
| 68 | assert(std::isnormal(b_scale)); |
| 69 | this->b_scale_ = b_scale; |
| 70 | return *this; |
| 71 | } |
| 72 | |
| 73 | inline float b_scale() const { |
| 74 | return this->b_scale_; |
| 75 | } |
| 76 | |
| 77 | inline VMulCMicrokernelTester& b_zero_point(uint8_t b_zero_point) { |
| 78 | this->b_zero_point_ = b_zero_point; |
| 79 | return *this; |
| 80 | } |
| 81 | |
| 82 | inline uint8_t b_zero_point() const { |
| 83 | return this->b_zero_point_; |
| 84 | } |
| 85 | |
| 86 | inline VMulCMicrokernelTester& y_scale(float y_scale) { |
| 87 | assert(y_scale > 0.0f); |
| 88 | assert(std::isnormal(y_scale)); |
| 89 | this->y_scale_ = y_scale; |
| 90 | return *this; |
| 91 | } |
| 92 | |
| 93 | inline float y_scale() const { |
| 94 | return this->y_scale_; |
| 95 | } |
| 96 | |
| 97 | inline VMulCMicrokernelTester& y_zero_point(uint8_t y_zero_point) { |
| 98 | this->y_zero_point_ = y_zero_point; |
| 99 | return *this; |
| 100 | } |
| 101 | |
| 102 | inline uint8_t y_zero_point() const { |
| 103 | return this->y_zero_point_; |
| 104 | } |
| 105 | |
| 106 | inline VMulCMicrokernelTester& qmin(uint8_t qmin) { |
| 107 | this->qmin_ = qmin; |
| 108 | return *this; |
| 109 | } |
| 110 | |
| 111 | inline uint8_t qmin() const { |
| 112 | return this->qmin_; |
| 113 | } |
| 114 | |
| 115 | inline VMulCMicrokernelTester& qmax(uint8_t qmax) { |
| 116 | this->qmax_ = qmax; |
| 117 | return *this; |
| 118 | } |
| 119 | |
| 120 | inline uint8_t qmax() const { |
| 121 | return this->qmax_; |
| 122 | } |
| 123 | |
| 124 | inline VMulCMicrokernelTester& iterations(size_t iterations) { |
| 125 | this->iterations_ = iterations; |
| 126 | return *this; |
| 127 | } |
| 128 | |
| 129 | inline size_t iterations() const { |
| 130 | return this->iterations_; |
| 131 | } |
| 132 | |
| 133 | void Test( |
| 134 | xnn_qu8_vmul_minmax_ukernel_function vmul_minmax, |
| 135 | xnn_init_qu8_mul_minmax_params_fn init_params, |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 136 | xnn_qu8_requantize_fn requantize) const |
| 137 | { |
| 138 | std::random_device random_device; |
| 139 | auto rng = std::mt19937(random_device()); |
| 140 | auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng); |
| 141 | |
| 142 | std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); |
| 143 | std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0)); |
| 144 | std::vector<float> y_fp(batch_size()); |
| 145 | std::vector<uint8_t> y_ref(batch_size()); |
| 146 | for (size_t iteration = 0; iteration < iterations(); iteration++) { |
| 147 | std::generate(a.begin(), a.end(), std::ref(u8rng)); |
| 148 | const uint8_t b = u8rng(); |
| 149 | if (inplace()) { |
| 150 | std::generate(y.begin(), y.end(), std::ref(u8rng)); |
| 151 | } else { |
| 152 | std::fill(y.begin(), y.end(), 0xA5); |
| 153 | } |
| 154 | const uint8_t* a_data = inplace() ? y.data() : a.data(); |
| 155 | |
| 156 | // Prepare parameters. |
| 157 | const float product_scale = a_scale() * b_scale(); |
| 158 | const float product_output_scale = product_scale / y_scale(); |
| 159 | xnn_qu8_mul_minmax_params quantization_params; |
| 160 | init_params( |
| 161 | &quantization_params, |
| 162 | a_zero_point(), b_zero_point(), y_zero_point(), |
| 163 | product_output_scale, qmin(), qmax()); |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 164 | |
| 165 | // Compute reference results. |
| 166 | for (size_t i = 0; i < batch_size(); i++) { |
| 167 | const int32_t acc = |
| 168 | (int32_t(a_data[i]) - int32_t(a_zero_point())) * (int32_t(b) - int32_t(b_zero_point())); |
| 169 | y_fp[i] = float(y_zero_point()) + product_output_scale * float(acc); |
| 170 | y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax()))); |
| 171 | y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin()))); |
Marat Dukhan | 50323b8 | 2022-01-11 00:12:01 -0800 | [diff] [blame] | 172 | y_ref[i] = requantize( |
| 173 | acc, product_output_scale, y_zero_point(), qmin(), qmax()); |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 174 | } |
| 175 | |
| 176 | // Call optimized micro-kernel. |
| 177 | vmul_minmax(batch_size(), a_data, &b, y.data(), &quantization_params); |
| 178 | |
| 179 | // Verify results. |
| 180 | for (size_t i = 0; i < batch_size(); i++) { |
| 181 | ASSERT_LE(uint32_t(y[i]), uint32_t(qmax())) |
| 182 | << "at element " << i << " / " << batch_size(); |
| 183 | ASSERT_GE(uint32_t(y[i]), uint32_t(qmin())) |
| 184 | << "at element " << i << " / " << batch_size(); |
| 185 | ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f) |
| 186 | << "at element " << i << " / " << batch_size(); |
| 187 | ASSERT_EQ(uint32_t(y[i]), uint32_t(y_ref[i])) |
| 188 | << "at element " << i << " / " << batch_size(); |
| 189 | } |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | void Test( |
| 194 | xnn_qs8_vmul_minmax_ukernel_function vmul_minmax, |
| 195 | xnn_init_qs8_mul_minmax_params_fn init_params, |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 196 | xnn_qs8_requantize_fn requantize) const |
| 197 | { |
| 198 | std::random_device random_device; |
| 199 | auto rng = std::mt19937(random_device()); |
| 200 | auto i8rng = std::bind( |
| 201 | std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), |
| 202 | rng); |
| 203 | |
| 204 | std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t)); |
| 205 | std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0)); |
| 206 | std::vector<float> y_fp(batch_size()); |
| 207 | std::vector<int8_t> y_ref(batch_size()); |
| 208 | for (size_t iteration = 0; iteration < iterations(); iteration++) { |
| 209 | std::generate(a.begin(), a.end(), std::ref(i8rng)); |
| 210 | const int8_t b = i8rng(); |
| 211 | if (inplace()) { |
| 212 | std::generate(y.begin(), y.end(), std::ref(i8rng)); |
| 213 | } else { |
| 214 | std::fill(y.begin(), y.end(), 0xA5); |
| 215 | } |
| 216 | const int8_t* a_data = inplace() ? y.data() : a.data(); |
| 217 | |
| 218 | // Prepare parameters. |
| 219 | const float product_scale = a_scale() * b_scale(); |
| 220 | const float product_output_scale = product_scale / y_scale(); |
| 221 | EXPECT_GE(product_output_scale, 0x1.0p-32f); |
| 222 | xnn_qs8_mul_minmax_params quantization_params; |
| 223 | init_params( |
| 224 | &quantization_params, |
| 225 | int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80), |
| 226 | product_output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 227 | |
| 228 | // Compute reference results. |
| 229 | for (size_t i = 0; i < batch_size(); i++) { |
| 230 | const int32_t acc = |
| 231 | (int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (int32_t(b) - int32_t(b_zero_point() - 0x80)); |
| 232 | y_fp[i] = float(y_zero_point() - 0x80) + product_output_scale * float(acc); |
| 233 | y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80))); |
| 234 | y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80))); |
Marat Dukhan | 50323b8 | 2022-01-11 00:12:01 -0800 | [diff] [blame] | 235 | y_ref[i] = requantize( |
| 236 | acc, product_output_scale, int8_t(y_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); |
Marat Dukhan | a212eac | 2021-08-02 09:58:04 -0700 | [diff] [blame] | 237 | } |
| 238 | |
| 239 | // Call optimized micro-kernel. |
| 240 | vmul_minmax(batch_size(), a_data, &b, y.data(), &quantization_params); |
| 241 | |
| 242 | // Verify results. |
| 243 | for (size_t i = 0; i < batch_size(); i++) { |
| 244 | ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80)) |
| 245 | << "at element " << i << " / " << batch_size(); |
| 246 | ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80)) |
| 247 | << "at element " << i << " / " << batch_size(); |
| 248 | ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i])) |
| 249 | << "at element " << i << " / " << batch_size(); |
| 250 | ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f) |
| 251 | << "at element " << i << " / " << batch_size(); |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | private: |
| 257 | size_t batch_size_{1}; |
| 258 | bool inplace_{false}; |
| 259 | float a_scale_{0.75f}; |
| 260 | float b_scale_{1.25f}; |
| 261 | float y_scale_{0.96875f}; |
| 262 | uint8_t a_zero_point_{121}; |
| 263 | uint8_t b_zero_point_{127}; |
| 264 | uint8_t y_zero_point_{133}; |
| 265 | uint8_t qmin_{0}; |
| 266 | uint8_t qmax_{255}; |
| 267 | size_t iterations_{15}; |
| 268 | }; |