blob: 7bba98a0c64cdb8d748e8bbc47d0744de5115afb [file] [log] [blame]
Anthony Barbier8140e1e2017-12-14 23:48:46 +00001/*
Jenkins36ccc902020-02-21 11:10:48 +00002 * Copyright (c) 2017-2020 ARM Limited.
Anthony Barbier8140e1e2017-12-14 23:48:46 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "DepthwiseConvolutionLayer.h"
25
26#include "ConvolutionLayer.h"
27#include "Utils.h"
28
Anthony Barbier8140e1e2017-12-14 23:48:46 +000029#include "tests/validation/Helpers.h"
30#include "tests/validation/reference/Utils.h"
31#include "tests/validation/reference/UtilsQuantizedAsymm.h"
32
33#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
34
35namespace arm_compute
36{
37namespace test
38{
39namespace validation
40{
41namespace reference
42{
Jenkins0e205f72019-11-28 16:53:35 +000043namespace
44{
45/** Perform a depthwise convolution for floating-point types
Anthony Barbier8140e1e2017-12-14 23:48:46 +000046 *
47 * - Three dimensions tensors
48 * - Third dimention is number of channels
49 * - Depths of input tensor and filter are equals
50 * - Padding, stride and output shape "match"
51 *
52 */
Jenkins0e205f72019-11-28 16:53:35 +000053template <typename T>
54SimpleTensor<T> depthwise_convolution_fp(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
55 unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
Anthony Barbier8140e1e2017-12-14 23:48:46 +000056{
Jenkins4ba87db2019-05-23 17:11:51 +010057 ARM_COMPUTE_UNUSED(out_quant_info);
58
Jenkins52ba29e2018-08-29 15:32:11 +000059 SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
Anthony Barbier8140e1e2017-12-14 23:48:46 +000060
61 // Compute reference
62 const int filter_width = weights.shape().x();
63 const int filter_height = weights.shape().y();
64 const int filter_plane = filter_width * filter_height;
65 const int input_width = src.shape().x();
66 const int input_height = src.shape().y();
67 const int input_depth = src.shape().z();
68 const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth);
69
Jenkins36ccc902020-02-21 11:10:48 +000070 const int pad_left = conv_info.pad_left();
71 const int pad_top = conv_info.pad_top();
Anthony Barbier8140e1e2017-12-14 23:48:46 +000072
Jenkins4ba87db2019-05-23 17:11:51 +010073 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
74 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
75
76 const int patch_half_width_floor = patch_width / 2;
77 const int patch_half_height_floor = patch_height / 2;
78
79 const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2));
80 const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
81
82 const int minimum_x = -pad_left + patch_half_width_floor;
83 const int minimum_y = -pad_top + patch_half_height_floor;
Jenkins36ccc902020-02-21 11:10:48 +000084 const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
85 const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
Anthony Barbier8140e1e2017-12-14 23:48:46 +000086
Jenkinsb3a371b2018-05-23 11:36:53 +010087 const T border_value(0);
88
Anthony Barbier8140e1e2017-12-14 23:48:46 +000089 int out_pos = 0;
90 for(int r = 0; r < num_batches; ++r)
91 {
92 for(int z = 0; z < input_depth; ++z)
93 {
Jenkinsb3a371b2018-05-23 11:36:53 +010094 for(unsigned int m = 0; m < depth_multiplier; ++m)
Anthony Barbier8140e1e2017-12-14 23:48:46 +000095 {
Jenkinsb3a371b2018-05-23 11:36:53 +010096 const int out_z = z * depth_multiplier + m;
Anthony Barbier8140e1e2017-12-14 23:48:46 +000097
Jenkins4ba87db2019-05-23 17:11:51 +010098 for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
Jenkinsb3a371b2018-05-23 11:36:53 +010099 {
Jenkins4ba87db2019-05-23 17:11:51 +0100100 for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000101 {
Jenkinsb3a371b2018-05-23 11:36:53 +0100102 Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
103 size_t filter_offset = filter_plane * out_z;
104
105 T val(0);
Jenkins4ba87db2019-05-23 17:11:51 +0100106 for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000107 {
Jenkins4ba87db2019-05-23 17:11:51 +0100108 for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
Jenkinsb3a371b2018-05-23 11:36:53 +0100109 {
110 coords.set(0, i);
111 coords.set(1, j);
Jenkinsb3a371b2018-05-23 11:36:53 +0100112 val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
113 ++filter_offset;
114 }
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000115 }
Jenkinsb3a371b2018-05-23 11:36:53 +0100116
Jenkins0e205f72019-11-28 16:53:35 +0000117 dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(out_z))));
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000118 }
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000119 }
120 }
121 }
122 }
123
124 return dst;
125}
126
Jenkins0e205f72019-11-28 16:53:35 +0000127/** Perform a quantized depthwise convolution
128 *
129 * - Three dimensions tensors
130 * - Third dimention is number of channels
131 * - Depths of input tensor and filter are equals
132 * - Padding, stride and output shape "match"
Jenkins36ccc902020-02-21 11:10:48 +0000133 * - QASYMM8/QASYMM8_SIGNED input, output
134 * - QASYMM8/QASYMM8_SIGNED or QSYMM8_PER_CHANNEL filter
Jenkins0e205f72019-11-28 16:53:35 +0000135 *
136 */
137template <typename T, typename TW, typename TB>
138SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
139 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000140{
Jenkins4ba87db2019-05-23 17:11:51 +0100141 // if no explicit quantization has been set you the same as src
Jenkins975dfe12019-09-02 11:47:54 +0100142 const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
Jenkins0e205f72019-11-28 16:53:35 +0000143 SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, dst_qinfo };
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000144
Jenkinsb3a371b2018-05-23 11:36:53 +0100145 // Create reference
Jenkins975dfe12019-09-02 11:47:54 +0100146 const int input_offset = -src.quantization_info().uniform().offset;
147 const float input_scale = src.quantization_info().uniform().scale;
148 const int weights_offset = -weights.quantization_info().uniform().offset;
Jenkins975dfe12019-09-02 11:47:54 +0100149 const int output_offset = dst_qinfo.uniform().offset;
150 const float output_scale = dst_qinfo.uniform().scale;
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000151
Jenkins0e205f72019-11-28 16:53:35 +0000152 const std::vector<float> weights_scale_vec = weights.quantization_info().scale();
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000153
154 // Compute reference
155 const int filter_width = weights.shape().x();
156 const int filter_height = weights.shape().y();
157 const int filter_plane = filter_width * filter_height;
158 const int input_width = src.shape().x();
159 const int input_height = src.shape().y();
160 const int input_depth = src.shape().z();
161 const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth);
162
Jenkins36ccc902020-02-21 11:10:48 +0000163 const int pad_left = conv_info.pad_left();
164 const int pad_top = conv_info.pad_top();
Anthony Barbier06ea0482018-02-22 15:45:35 +0000165
Jenkins4ba87db2019-05-23 17:11:51 +0100166 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
167 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
168
169 const int patch_half_width_floor = patch_width / 2;
170 const int patch_half_height_floor = patch_height / 2;
171
172 const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2));
173 const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
174
175 const int minimum_x = -pad_left + patch_half_width_floor;
176 const int minimum_y = -pad_top + patch_half_height_floor;
Jenkins36ccc902020-02-21 11:10:48 +0000177 const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
178 const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000179
Jenkins0e205f72019-11-28 16:53:35 +0000180 const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights.data_type());
181
Jenkins36ccc902020-02-21 11:10:48 +0000182 const int min = std::numeric_limits<T>::lowest();
183 const int max = std::numeric_limits<T>::max();
184
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000185 int out_pos = 0;
186 for(int r = 0; r < num_batches; ++r)
187 {
188 for(int z = 0; z < input_depth; ++z)
189 {
Jenkinsb3a371b2018-05-23 11:36:53 +0100190 for(unsigned int m = 0; m < depth_multiplier; ++m)
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000191 {
Jenkinsb3a371b2018-05-23 11:36:53 +0100192 const int out_z = z * depth_multiplier + m;
193 const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
194
Jenkins0e205f72019-11-28 16:53:35 +0000195 int output_multiplier = 0;
196 int output_shift = 0;
197 const float weights_scale = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
198 const float multiplier = input_scale * weights_scale / output_scale;
Jenkins36ccc902020-02-21 11:10:48 +0000199 arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
Jenkins0e205f72019-11-28 16:53:35 +0000200
Jenkins4ba87db2019-05-23 17:11:51 +0100201 for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000202 {
Jenkins4ba87db2019-05-23 17:11:51 +0100203 for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000204 {
Jenkinsb3a371b2018-05-23 11:36:53 +0100205 Coordinates coords(x, y, z, r);
206 int filter_offset = filter_plane * out_z;
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000207
Jenkinsb3a371b2018-05-23 11:36:53 +0100208 int32_t val = 0;
Jenkins4ba87db2019-05-23 17:11:51 +0100209 for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
Jenkinsb3a371b2018-05-23 11:36:53 +0100210 {
Jenkins4ba87db2019-05-23 17:11:51 +0100211 for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
Jenkinsb3a371b2018-05-23 11:36:53 +0100212 {
213 coords.set(0, i);
214 coords.set(1, j);
Jenkins0e205f72019-11-28 16:53:35 +0000215 const auto in_val = tensor_elem_at<T>(src, coords, BorderMode::CONSTANT, -input_offset);
216 const TW w_val = *(weights.data() + filter_offset);
Jenkinsb3a371b2018-05-23 11:36:53 +0100217 val += (in_val + input_offset) * (w_val + weights_offset);
218 ++filter_offset;
219 }
220 }
221 val += bias_val;
Jenkins36ccc902020-02-21 11:10:48 +0000222 // Quantize down
223 val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, min, max);
Jenkinsb3a371b2018-05-23 11:36:53 +0100224
225 // Store the result
226 dst[out_pos++] = val;
227 }
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000228 }
229 }
230 }
231 }
232
233 return dst;
234}
Jenkins0e205f72019-11-28 16:53:35 +0000235} // namespace
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000236
Jenkins0e205f72019-11-28 16:53:35 +0000237template <>
238SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
239 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
240{
241 return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
242}
Anthony Barbierf45d5a92018-01-24 16:23:15 +0000243
Jenkins0e205f72019-11-28 16:53:35 +0000244template <>
245SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
246 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
247{
248 return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
249}
250
251template <>
252SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
253 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
254{
255 return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
256}
257
258template <>
259SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
260 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
261{
262 return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
263}
Jenkins36ccc902020-02-21 11:10:48 +0000264
265template <>
266SimpleTensor<int8_t> depthwise_convolution(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
267 const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
268{
269 return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
270}
Anthony Barbier8140e1e2017-12-14 23:48:46 +0000271} // namespace reference
272} // namespace validation
273} // namespace test
274} // namespace arm_compute