arm_compute v18.05
diff --git a/tests/validation/reference/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp
index a9d9f03..c8badac 100644
--- a/tests/validation/reference/BatchNormalizationLayer.cpp
+++ b/tests/validation/reference/BatchNormalizationLayer.cpp
@@ -106,7 +106,6 @@
const float numerator = src[pos] - mean[i];
const float x_bar = numerator / denominator;
result[pos] = beta[i] + x_bar * gamma[i];
- ;
}
}
}
diff --git a/tests/validation/reference/ChannelCombine.cpp b/tests/validation/reference/ChannelCombine.cpp
new file mode 100644
index 0000000..c1ec3ec
--- /dev/null
+++ b/tests/validation/reference/ChannelCombine.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ChannelCombine.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T>
+inline std::vector<SimpleTensor<T>> create_image_planes(const TensorShape &shape, Format format)
+{
+ TensorShape image_shape = adjust_odd_shape(shape, format);
+
+ std::vector<SimpleTensor<T>> image_planes;
+
+ switch(format)
+ {
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ {
+ image_planes.emplace_back(image_shape, format);
+ break;
+ }
+ case Format::NV12:
+ case Format::NV21:
+ {
+ TensorShape shape_uv88 = calculate_subsampled_shape(image_shape, Format::UV88);
+
+ image_planes.emplace_back(image_shape, Format::U8);
+ image_planes.emplace_back(shape_uv88, Format::UV88);
+ break;
+ }
+ case Format::IYUV:
+ {
+ TensorShape shape_sub2 = calculate_subsampled_shape(image_shape, Format::IYUV);
+
+ image_planes.emplace_back(image_shape, Format::U8);
+ image_planes.emplace_back(shape_sub2, Format::U8);
+ image_planes.emplace_back(shape_sub2, Format::U8);
+ break;
+ }
+ case Format::YUV444:
+ {
+ image_planes.emplace_back(image_shape, Format::U8);
+ image_planes.emplace_back(image_shape, Format::U8);
+ image_planes.emplace_back(image_shape, Format::U8);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ break;
+ }
+
+ return image_planes;
+}
+} // namespace
+
+template <typename T>
+std::vector<SimpleTensor<T>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<T>> &image_planes, Format format)
+{
+ std::vector<SimpleTensor<T>> dst = create_image_planes<T>(shape, format);
+
+ for(unsigned int plane_idx = 0; plane_idx < dst.size(); ++plane_idx)
+ {
+ SimpleTensor<T> &dst_tensor = dst[plane_idx];
+
+ for(int element_idx = 0; element_idx < dst_tensor.num_elements(); ++element_idx)
+ {
+ Coordinates coord = index2coord(dst_tensor.shape(), element_idx);
+
+ switch(format)
+ {
+ case Format::RGB888:
+ case Format::RGBA8888:
+ {
+ // Copy R/G/B or A channel
+ for(int channel_idx = 0; channel_idx < dst_tensor.num_channels(); ++channel_idx)
+ {
+ const T &src_value = reinterpret_cast<const T *>(image_planes[channel_idx](coord))[0];
+ T &dst_value = reinterpret_cast<T *>(dst_tensor(coord))[channel_idx];
+
+ dst_value = src_value;
+ }
+ break;
+ }
+ case Format::YUYV422:
+ case Format::UYVY422:
+ {
+ // Find coordinates of the sub-sampled pixel
+ const Coordinates coord_hori(coord.x() / 2, coord.y());
+
+ const T &src0 = reinterpret_cast<const T *>(image_planes[0](coord))[0];
+ const T &src1 = reinterpret_cast<const T *>(image_planes[1](coord_hori))[0];
+
+ const int shift = (Format::YUYV422 == format) ? 1 : 0;
+ T &dst0 = reinterpret_cast<T *>(dst_tensor(coord))[1 - shift];
+ T &dst1 = reinterpret_cast<T *>(dst_tensor(coord))[0 + shift];
+
+ dst0 = src0;
+ dst1 = src1;
+
+ Coordinates coord2 = index2coord(dst_tensor.shape(), ++element_idx);
+
+ const T &src2 = reinterpret_cast<const T *>(image_planes[0](coord2))[0];
+ const T &src3 = reinterpret_cast<const T *>(image_planes[2](coord_hori))[0];
+
+ T &dst2 = reinterpret_cast<T *>(dst_tensor(coord2))[1 - shift];
+ T &dst3 = reinterpret_cast<T *>(dst_tensor(coord2))[0 + shift];
+
+ dst2 = src2;
+ dst3 = src3;
+
+ break;
+ }
+ case Format::NV12:
+ case Format::NV21:
+ {
+ if(0U == plane_idx)
+ {
+ // Copy Y channel from plane0 of the source to plane0 of the destination multi-image
+ dst_tensor[element_idx] = image_planes[0][element_idx];
+ }
+ else
+ {
+ const int shift = (Format::NV12 == format) ? 0 : 1;
+
+ // Get U channel from plane1 and V channel from plane2 of the source
+ const T &src_u0 = reinterpret_cast<const T *>(image_planes[1](coord))[0];
+ const T &src_v0 = reinterpret_cast<const T *>(image_planes[2](coord))[0];
+
+ // Get U and V channel from plane1 of destination multi-image
+ T &dst_u0 = reinterpret_cast<T *>(dst_tensor(coord))[0 + shift];
+ T &dst_v0 = reinterpret_cast<T *>(dst_tensor(coord))[1 - shift];
+
+ // Combine channel U and V
+ dst_u0 = src_u0;
+ dst_v0 = src_v0;
+ }
+
+ break;
+ }
+ case Format::IYUV:
+ case Format::YUV444:
+ {
+ // Get Y/U/V element
+ const T &src = reinterpret_cast<const T *>(image_planes[plane_idx](coord))[0];
+ T &dst = reinterpret_cast<T *>(dst_tensor(coord))[0];
+
+ // Copy Y/U/V plane
+ dst = src;
+
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ break;
+ }
+ }
+ }
+
+ return dst;
+}
+
+template std::vector<SimpleTensor<uint8_t>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<uint8_t>> &image_planes, Format format);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
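For orientation, a minimal sketch of how this reference might be driven (the tensor shape, the std::iota fill and the wrapper function are illustrative assumptions, not part of the patch): three full-resolution U8 planes are interleaved into a single YUYV422 image, where U and V are sub-sampled by two along x and each pixel pair is stored as Y0 U Y1 V.

    #include "tests/SimpleTensor.h"
    #include "tests/validation/reference/ChannelCombine.h"

    #include <numeric>
    #include <vector>

    using namespace arm_compute;
    using namespace arm_compute::test;

    std::vector<SimpleTensor<uint8_t>> combine_example()
    {
        const TensorShape shape(64U, 32U); // example size only

        // Three full-resolution source planes: Y, U, V.
        std::vector<SimpleTensor<uint8_t>> planes;
        for(int i = 0; i < 3; ++i)
        {
            planes.emplace_back(shape, Format::U8);
            std::iota(planes.back().data(), planes.back().data() + planes.back().num_elements(), 0);
        }

        // Interleave into one YUYV422 image.
        return validation::reference::channel_combine(shape, planes, Format::YUYV422);
    }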
diff --git a/tests/validation/reference/ChannelCombine.h b/tests/validation/reference/ChannelCombine.h
new file mode 100644
index 0000000..cc6607d
--- /dev/null
+++ b/tests/validation/reference/ChannelCombine.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CHANNEL_COMBINE_H__
+#define __ARM_COMPUTE_TEST_CHANNEL_COMBINE_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<SimpleTensor<T>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<T>> &image_planes, Format format);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CHANNEL_COMBINE_H__ */
diff --git a/tests/validation/reference/ChannelShuffle.cpp b/tests/validation/reference/ChannelShuffle.cpp
new file mode 100644
index 0000000..c4d8d50
--- /dev/null
+++ b/tests/validation/reference/ChannelShuffle.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ChannelShuffle.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+// Reference implementation of channel shuffle, taken from https://github.com/pytorch/pytorch/blob/master/caffe2/operators/channel_shuffle_op.h
+template <typename T>
+SimpleTensor<T> channel_shuffle(const SimpleTensor<T> &src, int num_groups)
+{
+ // Create reference
+ SimpleTensor<T> dst{ src.shape(), src.data_type(), src.num_channels(), src.fixed_point_position(), src.quantization_info() };
+
+ const int M = src.shape()[0];
+ const int N = src.shape()[1];
+ const int num_channels = src.shape()[2];
+ const int batches = src.shape()[3];
+ const int MxN = M * N;
+ const int channels_in_group = num_channels / num_groups;
+
+ const T *src_ref = src.data();
+ T *dst_ref = dst.data();
+
+ for(int n = 0; n < batches; ++n)
+ {
+ for(int g = 0; g < num_groups; ++g)
+ {
+            // Scatter the group g block (of size channels_in_group * MxN) to output channels
+            // g + 0 * G, g + 1 * G, ..., g + (K - 1) * G, where K = channels_in_group and G = num_groups.
+ const T *src_ptr = src_ref + g * channels_in_group * MxN + n * num_channels * MxN;
+ T *dst_ptr = dst_ref + g * MxN + n * num_channels * MxN;
+ for(int i = 0; i < channels_in_group; ++i)
+ {
+ std::copy(src_ptr + i * MxN,
+ src_ptr + (i + 1) * MxN,
+ dst_ptr + i * num_groups * MxN);
+ }
+ }
+ }
+
+ return dst;
+}
+
+template SimpleTensor<uint8_t> channel_shuffle(const SimpleTensor<uint8_t> &src, int num_groups);
+template SimpleTensor<uint16_t> channel_shuffle(const SimpleTensor<uint16_t> &src, int num_groups);
+template SimpleTensor<uint32_t> channel_shuffle(const SimpleTensor<uint32_t> &src, int num_groups);
+template SimpleTensor<half> channel_shuffle(const SimpleTensor<half> &src, int num_groups);
+template SimpleTensor<float> channel_shuffle(const SimpleTensor<float> &src, int num_groups);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
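The copy above amounts to a pure channel permutation: with G = num_groups and K = channels_in_group, destination channel g + i * G takes source channel g * K + i. A self-contained sketch of just that index arithmetic (group sizes are illustrative):

    #include <array>
    #include <cstdio>

    int main()
    {
        constexpr int G = 2, K = 3; // num_groups, channels_in_group
        std::array<int, G * K> src_of_dst{};
        for(int g = 0; g < G; ++g)
        {
            for(int i = 0; i < K; ++i)
            {
                src_of_dst[g + i * G] = g * K + i;
            }
        }
        for(int c : src_of_dst)
        {
            std::printf("%d ", c); // prints: 0 3 1 4 2 5
        }
        return 0;
    }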
diff --git a/tests/validation/reference/ChannelShuffle.h b/tests/validation/reference/ChannelShuffle.h
new file mode 100644
index 0000000..52df19e
--- /dev/null
+++ b/tests/validation/reference/ChannelShuffle.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_H__
+#define __ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> channel_shuffle(const SimpleTensor<T> &src, int num_groups);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_H__ */
diff --git a/tests/validation/reference/ConvertFullyConnectedWeights.cpp b/tests/validation/reference/ConvertFullyConnectedWeights.cpp
new file mode 100644
index 0000000..b0f537f
--- /dev/null
+++ b/tests/validation/reference/ConvertFullyConnectedWeights.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ConvertFullyConnectedWeights.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> convert_fully_connected_weights(const SimpleTensor<T> &src, const TensorShape &original_input_shape, const DataLayout training_data_layout)
+{
+ SimpleTensor<T> dst(src.shape(), src.data_type());
+
+ const bool is_nchw_to_nhwc = training_data_layout == DataLayout::NCHW;
+ const unsigned int num_elems_per_input_plane = original_input_shape.x() * original_input_shape.y();
+ const unsigned int num_channels = original_input_shape.z();
+ const unsigned int factor_1 = is_nchw_to_nhwc ? num_elems_per_input_plane : num_channels;
+ const unsigned int factor_2 = is_nchw_to_nhwc ? num_channels : num_elems_per_input_plane;
+
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ const Coordinates coords_in = index2coords(src.shape(), i);
+ const Coordinates coords_out(coords_in.x(), coords_in.y() % factor_1 * factor_2 + coords_in.y() / factor_1);
+
+ dst[coords2index(dst.shape(), coords_out)] = src[i];
+ }
+
+ return dst;
+}
+
+template SimpleTensor<uint8_t> convert_fully_connected_weights(const SimpleTensor<uint8_t> &src, const TensorShape &original_input_shape,
+ const DataLayout training_data_layout);
+template SimpleTensor<half> convert_fully_connected_weights(const SimpleTensor<half> &src, const TensorShape &original_input_shape,
+ const DataLayout training_data_layout);
+template SimpleTensor<float> convert_fully_connected_weights(const SimpleTensor<float> &src, const TensorShape &original_input_shape,
+ const DataLayout training_data_layout);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
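The coords_out expression is a row remap of the flattened input: for NCHW to NHWC, a row index y = c * (H * W) + p (channel c, in-plane offset p) moves to p * C + c, and the two factors swap roles in the opposite direction. A worked instance with illustrative sizes:

    // factor_1 = H * W = 4, factor_2 = C = 3 (NCHW -> NHWC direction)
    constexpr unsigned int H = 2, W = 2, C = 3;
    constexpr unsigned int y     = 1 * (H * W) + 3;               // c = 1, p = 3 -> y = 7
    constexpr unsigned int y_out = y % (H * W) * C + y / (H * W); // 3 * 3 + 1 = 10
    static_assert(y_out == 10, "row 7 of the NCHW layout lands on row 10 of the NHWC layout");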
diff --git a/tests/validation/reference/ConvertFullyConnectedWeights.h b/tests/validation/reference/ConvertFullyConnectedWeights.h
new file mode 100644
index 0000000..a9bbf13
--- /dev/null
+++ b/tests/validation/reference/ConvertFullyConnectedWeights.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_H__
+#define __ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> convert_fully_connected_weights(const SimpleTensor<T> &src, const TensorShape &original_input_shape, const DataLayout training_data_layout);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_H__ */
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
new file mode 100644
index 0000000..7001758
--- /dev/null
+++ b/tests/validation/reference/Convolution3d.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__
+#define __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__
+
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace convolution_3d
+{
+namespace detail
+{
+inline bool is_valid_pixel(int i, int min, int max)
+{
+ return (i >= min && i < max);
+}
+
+// 3D convolution for floating point type
+template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
+inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+ int i_offset, int w_offset, int b_offset, int o_offset,
+ int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1)
+{
+ const T *in_ptr = in.data() + i_offset;
+ const T *w_ptr = weights.data() + w_offset;
+ const TB *b_ptr = bias.data() + b_offset;
+ T *out_ptr = out.data() + o_offset;
+
+ const int half_width_weights_start = width_weights / 2;
+ const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+ const int half_height_weights_start = height_weights / 2;
+ const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
+
+ // Reset accumulator
+ T acc(0);
+
+ // Compute a 2D convolution for each IFM and accumulate the result
+ for(int ifm = 0; ifm < depth_in; ++ifm)
+ {
+ // Compute the offset for the input slice
+ const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+ // Compute 2D convolution
+ for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
+ {
+ for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
+ {
+ // Check if the pixel is out-of-bound
+ if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in))
+ {
+ const int idx = xk + half_width_weights_start;
+ const int idy = yk + half_height_weights_start;
+
+ const T i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
+ const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
+
+ acc += i_value * w_value;
+ }
+ }
+ }
+ }
+
+ // Accumulate the bias and store the result
+ *out_ptr = acc + (*b_ptr);
+}
+
+// 3D convolution for fixed point type
+template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
+inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+ int i_offset, int w_offset, int b_offset, int o_offset,
+ int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1)
+{
+ const T *in_ptr = in.data() + i_offset;
+ const T *w_ptr = weights.data() + w_offset;
+ const T *b_ptr = bias.data() + b_offset;
+ T *out_ptr = out.data() + o_offset;
+ int fixed_point_position = in.fixed_point_position();
+
+ const int half_width_weights_start = width_weights / 2;
+ const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+ const int half_height_weights_start = height_weights / 2;
+ const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
+
+ using namespace fixed_point_arithmetic;
+ using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
+
+ // Reset accumulator
+ fixed_point<promoted_type> acc(0, fixed_point_position);
+
+ // Compute a 2D convolution for each IFM and accumulate the result
+ for(int ifm = 0; ifm < depth_in; ++ifm)
+ {
+ // Compute the offset for the input slice
+ const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+ // Compute 2D convolution
+ for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
+ {
+ for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
+ {
+ // Check if the pixel is out-of-bound
+ if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in))
+ {
+ const int idx = xk + half_width_weights_start;
+ const int idy = yk + half_height_weights_start;
+
+ const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in], fixed_point_position, true);
+ const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
+ const fixed_point<promoted_type> iw = i_value * w_value;
+ acc = iw + acc;
+ }
+ }
+ }
+ }
+
+ // Get the bias
+ const fixed_point<promoted_type> b(*b_ptr, fixed_point_position, true);
+
+ // Accumulate the bias and convert back
+ acc = acc + b;
+ fixed_point<T> res(acc);
+ *out_ptr = res.raw();
+}
+
+// 3D convolution for QASYMM8 type
+template <>
+inline void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &out,
+ int i_offset, int w_offset, int b_offset, int o_offset,
+ int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x, int dilation_y)
+{
+ const uint8_t *in_ptr = in.data() + i_offset;
+ const uint8_t *w_ptr = weights.data() + w_offset;
+ const int32_t *b_ptr = bias.data() + b_offset;
+ uint8_t *out_ptr = out.data() + o_offset;
+
+ const int input_offset = -in.quantization_info().offset;
+ const float input_scale = in.quantization_info().scale;
+ const int weights_offset = -weights.quantization_info().offset;
+ const float weights_scale = weights.quantization_info().scale;
+ const int output_offset = out.quantization_info().offset;
+ const float output_scale = out.quantization_info().scale;
+
+ int output_multiplier = 0;
+ int output_shift = 0;
+ const float multiplier = input_scale * weights_scale / output_scale;
+ arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
+ const int half_width_weights_start = width_weights / 2;
+ const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+ const int half_height_weights_start = height_weights / 2;
+ const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
+
+ // Reset accumulator
+ int32_t acc(0);
+
+ // Compute a 2D convolution for each IFM and accumulate the result
+ for(int ifm = 0; ifm < depth_in; ++ifm)
+ {
+ // Compute the offset for the input slice
+ const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+ // Compute 2D convolution
+ for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
+ {
+ for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
+ {
+ // Check if the pixel is out-of-bound
+ if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in))
+ {
+ const int idx = xk + half_width_weights_start;
+ const int idy = yk + half_height_weights_start;
+
+ const uint8_t i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
+ const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
+
+ acc += (i_value + input_offset) * (w_value + weights_offset);
+ }
+ }
+ }
+ }
+
+ // Accumulate the bias
+ acc += (*b_ptr);
+
+ acc = validation::asymm_rounding_divide_by_pow2(validation::asymm_int_mult(acc, output_multiplier), output_shift);
+ acc += output_offset;
+ acc = utility::clamp<int32_t>(acc, 0, 255);
+
+ // Store the result
+ *out_ptr = acc;
+}
+} // namespace detail
+} // namespace convolution_3d
+} // namespace test
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__ */
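The QASYMM8 epilogue above rescales the int32 accumulator by real_multiplier = input_scale * weights_scale / output_scale, expressed as a Q0.31 fixed-point multiplier plus a right shift. A stand-alone sketch of that requantization, with gemmlowp-style rounding semantics assumed rather than the library's exact helpers:

    #include <algorithm>
    #include <cstdint>

    uint8_t requantize(int32_t acc, int32_t output_multiplier, int output_shift, int32_t output_offset)
    {
        // Rounding "doubling high mul": roughly acc * output_multiplier / 2^31.
        const int64_t ab    = static_cast<int64_t>(acc) * output_multiplier;
        const int32_t nudge = (ab >= 0) ? (1 << 30) : (1 - (1 << 30));
        const int32_t high  = static_cast<int32_t>((ab + nudge) / (INT64_C(1) << 31));

        // Rounding divide by 2^output_shift.
        const int32_t mask      = (1 << output_shift) - 1;
        const int32_t remainder = high & mask;
        const int32_t threshold = (mask >> 1) + ((high < 0) ? 1 : 0);
        const int32_t out       = (high >> output_shift) + ((remainder > threshold) ? 1 : 0);

        // Re-centre on the output zero point and clamp to the uint8 range.
        return static_cast<uint8_t>(std::min(255, std::max(0, out + output_offset)));
    }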
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index b7ed2f5..fe558ba 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -25,6 +25,8 @@
#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Convolution3d.h"
+#include "tests/validation/reference/Permute.h"
#include "tests/validation/reference/Utils.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
@@ -42,193 +44,12 @@
{
namespace
{
-inline bool is_valid_pixel(int i, int min, int max)
-{
- return (i >= min && i < max);
-}
-
-// 3D convolution for floating point type
-template < typename T, typename TB, typename std::enable_if < is_floating_point<T>::value &&is_floating_point<TB>::value, int >::type = 0 >
-void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
- int i_offset, int w_offset, int b_offset, int o_offset,
- int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
-{
- const T *in_ptr = in.data() + i_offset;
- const T *w_ptr = weights.data() + w_offset;
- const TB *b_ptr = bias.data() + b_offset;
- T *out_ptr = out.data() + o_offset;
-
- const int half_width_weights_start = width_weights / 2;
- const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
- const int half_height_weights_start = height_weights / 2;
- const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
-
- // Reset accumulator
- T acc(0);
-
- // Compute a 2D convolution for each IFM and accumulate the result
- for(int ifm = 0; ifm < depth_in; ++ifm)
- {
- // Compute the offset for the input slice
- const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
- // Compute 2D convolution
- for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
- {
- for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
- {
- // Check if the pixel is out-of-bound
- if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
- {
- const int idx = xk + half_width_weights_start;
- const int idy = yk + half_height_weights_start;
-
- const T i_value = in_ptr[offset_slice_in + xk + yk * width_in];
- const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
-
- acc += i_value * w_value;
- }
- }
- }
- }
-
- // Accumulate the bias and store the result
- *out_ptr = acc + (*b_ptr);
-}
-
-// 3D convolution for fixed point type
-template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
-void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
- int i_offset, int w_offset, int b_offset, int o_offset,
- int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
-{
- const T *in_ptr = in.data() + i_offset;
- const T *w_ptr = weights.data() + w_offset;
- const T *b_ptr = bias.data() + b_offset;
- T *out_ptr = out.data() + o_offset;
- int fixed_point_position = in.fixed_point_position();
-
- const int half_width_weights_start = width_weights / 2;
- const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
- const int half_height_weights_start = height_weights / 2;
- const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
-
- using namespace fixed_point_arithmetic;
- using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
-
- // Reset accumulator
- fixed_point<promoted_type> acc(0, fixed_point_position);
-
- // Compute a 2D convolution for each IFM and accumulate the result
- for(int ifm = 0; ifm < depth_in; ++ifm)
- {
- // Compute the offset for the input slice
- const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
- // Compute 2D convolution
- for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
- {
- for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
- {
- // Check if the pixel is out-of-bound
- if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
- {
- const int idx = xk + half_width_weights_start;
- const int idy = yk + half_height_weights_start;
-
- const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
- const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
- const fixed_point<promoted_type> iw = i_value * w_value;
- acc = iw + acc;
- }
- }
- }
- }
-
- // Get the bias
- const fixed_point<promoted_type> b(*b_ptr, fixed_point_position, true);
-
- // Accumulate the bias and covert back
- acc = acc + b;
- fixed_point<T> res(acc);
- *out_ptr = res.raw();
-}
-
-// 3D convolution for QASYMM8 type
-template <>
-void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &out,
- int i_offset, int w_offset, int b_offset, int o_offset,
- int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
-{
- const uint8_t *in_ptr = in.data() + i_offset;
- const uint8_t *w_ptr = weights.data() + w_offset;
- const int32_t *b_ptr = bias.data() + b_offset;
- uint8_t *out_ptr = out.data() + o_offset;
-
- const int input_offset = -in.quantization_info().offset;
- const float input_scale = in.quantization_info().scale;
- const int weights_offset = -weights.quantization_info().offset;
- const float weights_scale = weights.quantization_info().scale;
- const int output_offset = out.quantization_info().offset;
- const float output_scale = out.quantization_info().scale;
-
- int output_multiplier = 0;
- int output_shift = 0;
- const float multiplier = input_scale * weights_scale / output_scale;
- arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
-
- const int half_width_weights_start = width_weights / 2;
- const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
- const int half_height_weights_start = height_weights / 2;
- const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
-
- // Reset accumulator
- int32_t acc(0);
-
- // Compute a 2D convolution for each IFM and accumulate the result
- for(int ifm = 0; ifm < depth_in; ++ifm)
- {
- // Compute the offset for the input slice
- const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
- // Compute 2D convolution
- for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
- {
- for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
- {
- // Check if the pixel is out-of-bound
- if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
- {
- const int idx = xk + half_width_weights_start;
- const int idy = yk + half_height_weights_start;
-
- const uint8_t i_value = in_ptr[offset_slice_in + xk + yk * width_in];
- const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
-
- acc += (i_value + input_offset) * (w_value + weights_offset);
- }
- }
- }
- }
-
- // Accumulate the bias
- acc += (*b_ptr);
-
- acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
- acc += output_offset;
- acc = utility::clamp<int32_t>(acc, 0, 255);
-
- // Store the result
- *out_ptr = acc;
-}
} // namespace
template <typename T, typename TB>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
+SimpleTensor<T> convolution_layer_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const PadStrideInfo &info,
+ const Size2D &dilation)
{
- // Create reference
- SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
-
// Compute reference
const int width_in = src.shape().x();
const int height_in = src.shape().y();
@@ -244,10 +65,10 @@
const int stride_xi = info.stride().first;
const int stride_yi = info.stride().second;
- auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info);
+ auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info, dilation);
- const int start_xi = width_weights / 2 - pad_left;
- const int start_yi = height_weights / 2 - pad_top;
+ const int start_xi = (dilation.x() * (width_weights - 1) + 1) / 2 - pad_left;
+ const int start_yi = (dilation.y() * (height_weights - 1) + 1) / 2 - pad_top;
const int end_xi = output_wh.first * stride_xi;
const int end_yi = output_wh.second * stride_yi;
const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in);
@@ -270,11 +91,11 @@
ARM_COMPUTE_ASSERT(yo < height_out);
// Compute 3D convolution
- convolution3d(src, weights, bias, dst,
- offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out,
- xi, yi,
- width_in, height_in, depth_in,
- width_weights, height_weights);
+ convolution_3d::detail::convolution3d(src, weights, bias, dst,
+ offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out,
+ xi, yi,
+ width_in, height_in, depth_in,
+ width_weights, height_weights, dilation.x(), dilation.y());
}
}
}
@@ -282,18 +103,38 @@
return dst;
}
+template <typename T, typename TB>
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
+ const Size2D &dilation)
+{
+ // Create reference
+ SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+ if(src.data_layout() == DataLayout::NHWC)
+ {
+ SimpleTensor<T> src_nchw = reference::permute<T>(src, PermutationVector(1U, 2U, 0U));
+ SimpleTensor<T> weights_nchw = reference::permute<T>(weights, PermutationVector(1U, 2U, 0U));
+ SimpleTensor<T> dst_nchw = reference::permute<T>(dst, PermutationVector(1U, 2U, 0U));
+
+ return reference::permute<T>(convolution_layer_nchw(src_nchw, weights_nchw, bias, dst_nchw, info, dilation), PermutationVector(2U, 0U, 1U));
+ }
+ else
+ {
+ return convolution_layer_nchw(src, weights, bias, dst, info, dilation);
+ }
+}
template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
- const PadStrideInfo &info);
+ const PadStrideInfo &info, const Size2D &dilation);
template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
- const PadStrideInfo &info);
+ const PadStrideInfo &info, const Size2D &dilation);
template SimpleTensor<qint8_t> convolution_layer(const SimpleTensor<qint8_t> &src, const SimpleTensor<qint8_t> &weights, const SimpleTensor<qint8_t> &bias, const TensorShape &output_shape,
- const PadStrideInfo &info);
+ const PadStrideInfo &info, const Size2D &dilation);
template SimpleTensor<qint16_t> convolution_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &output_shape,
- const PadStrideInfo &info);
+ const PadStrideInfo &info, const Size2D &dilation);
template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
- const PadStrideInfo &info);
+ const PadStrideInfo &info, const Size2D &dilation);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
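Two points worth pinning down in the rewrite above. First, NHWC inputs are handled by permuting src, weights and dst to NCHW with PermutationVector(1U, 2U, 0U), running the NCHW kernel, and permuting the result back with the inverse (2U, 0U, 1U). Second, start_xi/start_yi now anchor the first output sample using the effective (dilated) kernel extent, dilation * (kernel - 1) + 1. A small sketch of the anchor arithmetic (kernel size, dilation and padding are illustrative values):

    constexpr int width_weights = 3, dilation_x = 2, pad_left = 2;
    constexpr int effective_w = dilation_x * (width_weights - 1) + 1; // dilated 3-tap kernel spans 5 columns
    constexpr int start_xi    = effective_w / 2 - pad_left;           // == 0: first output anchored at the left edge
    static_assert(effective_w == 5 && start_xi == 0, "a 3-wide kernel with dilation 2 anchors like a 5-wide kernel");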
diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h
index 57455ba..ff3b153 100644
--- a/tests/validation/reference/ConvolutionLayer.h
+++ b/tests/validation/reference/ConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,8 @@
namespace reference
{
template <typename T, typename TB>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
+ const Size2D &dilation = Size2D(1U, 1U));
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Copy.cpp b/tests/validation/reference/Copy.cpp
new file mode 100644
index 0000000..dc519a4
--- /dev/null
+++ b/tests/validation/reference/Copy.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Copy.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> copy(const SimpleTensor<T> &src, const TensorShape &output_shape)
+{
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(src.shape(), output_shape);
+
+ SimpleTensor<T> dst(output_shape, src.data_type());
+ std::copy_n(src.data(), src.num_elements(), dst.data());
+ return dst;
+}
+
+template SimpleTensor<uint8_t> copy(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int8_t> copy(const SimpleTensor<int8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<uint16_t> copy(const SimpleTensor<uint16_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int16_t> copy(const SimpleTensor<int16_t> &src, const TensorShape &output_shape);
+template SimpleTensor<uint32_t> copy(const SimpleTensor<uint32_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int32_t> copy(const SimpleTensor<int32_t> &src, const TensorShape &output_shape);
+template SimpleTensor<half> copy(const SimpleTensor<half> &src, const TensorShape &output_shape);
+template SimpleTensor<float> copy(const SimpleTensor<float> &src, const TensorShape &output_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Copy.h b/tests/validation/reference/Copy.h
new file mode 100644
index 0000000..362af03
--- /dev/null
+++ b/tests/validation/reference/Copy.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_COPY_H__
+#define __ARM_COMPUTE_TEST_COPY_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> copy(const SimpleTensor<T> &src, const TensorShape &output_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_COPY_H__ */
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index b2a7067..10c617e 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -50,9 +50,9 @@
*
*/
template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier)
{
- // Create reference
SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
// Compute reference
@@ -77,33 +77,39 @@
const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+ const T border_value(0);
+
int out_pos = 0;
for(int r = 0; r < num_batches; ++r)
{
for(int z = 0; z < input_depth; ++z)
{
- for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+ for(unsigned int m = 0; m < depth_multiplier; ++m)
{
- for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
- {
- Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
- size_t filter_offset = filter_plane * z;
+ const int out_z = z * depth_multiplier + m;
- T val(0);
- for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
+ for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+ {
+ for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
{
- for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+ Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
+ size_t filter_offset = filter_plane * out_z;
+
+ T val(0);
+ for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
{
- coords.set(0, i);
- coords.set(1, j);
- T border_value(0);
- val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
- ++filter_offset;
+ for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+ {
+ coords.set(0, i);
+ coords.set(1, j);
+
+ val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
+ ++filter_offset;
+ }
}
+
+ dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(out_z))));
}
- coords.set(0, x);
- coords.set(1, y);
- dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(z))));
}
}
}
@@ -114,11 +120,11 @@
template <>
SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
- const PadStrideInfo &conv_info)
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier)
{
- // Create reference
SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
const int input_offset = -src.quantization_info().offset;
const float input_scale = src.quantization_info().scale;
const int weights_offset = -weights.quantization_info().offset;
@@ -158,35 +164,40 @@
{
for(int z = 0; z < input_depth; ++z)
{
- int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
- for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+ for(unsigned int m = 0; m < depth_multiplier; ++m)
{
- for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+ const int out_z = z * depth_multiplier + m;
+ const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
+
+ for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
{
- Coordinates coords(x, y, z, r);
- int filter_offset = filter_plane * z;
-
- int32_t val = 0;
- for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
+ for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
{
- for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
- {
- coords.set(0, i);
- coords.set(1, j);
- auto in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
- uint8_t w_val = *(weights.data() + filter_offset);
- val += (in_val + input_offset) * (w_val + weights_offset);
- ++filter_offset;
- }
- }
- val += bias_val;
- val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
- val += output_offset;
- val = std::max<int32_t>(val, 0);
- val = std::min<int32_t>(val, 255);
+ Coordinates coords(x, y, z, r);
+ int filter_offset = filter_plane * out_z;
- // Store the result
- dst[out_pos++] = val;
+ int32_t val = 0;
+ for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
+ {
+ for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
+ {
+ coords.set(0, i);
+ coords.set(1, j);
+ const auto in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
+ const uint8_t w_val = *(weights.data() + filter_offset);
+ val += (in_val + input_offset) * (w_val + weights_offset);
+ ++filter_offset;
+ }
+ }
+ val += bias_val;
+ val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
+ val += output_offset;
+ val = std::max<int32_t>(val, 0);
+ val = std::min<int32_t>(val, 255);
+
+ // Store the result
+ dst[out_pos++] = val;
+ }
}
}
}
@@ -196,10 +207,10 @@
}
template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
- const PadStrideInfo &conv_info);
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier);
template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
- const PadStrideInfo &conv_info);
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
index df743a5..bab3387 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,8 @@
namespace reference
{
template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier);
} // namespace reference
} // namespace validation
} // namespace test
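The new depth_multiplier parameter fans each input channel out to several output channels: input channel z produces outputs z * depth_multiplier + m for m in [0, depth_multiplier), and each output channel uses its own filter plane (filter_plane * out_z) and bias (biases(Coordinates(out_z))). The mapping in isolation, with illustrative sizes:

    #include <cstdio>

    int main()
    {
        constexpr unsigned int input_depth = 3, depth_multiplier = 2;
        for(unsigned int z = 0; z < input_depth; ++z)
        {
            for(unsigned int m = 0; m < depth_multiplier; ++m)
            {
                // Filter plane and bias are indexed by out_z, not z.
                std::printf("input channel %u -> output channel %u\n", z, z * depth_multiplier + m);
            }
        }
        return 0;
    }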
diff --git a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
index ca6c168..8bc6ddb 100644
--- a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,7 @@
const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info)
{
// Compute reference
- SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info);
+ SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info, 1);
SimpleTensor<T> dst = convolution_layer(depthwise_out, pointwise_weights, pointwise_biases, dst_shape, pointwise_conv_info);
return dst;
diff --git a/tests/validation/reference/FlattenLayer.cpp b/tests/validation/reference/FlattenLayer.cpp
index 611701d..44f4d93 100644
--- a/tests/validation/reference/FlattenLayer.cpp
+++ b/tests/validation/reference/FlattenLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,12 +34,8 @@
namespace reference
{
template <typename T>
-SimpleTensor<T> flatten_layer(const SimpleTensor<T> &src)
+SimpleTensor<T> flatten_layer(const SimpleTensor<T> &src, const TensorShape &shape_flatten)
{
- TensorShape shape_flatten(src.shape());
- shape_flatten.set(0, src.shape()[0] * src.shape()[1] * src.shape()[2]);
- shape_flatten.remove_dimension(1);
- shape_flatten.remove_dimension(1);
SimpleTensor<T> dst(shape_flatten, src.data_type(), 1, src.fixed_point_position());
// Note: Since the reference implementation does not use padding bytes, we can copy directly the content of the source tensor
@@ -48,10 +44,10 @@
return dst;
}
-template SimpleTensor<float> flatten_layer(const SimpleTensor<float> &src);
-template SimpleTensor<half> flatten_layer(const SimpleTensor<half> &src);
-template SimpleTensor<qint8_t> flatten_layer(const SimpleTensor<qint8_t> &src);
-template SimpleTensor<qint16_t> flatten_layer(const SimpleTensor<qint16_t> &src);
+template SimpleTensor<float> flatten_layer(const SimpleTensor<float> &src, const TensorShape &shape_flatten);
+template SimpleTensor<half> flatten_layer(const SimpleTensor<half> &src, const TensorShape &shape_flatten);
+template SimpleTensor<qint8_t> flatten_layer(const SimpleTensor<qint8_t> &src, const TensorShape &shape_flatten);
+template SimpleTensor<qint16_t> flatten_layer(const SimpleTensor<qint16_t> &src, const TensorShape &shape_flatten);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/FlattenLayer.h b/tests/validation/reference/FlattenLayer.h
index b1286fe..5ccd429 100644
--- a/tests/validation/reference/FlattenLayer.h
+++ b/tests/validation/reference/FlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@
namespace reference
{
template <typename T>
-SimpleTensor<T> flatten_layer(const SimpleTensor<T> &src);
+SimpleTensor<T> flatten_layer(const SimpleTensor<T> &src, const TensorShape &shape_flatten);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp
index 77d025e..f9dcfcb 100644
--- a/tests/validation/reference/GEMM.cpp
+++ b/tests/validation/reference/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,23 +41,44 @@
SimpleTensor<T> dst{ c.shape(), c.data_type(), 1, c.fixed_point_position() };
// Compute reference
- const int M = dst.shape().y();
- const int N = dst.shape().x();
+ const int M = a.shape().y();
+ const int N = b.shape().x();
const int K = a.shape().x();
+ const int D = a.shape().z(); // Number of matrices in a batch
+ const int W = a.shape()[3]; // Number of batched GEMMs (Winograd case)
- for(int row = 0; row < M; ++row)
+ const int a_stride_z = K * M;
+ const int a_stride_w = K * M * D;
+
+ const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide matrix B along the 3rd dimension if it has fewer than 3 dimensions
+ const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide matrix B along the 4th dimension if it has fewer than 4 dimensions
+
+ const int c_stride_z = N * M;
+ const int c_stride_w = N * M * D;
+
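+ // With these strides, element (row, k) of slice (depth, w) of matrix A lives at
+ // a[depth * a_stride_z + w * a_stride_w + row * K + k]; B and C are addressed the same
+ // way with their own strides (B's strides collapse to 0 when it is broadcast across batches).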
+ for(int w = 0; w < W; ++w)
{
- for(int col = 0; col < N; ++col)
+ for(int depth = 0; depth < D; ++depth)
{
- T acc(0);
+ const int base_addr_a = depth * a_stride_z + w * a_stride_w;
+ const int base_addr_b = depth * b_stride_z + w * b_stride_w;
+ const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int k = 0; k < K; ++k)
+ for(int row = 0; row < M; ++row)
{
- acc += a[row * K + k] * b[k * N + col];
- }
+ for(int col = 0; col < N; ++col)
+ {
+ T acc(0);
- // Finalize the result: alpha * A * B + beta * C
- dst[col + row * N] = alpha * acc + beta * c[col + row * N];
+ for(int k = 0; k < K; ++k)
+ {
+ acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N];
+ }
+
+ // Finalize the result: alpha * A * B + beta * C
+ dst[base_addr_c + col + row * N] = alpha * acc + beta * c[base_addr_c + col + row * N];
+ }
+ }
}
}
@@ -75,37 +96,58 @@
// Compute reference
using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
- const int M = dst.shape().y();
- const int N = dst.shape().x();
- const int K = a.shape().x();
- const int fixed_point_position = a.fixed_point_position();
+ const int M = dst.shape().y();
+ const int N = dst.shape().x();
+ const int K = a.shape().x();
+ const int D = a.shape().z(); // Number of matrices in a batch
+ const int W = a.shape()[3]; // Number of batched GEMMs (Winograd case)
+ const int a_stride_z = K * M;
+ const int a_stride_w = K * M * D;
+
+ const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide matrix B along the 3rd dimension if it has fewer than 3 dimensions
+ const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide matrix B along the 4th dimension if it has fewer than 4 dimensions
+
+ const int c_stride_z = N * M;
+ const int c_stride_w = N * M * D;
+
+ const int fixed_point_position = a.fixed_point_position();
const fixed_point<T> alpha_q(alpha, fixed_point_position);
const fixed_point<T> beta_q(beta, fixed_point_position);
- for(int row = 0; row < M; ++row)
+ for(int w = 0; w < W; ++w)
{
- for(int col = 0; col < N; ++col)
+ for(int depth = 0; depth < D; ++depth)
{
- fixed_point<promoted_type> acc_q(0, fixed_point_position);
+ const int base_addr_a = depth * a_stride_z + w * a_stride_w;
+ const int base_addr_b = depth * b_stride_z + w * b_stride_w;
+ const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int k = 0; k < K; ++k)
+ for(int row = 0; row < M; ++row)
{
- const fixed_point<promoted_type> a0_q(a[row * K + k], fixed_point_position, true);
- const fixed_point<promoted_type> b0_q(b[k * N + col], fixed_point_position, true);
+ for(int col = 0; col < N; ++col)
+ {
+ fixed_point<promoted_type> acc_q(0, fixed_point_position);
- acc_q = acc_q + (a0_q * b0_q);
+ for(int k = 0; k < K; ++k)
+ {
+ const fixed_point<promoted_type> a0_q(a[base_addr_a + row * K + k], fixed_point_position, true);
+ const fixed_point<promoted_type> b0_q(b[base_addr_b + k * N + col], fixed_point_position, true);
+
+ acc_q = acc_q + (a0_q * b0_q);
+ }
+
+ // Finalize the result: alpha * A * B + beta * C
+ const fixed_point<T> c0_q(c[base_addr_c + col + row * N], fixed_point_position, true);
+
+ fixed_point<T> res_q(acc_q);
+ res_q = alpha_q * res_q;
+ res_q = res_q + (beta_q * c0_q);
+
+ // Store the result
+ dst[base_addr_c + col + row * N] = res_q.raw();
+ }
}
-
- // Finalize the result: alpha * A * B + beta * C
- const fixed_point<T> c0_q(c[col + row * N], fixed_point_position, true);
-
- fixed_point<T> res_q(acc_q);
- res_q = alpha_q * res_q;
- res_q = res_q + (beta_q * c0_q);
-
- // Store the result
- dst[col + row * N] = res_q.raw();
}
}
diff --git a/tests/validation/reference/GaussianPyramidHalf.cpp b/tests/validation/reference/GaussianPyramidHalf.cpp
index 0a68ded..7d5eb07 100644
--- a/tests/validation/reference/GaussianPyramidHalf.cpp
+++ b/tests/validation/reference/GaussianPyramidHalf.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/reference/HOGDescriptor.cpp b/tests/validation/reference/HOGDescriptor.cpp
index 105eb83..ed22695 100644
--- a/tests/validation/reference/HOGDescriptor.cpp
+++ b/tests/validation/reference/HOGDescriptor.cpp
@@ -255,6 +255,8 @@
return desc;
}
+template void hog_orientation_binning(const SimpleTensor<int16_t> &mag, const SimpleTensor<uint8_t> &phase, SimpleTensor<float> &hog_space, const HOGInfo &hog_info);
+template void hog_block_normalization(SimpleTensor<float> &desc, const SimpleTensor<float> &hog_space, const HOGInfo &hog_info);
template SimpleTensor<float> hog_descriptor(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, const HOGInfo &hog_info);
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/HOGDescriptor.h b/tests/validation/reference/HOGDescriptor.h
index e886445..6ea83fe 100644
--- a/tests/validation/reference/HOGDescriptor.h
+++ b/tests/validation/reference/HOGDescriptor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,12 @@
{
namespace reference
{
+template <typename T, typename U, typename V>
+void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info);
+
+template <typename T>
+void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info);
+
template <typename T, typename U>
SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info);
} // namespace reference
diff --git a/tests/validation/reference/HOGDetector.cpp b/tests/validation/reference/HOGDetector.cpp
new file mode 100644
index 0000000..5a5ae37
--- /dev/null
+++ b/tests/validation/reference/HOGDetector.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "HOGDetector.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+/** Computes the number of detection windows to iterate over in the feature vector. */
+Size2D num_detection_windows(const TensorShape &shape, const Size2D &window_step, const HOGInfo &hog_info)
+{
+ const size_t num_block_strides_width = hog_info.detection_window_size().width / hog_info.block_stride().width;
+ const size_t num_block_strides_height = hog_info.detection_window_size().height / hog_info.block_stride().height;
+
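+ // A detection window spans num_block_strides blocks, so valid window origins run from 0 to
+ // (shape - num_block_strides), rounded down to a multiple of the window step; adding one
+ // more window_step turns the last valid origin into an exclusive loop bound.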
+ return Size2D(floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width,
+ floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height);
+}
+} // namespace
+
+template <typename T>
+std::vector<DetectionWindow> hog_detector(const SimpleTensor<T> &src, const std::vector<T> &descriptor, unsigned int max_num_detection_windows,
+ const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold, uint16_t idx_class)
+{
+ ARM_COMPUTE_ERROR_ON_MSG((detection_window_stride.width % hog_info.block_stride().width != 0),
+ "Detection window stride width must be multiple of block stride width");
+ ARM_COMPUTE_ERROR_ON_MSG((detection_window_stride.height % hog_info.block_stride().height != 0),
+ "Detection window stride height must be multiple of block stride height");
+
+ // Create vector for identifying each detection window
+ std::vector<DetectionWindow> windows;
+
+ // Calculate detection window step
+ const Size2D window_step(detection_window_stride.width / hog_info.block_stride().width,
+ detection_window_stride.height / hog_info.block_stride().height);
+
+ // Calculate number of detection windows
+ const Size2D num_windows = num_detection_windows(src.shape(), window_step, hog_info);
+
+ // Calculate detection window and row offsets in feature vector
+ const size_t src_offset_x = window_step.width * hog_info.num_bins() * hog_info.num_cells_per_block().area();
+ const size_t src_offset_y = window_step.height * hog_info.num_bins() * hog_info.num_cells_per_block().area() * src.shape().x();
+ const size_t src_offset_row = src.num_channels() * src.shape().x();
+
+ // Calculate detection window attributes
+ const Size2D num_block_positions_per_detection_window = hog_info.num_block_positions_per_image(hog_info.detection_window_size());
+ const unsigned int num_bins_per_descriptor_x = num_block_positions_per_detection_window.width * src.num_channels();
+ const unsigned int num_blocks_per_descriptor_y = num_block_positions_per_detection_window.height;
+
+ ARM_COMPUTE_ERROR_ON((num_bins_per_descriptor_x * num_blocks_per_descriptor_y + 1) != hog_info.descriptor_size());
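+ // The descriptor layout implied by the check above: num_blocks_per_descriptor_y rows of
+ // num_bins_per_descriptor_x SVM weights, followed by a single bias term.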
+
+ size_t win_id = 0;
+
+ // Traverse feature vector in detection window steps
+ for(auto win_y = 0u, offset_y = 0u; win_y < num_windows.height; win_y += window_step.height, offset_y += src_offset_y)
+ {
+ for(auto win_x = 0u, offset_x = 0u; win_x < num_windows.width; win_x += window_step.width, offset_x += src_offset_x)
+ {
+ // Reset the score
+ float score = 0.0f;
+
+ // Traverse detection window
+ for(auto y = 0u, offset_row = 0u; y < num_blocks_per_descriptor_y; ++y, offset_row += src_offset_row)
+ {
+ const int bin_offset = y * num_bins_per_descriptor_x;
+
+ for(auto x = 0u; x < num_bins_per_descriptor_x; ++x)
+ {
+ // Compute Linear SVM
+ const float a = src[x + offset_x + offset_y + offset_row];
+ const float b = descriptor[x + bin_offset];
+ score += a * b;
+ }
+ }
+
+ // Add the bias, which is located at position (descriptor_size() - 1)
+ score += descriptor[num_bins_per_descriptor_x * num_blocks_per_descriptor_y];
+
+ if(score > threshold)
+ {
+ DetectionWindow window;
+
+ if(win_id++ < max_num_detection_windows)
+ {
+ window.x = win_x * hog_info.block_stride().width;
+ window.y = win_y * hog_info.block_stride().height;
+ window.width = hog_info.detection_window_size().width;
+ window.height = hog_info.detection_window_size().height;
+ window.idx_class = idx_class;
+ window.score = score;
+
+ windows.push_back(window);
+ }
+ }
+ }
+ }
+
+ return windows;
+}
+
+template std::vector<DetectionWindow> hog_detector(const SimpleTensor<float> &src, const std::vector<float> &descriptor, unsigned int max_num_detection_windows,
+ const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold, uint16_t idx_class);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGDetector.h b/tests/validation/reference/HOGDetector.h
new file mode 100644
index 0000000..e88acb8
--- /dev/null
+++ b/tests/validation/reference/HOGDetector.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_HOG_DETECTOR_H__
+#define __ARM_COMPUTE_TEST_HOG_DETECTOR_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "tests/SimpleTensor.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<DetectionWindow> hog_detector(const SimpleTensor<T> &src, const std::vector<T> &descriptor, unsigned int max_num_detection_windows,
+ const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_HOG_DETECTOR_H__ */
diff --git a/tests/validation/reference/HOGMultiDetection.cpp b/tests/validation/reference/HOGMultiDetection.cpp
new file mode 100644
index 0000000..2f5e439
--- /dev/null
+++ b/tests/validation/reference/HOGMultiDetection.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "HOGMultiDetection.h"
+
+#include "Derivative.h"
+#include "HOGDescriptor.h"
+#include "HOGDetector.h"
+#include "Magnitude.h"
+#include "Phase.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+void validate_models(const std::vector<HOGInfo> &models)
+{
+ ARM_COMPUTE_ERROR_ON(0 == models.size());
+
+ for(size_t i = 1; i < models.size(); ++i)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(models[0].phase_type() != models[i].phase_type(),
+ "All HOG parameters must have the same phase type");
+
+ ARM_COMPUTE_ERROR_ON_MSG(models[0].normalization_type() != models[i].normalization_type(),
+ "All HOG parameters must have the same normalization_type");
+
+ ARM_COMPUTE_ERROR_ON_MSG((models[0].l2_hyst_threshold() != models[i].l2_hyst_threshold()) && (models[0].normalization_type() == arm_compute::HOGNormType::L2HYS_NORM),
+ "All HOG parameters must have the same l2 hysteresis threshold if you use L2 hysteresis normalization type");
+ }
+}
+} // namespace
+
+void detection_windows_non_maxima_suppression(std::vector<DetectionWindow> &multi_windows, float min_distance)
+{
+ const size_t num_candidates = multi_windows.size();
+ size_t num_detections = 0;
+
+ // Sort by idx_class first and by score second
+ std::sort(multi_windows.begin(), multi_windows.end(), [](const DetectionWindow & lhs, const DetectionWindow & rhs)
+ {
+ if(lhs.idx_class < rhs.idx_class)
+ {
+ return true;
+ }
+ if(rhs.idx_class < lhs.idx_class)
+ {
+ return false;
+ }
+
+ // idx_classes are equal so compare by score
+ if(lhs.score > rhs.score)
+ {
+ return true;
+ }
+ if(rhs.score > lhs.score)
+ {
+ return false;
+ }
+
+ return false;
+ });
+
+ const float min_distance_pow2 = min_distance * min_distance;
+
+ // Greedy Euclidean suppression: each kept window invalidates lower-scored windows of the same class whose centres lie within min_distance
+ for(size_t i = 0; i < num_candidates; ++i)
+ {
+ if(0.0f != multi_windows.at(i).score)
+ {
+ DetectionWindow cur;
+ cur.x = multi_windows.at(i).x;
+ cur.y = multi_windows.at(i).y;
+ cur.width = multi_windows.at(i).width;
+ cur.height = multi_windows.at(i).height;
+ cur.idx_class = multi_windows.at(i).idx_class;
+ cur.score = multi_windows.at(i).score;
+
+ // Store window
+ multi_windows.at(num_detections) = cur;
+ ++num_detections;
+
+ const float xc = cur.x + cur.width * 0.5f;
+ const float yc = cur.y + cur.height * 0.5f;
+
+ for(size_t k = i + 1; k < (num_candidates) && (cur.idx_class == multi_windows.at(k).idx_class); ++k)
+ {
+ const float xn = multi_windows.at(k).x + multi_windows.at(k).width * 0.5f;
+ const float yn = multi_windows.at(k).y + multi_windows.at(k).height * 0.5f;
+
+ const float dx = std::fabs(xn - xc);
+ const float dy = std::fabs(yn - yc);
+
+ if(dx < min_distance && dy < min_distance)
+ {
+ const float d = dx * dx + dy * dy;
+
+ if(d < min_distance_pow2)
+ {
+ // Invalidate detection window
+ multi_windows.at(k).score = 0.0f;
+ }
+ }
+ }
+ }
+ }
+
+ multi_windows.resize(num_detections);
+}
+
+template <typename T>
+std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value,
+ const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors,
+ unsigned int max_num_detection_windows, float threshold, bool non_maxima_suppression, float min_distance)
+{
+ ARM_COMPUTE_ERROR_ON(descriptors.size() != models.size());
+ validate_models(models);
+
+ const size_t width = src.shape().x();
+ const size_t height = src.shape().y();
+ const size_t num_models = models.size();
+
+ // Initialize previous values
+ size_t prev_num_bins = models[0].num_bins();
+ Size2D prev_cell_size = models[0].cell_size();
+ Size2D prev_block_size = models[0].block_size();
+ Size2D prev_block_stride = models[0].block_stride();
+
+ std::vector<size_t> input_orient_bin;
+ std::vector<size_t> input_hog_detect;
+ std::vector<std::pair<size_t, size_t>> input_block_norm;
+
+ input_orient_bin.push_back(0);
+ input_hog_detect.push_back(0);
+ input_block_norm.emplace_back(0, 0);
+
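+ // input_orient_bin lists the model indices that require a fresh orientation binning,
+ // input_block_norm pairs each block-normalization step with the binning output it reads,
+ // and input_hog_detect maps every model to the normalized space its detector consumes.
+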
+ // Iterate through the number of models and check if orientation binning
+ // and block normalization steps can be skipped
+ for(size_t i = 1; i < num_models; ++i)
+ {
+ size_t cur_num_bins = models[i].num_bins();
+ Size2D cur_cell_size = models[i].cell_size();
+ Size2D cur_block_size = models[i].block_size();
+ Size2D cur_block_stride = models[i].block_stride();
+
+ // Check if binning and normalization steps are required
+ if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height))
+ {
+ prev_num_bins = cur_num_bins;
+ prev_cell_size = cur_cell_size;
+ prev_block_size = cur_block_size;
+ prev_block_stride = cur_block_stride;
+
+ // Compute orientation binning and block normalization. Update input to process
+ input_orient_bin.push_back(i);
+ input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
+ }
+ else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width)
+ || (cur_block_stride.height != prev_block_stride.height))
+ {
+ prev_block_size = cur_block_size;
+ prev_block_stride = cur_block_stride;
+
+ // Compute block normalization. Update input to process
+ input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
+ }
+
+ // Update input to process for hog detector
+ input_hog_detect.push_back(input_block_norm.size() - 1);
+ }
+
+ size_t num_orient_bin = input_orient_bin.size();
+ size_t num_block_norm = input_block_norm.size();
+ size_t num_hog_detect = input_hog_detect.size();
+
+ std::vector<SimpleTensor<float>> hog_spaces(num_orient_bin);
+ std::vector<SimpleTensor<float>> hog_norm_spaces(num_block_norm);
+
+ // Calculate derivative
+ SimpleTensor<int16_t> grad_x;
+ SimpleTensor<int16_t> grad_y;
+ std::tie(grad_x, grad_y) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY);
+
+ // Calculate magnitude and phase
+ SimpleTensor<int16_t> _mag = magnitude(grad_x, grad_y, MagnitudeType::L2NORM);
+ SimpleTensor<uint8_t> _phase = phase(grad_x, grad_y, models[0].phase_type());
+
+ // Calculate Tensors for the HOG space and orientation binning
+ for(size_t i = 0; i < num_orient_bin; ++i)
+ {
+ const size_t idx_multi_hog = input_orient_bin[i];
+
+ const size_t num_bins = models[idx_multi_hog].num_bins();
+ const size_t num_cells_x = width / models[idx_multi_hog].cell_size().width;
+ const size_t num_cells_y = height / models[idx_multi_hog].cell_size().height;
+
+ // TensorShape of hog space
+ TensorShape hog_space_shape(num_cells_x, num_cells_y);
+
+ // Initialise HOG space
+ TensorInfo info_hog_space(hog_space_shape, num_bins, DataType::F32);
+ hog_spaces.at(i) = SimpleTensor<float>(info_hog_space.tensor_shape(), DataType::F32, info_hog_space.num_channels());
+
+ // For each cell create histogram based on magnitude and phase
+ hog_orientation_binning(_mag, _phase, hog_spaces[i], models[idx_multi_hog]);
+ }
+
+ // Calculate Tensors for the normalized HOG space and block normalization
+ for(size_t i = 0; i < num_block_norm; ++i)
+ {
+ const size_t idx_multi_hog = input_block_norm[i].first;
+ const size_t idx_orient_bin = input_block_norm[i].second;
+
+ // Create tensor info for HOG descriptor
+ TensorInfo tensor_info(models[idx_multi_hog], src.shape().x(), src.shape().y());
+ hog_norm_spaces.at(i) = SimpleTensor<float>(tensor_info.tensor_shape(), DataType::F32, tensor_info.num_channels());
+
+ // Normalize histograms based on block size
+ hog_block_normalization(hog_norm_spaces[i], hog_spaces[idx_orient_bin], models[idx_multi_hog]);
+ }
+
+ std::vector<DetectionWindow> multi_windows;
+
+ // Calculate Detection Windows for HOG detector
+ for(size_t i = 0; i < num_hog_detect; ++i)
+ {
+ const size_t idx_block_norm = input_hog_detect[i];
+
+ // NOTE: The detection window stride is fixed to the block stride
+ const Size2D detection_window_stride = models[i].block_stride();
+
+ std::vector<DetectionWindow> windows = hog_detector(hog_norm_spaces[idx_block_norm], descriptors[i],
+ max_num_detection_windows, models[i], detection_window_stride, threshold, i);
+
+ multi_windows.insert(multi_windows.end(), windows.begin(), windows.end());
+ }
+
+ // Suppress Non-maxima detection windows
+ if(non_maxima_suppression)
+ {
+ detection_windows_non_maxima_suppression(multi_windows, min_distance);
+ }
+
+ return multi_windows;
+}
+
+template std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value,
+ const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors,
+ unsigned int max_num_detection_windows, float threshold, bool non_maxima_suppression, float min_distance);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGMultiDetection.h b/tests/validation/reference/HOGMultiDetection.h
new file mode 100644
index 0000000..6d75bf4
--- /dev/null
+++ b/tests/validation/reference/HOGMultiDetection.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H__
+#define __ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H__
+
+#include "arm_compute/core/Types.h"
+#include "tests/SimpleTensor.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value,
+ const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors,
+ unsigned int max_num_detection_windows, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H__ */
diff --git a/tests/validation/reference/Im2Col.cpp b/tests/validation/reference/Im2Col.cpp
new file mode 100644
index 0000000..5685b60
--- /dev/null
+++ b/tests/validation/reference/Im2Col.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Im2Col.h"
+
+#include "Permute.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+void im2col_nchw(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+{
+ ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NCHW);
+ // Create reference
+ const int pad_x = conv_info.pad().first;
+ const int pad_y = conv_info.pad().second;
+ const int stride_x = conv_info.stride().first;
+ const int stride_y = conv_info.stride().second;
+ const int kernel_width = kernel_dims.width;
+ const int kernel_height = kernel_dims.height;
+ const int src_width = src.shape().x();
+ const int src_height = src.shape().y();
+ const int src_depth = src.shape().z();
+ const int batches = src.shape().total_size_upper(3);
+ const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
+
+ int dst_idx = 0;
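+ // Each iteration of the (b, y, x) loops below emits one output column: the
+ // kernel_height x kernel_width x src_depth patch anchored at (x, y), padded with
+ // pad_val outside the image, plus a trailing 1 when has_bias is set so that the
+ // bias can be folded into the subsequent GEMM.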
+ for(int b = 0; b < batches; ++b)
+ {
+ for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y)
+ {
+ for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x)
+ {
+ for(int z = 0; z < src_depth; ++z)
+ {
+ for(int patch_y = y; patch_y < (y + kernel_height); ++patch_y)
+ {
+ for(int patch_x = x; patch_x < (x + kernel_width); ++patch_x)
+ {
+ dst[dst_idx++] = tensor_elem_at(src, Coordinates(patch_x, patch_y, z, b), BorderMode::CONSTANT, static_cast<T>(pad_val));
+ }
+ }
+ }
+
+ if(has_bias)
+ {
+ dst[dst_idx++] = static_cast<T>(1);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void im2col_nhwc(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+{
+ ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NHWC);
+ const int pad_x = conv_info.pad().first;
+ const int pad_y = conv_info.pad().second;
+ const int stride_x = conv_info.stride().first;
+ const int stride_y = conv_info.stride().second;
+ const int kernel_width = kernel_dims.width;
+ const int kernel_height = kernel_dims.height;
+ const int src_width = src.shape().y();
+ const int src_height = src.shape().z();
+ const int src_depth = src.shape().x();
+ const int batches = src.shape().total_size_upper(3);
+ const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
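+ // In NHWC the tensor's x-axis holds channels, y the width and z the height, hence
+ // the remapped shape accessors above.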
+ int dst_idx = 0;
+ for(int b = 0; b < batches; ++b)
+ {
+ for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y)
+ {
+ for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x)
+ {
+ for(int z = 0; z < src_depth; ++z)
+ {
+ for(int patch_y = y; patch_y < (y + kernel_height); ++patch_y)
+ {
+ for(int patch_x = x; patch_x < (x + kernel_width); ++patch_x)
+ {
+ dst[dst_idx++] = tensor_elem_at(src, Coordinates(z, patch_x, patch_y, b), BorderMode::CONSTANT, static_cast<T>(pad_val));
+ }
+ }
+ }
+
+ if(has_bias)
+ {
+ dst[dst_idx++] = static_cast<T>(1);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+{
+ switch(src.data_layout())
+ {
+ case DataLayout::NCHW:
+ {
+ im2col_nchw(src, dst, kernel_dims, conv_info, has_bias);
+ break;
+ }
+ case DataLayout::NHWC:
+ {
+ im2col_nhwc(src, dst, kernel_dims, conv_info, has_bias);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
+ }
+ }
+}
+
+template void im2col(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+template void im2col(const SimpleTensor<half> &src, SimpleTensor<half> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+template void im2col(const SimpleTensor<float> &src, SimpleTensor<float> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Im2Col.h b/tests/validation/reference/Im2Col.h
new file mode 100644
index 0000000..5277171
--- /dev/null
+++ b/tests/validation/reference/Im2Col.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_IM2COL_H__
+#define __ARM_COMPUTE_TEST_IM2COL_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_IM2COL_H__ */
diff --git a/tests/validation/reference/LocallyConnected.cpp b/tests/validation/reference/LocallyConnected.cpp
new file mode 100644
index 0000000..08e3f02
--- /dev/null
+++ b/tests/validation/reference/LocallyConnected.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "LocallyConnected.h"
+
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Convolution3d.h"
+#include "tests/validation/reference/Utils.h"
+
+#include "tests/framework/Asserts.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename TB>
+SimpleTensor<T> locally_connected(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
+{
+ // Create reference
+ SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+ // Compute reference
+ const int width_in = src.shape().x();
+ const int height_in = src.shape().y();
+ const int depth_in = src.shape().z();
+
+ const int width_out = dst.shape().x();
+ const int height_out = dst.shape().y();
+ const int depth_out = dst.shape().z();
+
+ const int width_weights = weights.shape().x();
+ const int height_weights = weights.shape().y();
+ const int depth_weights = weights.shape().z();
+
+ const int pad_left = info.pad_left();
+ const int pad_top = info.pad_top();
+ const int stride_xi = info.stride().first;
+ const int stride_yi = info.stride().second;
+
+ auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info);
+
+ const int start_xi = width_weights / 2 - pad_left;
+ const int start_yi = height_weights / 2 - pad_top;
+ const int end_xi = output_wh.first * stride_xi;
+ const int end_yi = output_wh.second * stride_yi;
+ const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in);
+
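+ // Unlike a regular convolution, every output coordinate (xo, yo, ofm) owns its own
+ // kernel: 'count' enumerates these positions and offsets into the weights tensor by
+ // count * width_weights * height_weights * depth_weights.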
+ for(int r = 0; r < num_batches; ++r)
+ {
+ int count = 0;
+ for(int yi = start_yi; yi < start_yi + end_yi; yi += stride_yi)
+ {
+ for(int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi)
+ {
+ for(int ofm = 0; ofm < depth_out; ++ofm)
+ {
+ // Compute input and output offsets
+ const int offset_in = r * width_in * height_in * depth_in;
+ const int xo = (xi - start_xi) / stride_xi;
+ const int yo = (yi - start_yi) / stride_yi;
+ const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;
+
+ ARM_COMPUTE_ASSERT(xo < width_out);
+ ARM_COMPUTE_ASSERT(yo < height_out);
+
+ // Compute 3D convolution
+ convolution_3d::detail::convolution3d(src, weights, bias, dst,
+ offset_in, count * width_weights * height_weights * depth_weights, count, offset_out,
+ xi, yi,
+ width_in, height_in, depth_in,
+ width_weights, height_weights);
+ count++;
+ }
+ }
+ }
+ }
+
+ return dst;
+}
+
+// Locally Connected only supports F32
+template SimpleTensor<float> locally_connected(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
+ const PadStrideInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/LocallyConnected.h b/tests/validation/reference/LocallyConnected.h
new file mode 100644
index 0000000..bf78d2c
--- /dev/null
+++ b/tests/validation/reference/LocallyConnected.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_LOCALLY_CONNECTED_H__
+#define __ARM_COMPUTE_TEST_LOCALLY_CONNECTED_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename TB>
+SimpleTensor<T> locally_connected(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_LOCALLY_CONNECTED_H__ */
diff --git a/tests/validation/reference/OpticalFlow.cpp b/tests/validation/reference/OpticalFlow.cpp
new file mode 100644
index 0000000..da0b9f9
--- /dev/null
+++ b/tests/validation/reference/OpticalFlow.cpp
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "OpticalFlow.h"
+
+#include "GaussianPyramidHalf.h"
+#include "Scharr.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+using KeyPointArray = std::vector<KeyPoint>;
+using InternalKeyPointArray = std::vector<InternalKeyPoint>;
+
+// Constants used for the Lucas-Kanade algorithm
+constexpr int W_BITS = 14;
+constexpr float D0 = 1 << W_BITS;
+constexpr float DETERMINANT_THRESHOLD = 1.0e-07f;
+constexpr float EIGENVALUE_THRESHOLD = 1.0e-04f;
+constexpr float FLT_SCALE = 1.0f / (1 << 20);
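+// D0 is the unit weight of the W_BITS fixed-point bilinear interpolation below;
+// FLT_SCALE scales the integer gradient accumulators back down when they are
+// converted to floating point.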
+
+// Creates an InternalKeyPointArray for tracking non-integral pixel coordinates
+InternalKeyPointArray create_internal_keypoints(const KeyPointArray &keypoints)
+{
+ InternalKeyPointArray internal_keypoints;
+
+ for(auto keypoint : keypoints)
+ {
+ InternalKeyPoint internal_keypoint;
+
+ internal_keypoint.x = static_cast<float>(keypoint.x);
+ internal_keypoint.y = static_cast<float>(keypoint.y);
+ internal_keypoint.tracking_status = static_cast<bool>(keypoint.tracking_status);
+
+ internal_keypoints.push_back(internal_keypoint);
+ }
+
+ return internal_keypoints;
+}
+
+// Scale tracked points based on pyramid level
+void scale_tracked_points(size_t level, size_t num_levels, bool use_initial_estimate,
+ InternalKeyPointArray &old_points_internal, InternalKeyPointArray &new_points_internal,
+ const KeyPointArray &old_points, const KeyPointArray &new_points_estimates)
+{
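+ // At the coarsest level, seed the points from old_points (or from the caller's
+ // estimates when use_initial_estimate is set), scaled by SCALE_PYRAMID_HALF^level;
+ // on every finer level, divide the coordinates by SCALE_PYRAMID_HALF to map them up
+ // one pyramid level.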
+ if(level == num_levels - 1) // lowest resolution
+ {
+ const float scale = std::pow(SCALE_PYRAMID_HALF, level);
+
+ for(size_t i = 0; i < old_points.size(); ++i)
+ {
+ old_points_internal.at(i).x = old_points.at(i).x * scale;
+ old_points_internal.at(i).y = old_points.at(i).y * scale;
+ old_points_internal.at(i).tracking_status = true;
+
+ InternalKeyPoint keypoint_to_track;
+
+ if(use_initial_estimate)
+ {
+ keypoint_to_track.x = new_points_estimates.at(i).x * scale;
+ keypoint_to_track.y = new_points_estimates.at(i).y * scale;
+ keypoint_to_track.tracking_status = (new_points_estimates.at(i).tracking_status == 1);
+ }
+ else
+ {
+ keypoint_to_track.x = old_points_internal.at(i).x;
+ keypoint_to_track.y = old_points_internal.at(i).y;
+ keypoint_to_track.tracking_status = true;
+ }
+
+ new_points_internal.at(i) = keypoint_to_track;
+ }
+ }
+ else
+ {
+ for(size_t i = 0; i < old_points.size(); ++i)
+ {
+ old_points_internal.at(i).x /= SCALE_PYRAMID_HALF;
+ old_points_internal.at(i).y /= SCALE_PYRAMID_HALF;
+ new_points_internal.at(i).x /= SCALE_PYRAMID_HALF;
+ new_points_internal.at(i).y /= SCALE_PYRAMID_HALF;
+ }
+ }
+}
+
+bool is_invalid_keypoint(const InternalKeyPoint &keypoint, const ValidRegion &valid_region, size_t window_dimension)
+{
+ const int half_window = window_dimension / 2;
+ const int x = std::floor(keypoint.x);
+ const int y = std::floor(keypoint.y);
+
+ return (x - half_window < valid_region.start(0)) || (x + half_window >= valid_region.end(0) - 1) || (y - half_window < valid_region.start(1)) || (y + half_window >= valid_region.end(1) - 1);
+}
+
+template <typename T>
+constexpr int INT_ROUND(T x, int n)
+{
+ return (x + (1 << (n - 1))) >> n;
+}
+
+// Return the bilinear value at a specified coordinate with different border modes
+template <typename T>
+int bilinear_interpolate(const SimpleTensor<T> &in, Coordinates id, float wx, float wy, BorderMode border_mode, T constant_border_value, int scale)
+{
+ const int level = id.x();
+ const int idy = id.y();
+
+ const float dx = wx;
+ const float dy = wy;
+ const float dx_1 = 1.0f - dx;
+ const float dy_1 = 1.0f - dy;
+
+ const T border_value = constant_border_value;
+
+ id.set(0, level);
+ id.set(1, idy);
+ const T tl = tensor_elem_at(in, id, border_mode, border_value);
+ id.set(0, level + 1);
+ id.set(1, idy);
+ const T tr = tensor_elem_at(in, id, border_mode, border_value);
+ id.set(0, level);
+ id.set(1, idy + 1);
+ const T bl = tensor_elem_at(in, id, border_mode, border_value);
+ id.set(0, level + 1);
+ id.set(1, idy + 1);
+ const T br = tensor_elem_at(in, id, border_mode, border_value);
+
+ // weights
+ const int w00 = roundf(dx_1 * dy_1 * D0);
+ const int w01 = roundf(dx * dy_1 * D0);
+ const int w10 = roundf(dx_1 * dy * D0);
+ const int w11 = D0 - w00 - w01 - w10;
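+ // The four weights sum to D0, so the weighted sum below carries a factor of
+ // 2^W_BITS; INT_ROUND shifts it back down by 'scale' with round-to-nearest.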
+
+ return static_cast<int>(INT_ROUND(tl * w00 + tr * w01 + bl * w10 + br * w11, scale));
+}
+
+template <typename T>
+std::vector<int> compute_derivative(const SimpleTensor<T> &input, const InternalKeyPoint &keypoint,
+ BorderMode border_mode, uint8_t constant_border_value, size_t window_dimension, int scale)
+{
+ std::vector<int> bilinear_values;
+
+ const int half_window = window_dimension / 2;
+
+ float keypoint_int_x = 0;
+ float keypoint_int_y = 0;
+
+ const float wx = std::modf(keypoint.x, &keypoint_int_x);
+ const float wy = std::modf(keypoint.y, &keypoint_int_y);
+
+ Coordinates tl_window(static_cast<int>(keypoint_int_x) - half_window, static_cast<int>(keypoint_int_y) - half_window);
+ Coordinates br_window(static_cast<int>(keypoint_int_x) + half_window, static_cast<int>(keypoint_int_y) + half_window);
+
+ for(int y = tl_window.y(); y <= br_window.y(); ++y)
+ {
+ for(int x = tl_window.x(); x <= br_window.x(); ++x)
+ {
+ bilinear_values.push_back(bilinear_interpolate(input, Coordinates(x, y), wx, wy, border_mode, static_cast<T>(constant_border_value), scale));
+ }
+ }
+
+ return bilinear_values;
+}
+
+std::tuple<float, float, float> compute_spatial_gradient_matrix(const std::vector<int> &bilinear_ix, const std::vector<int> &bilinear_iy)
+{
+ ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size());
+
+ int iA11 = 0;
+ int iA12 = 0;
+ int iA22 = 0;
+
+ for(size_t i = 0; i < bilinear_ix.size(); ++i)
+ {
+ int ixval = bilinear_ix[i];
+ int iyval = bilinear_iy[i];
+
+ iA11 += ixval * ixval;
+ iA12 += ixval * iyval;
+ iA22 += iyval * iyval;
+ }
+
+ return std::make_tuple(iA11 * FLT_SCALE, iA12 * FLT_SCALE, iA22 * FLT_SCALE);
+}
+
+std::tuple<double, double> compute_temporal_gradient_vector(const std::vector<int> &bilinear_it_old,
+ const std::vector<int> &bilinear_it_new,
+ const std::vector<int> &bilinear_ix,
+ const std::vector<int> &bilinear_iy)
+{
+ ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size());
+ ARM_COMPUTE_ERROR_ON(bilinear_it_old.size() != bilinear_it_new.size());
+
+ int ib1 = 0;
+ int ib2 = 0;
+
+ for(size_t i = 0; i < bilinear_ix.size(); ++i)
+ {
+ int ixval = bilinear_ix[i];
+ int iyval = bilinear_iy[i];
+ int ival = bilinear_it_old[i];
+ int jval = bilinear_it_new[i];
+
+ const int diff = jval - ival;
+
+ ib1 += diff * ixval;
+ ib2 += diff * iyval;
+ }
+
+ const double b1 = ib1 * FLT_SCALE;
+ const double b2 = ib2 * FLT_SCALE;
+
+ return std::make_tuple(b1, b2);
+}
+} // namespace
+
+template <typename T>
+std::vector<KeyPoint> optical_flow(const SimpleTensor<T> &old_input, const SimpleTensor<T> &new_input,
+ const OpticalFlowParameters ¶ms, size_t num_levels,
+ const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates,
+ BorderMode border_mode, uint8_t constant_border_value)
+{
+ const int filter_size = 3; // scharr filter size
+ const size_t max_iterations = 1000; // fixed by kernel
+ const size_t window_dimension = params.window_dimension;
+ const size_t num_iterations = (params.termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : params.num_iterations;
+
+ KeyPointArray new_points(old_points.size());
+
+ InternalKeyPointArray old_points_internal = create_internal_keypoints(old_points);
+ InternalKeyPointArray new_points_internal = create_internal_keypoints(new_points_estimates);
+
+ SimpleTensor<int16_t> scharr_gx;
+ SimpleTensor<int16_t> scharr_gy;
+
+ // Create pyramids
+ std::vector<SimpleTensor<T>> old_pyramid = gaussian_pyramid_half(old_input, border_mode, constant_border_value, num_levels);
+ std::vector<SimpleTensor<T>> new_pyramid = gaussian_pyramid_half(new_input, border_mode, constant_border_value, num_levels);
+
+ // Iterate over each level of the pyramid
+ for(size_t idx = num_levels; idx > 0; --idx)
+ {
+ const size_t level = idx - 1;
+
+ // Calculate scharr gradients
+ std::tie(scharr_gx, scharr_gy) = scharr<int16_t, T>(old_pyramid[level], filter_size, border_mode, constant_border_value, GradientDimension::GRAD_XY);
+
+ scale_tracked_points(level, num_levels, params.use_initial_estimate, old_points_internal, new_points_internal, old_points, new_points_estimates);
+
+ // Calculate valid region based on image dimensions of current pyramid level
+ const ValidRegion valid_region = shape_to_valid_region(old_pyramid[level].shape(), (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+
+ for(size_t i = 0; i < old_points.size(); ++i)
+ {
+ InternalKeyPoint &old_keypoint = old_points_internal.at(i);
+ InternalKeyPoint &new_keypoint = new_points_internal.at(i);
+
+ // Helper function for untracking keypoints on the final pyramid level (level 0, the highest resolution)
+ const auto untrack_keypoint = [&](bool predicate)
+ {
+ if(predicate && (level == 0))
+ {
+ new_keypoint.tracking_status = false;
+ return true;
+ }
+ return predicate;
+ };
+
+ if(!old_keypoint.tracking_status)
+ {
+ continue;
+ }
+
+ // Check if tracked coordinate is outside image coordinate
+ if(untrack_keypoint(is_invalid_keypoint(old_keypoint, valid_region, window_dimension)))
+ {
+ continue;
+ }
+
+ // Compute spatial derivative
+ std::vector<int> bilinear_ix = compute_derivative(scharr_gx, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS);
+ std::vector<int> bilinear_iy = compute_derivative(scharr_gy, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS);
+
+ float A11 = 0.f;
+ float A12 = 0.f;
+ float A22 = 0.f;
+ std::tie(A11, A12, A22) = compute_spatial_gradient_matrix(bilinear_ix, bilinear_iy);
+
+ // Calculate criteria for lost tracking: the point is untracked when matrix A is not invertible, i.e.
+ // 1. The determinant of the matrix is less than DETERMINANT_THRESHOLD, or
+ // 2. The minimum eigenvalue of the matrix is less than EIGENVALUE_THRESHOLD
+ const float trace_A = A11 + A22;
+ const float determinant = A11 * A22 - A12 * A12;
+ const float discriminant = (trace_A * trace_A) - 4.0f * (determinant);
+ const float eigenvalue_A = (trace_A - std::sqrt(discriminant)) / 2.0f;
+
+ // Divide by window_dimension squared to reduce the floating-point accumulation error
+ const float eigenvalue = eigenvalue_A / (window_dimension * window_dimension);
+
+ // Check if it is a good point to track
+ if(untrack_keypoint(eigenvalue < EIGENVALUE_THRESHOLD || determinant < DETERMINANT_THRESHOLD))
+ {
+ continue;
+ }
+
+ float prev_delta_x = 0.f;
+ float prev_delta_y = 0.f;
+
+ for(size_t j = 0; j < num_iterations; ++j)
+ {
+ // Check if tracked coordinate is outside image coordinate
+ if(untrack_keypoint(is_invalid_keypoint(new_keypoint, valid_region, window_dimension)))
+ {
+ break;
+ }
+
+ // Compute temporal derivative
+ std::vector<int> bilinear_it_old = compute_derivative(old_pyramid[level], old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5);
+ std::vector<int> bilinear_it_new = compute_derivative(new_pyramid[level], new_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5);
+
+ double b1 = 0.f;
+ double b2 = 0.f;
+ std::tie(b1, b2) = compute_temporal_gradient_vector(bilinear_it_old, bilinear_it_new, bilinear_ix, bilinear_iy);
+
+ // Compute motion vector -> A^-1 * -b
+ const float delta_x = (A12 * b2 - A22 * b1) / determinant;
+ const float delta_y = (A12 * b1 - A11 * b2) / determinant;
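+ // i.e. the closed-form solution of the 2x2 system A * delta = -b,
+ // with A = [[A11, A12], [A12, A22]] and det(A) = determinant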
+
+ // Update the new position
+ new_keypoint.x += delta_x;
+ new_keypoint.y += delta_y;
+
+ const float magnitude_squared = delta_x * delta_x + delta_y * delta_y;
+
+ // Check if the termination criterion includes EPSILON and whether it is satisfied
+ if(magnitude_squared <= params.epsilon && (params.termination == Termination::TERM_CRITERIA_EPSILON || params.termination == Termination::TERM_CRITERIA_BOTH))
+ {
+ break;
+ }
+
+ // Check convergence by analyzing the previous delta
+ if(j > 0 && (std::fabs(delta_x + prev_delta_x) < 0.01f && std::fabs(delta_y + prev_delta_y) < 0.01f))
+ {
+ new_keypoint.x -= delta_x * SCALE_PYRAMID_HALF;
+ new_keypoint.y -= delta_y * SCALE_PYRAMID_HALF;
+
+ break;
+ }
+
+ prev_delta_x = delta_x;
+ prev_delta_y = delta_y;
+ }
+ }
+ }
+
+ // Copy optical flow coordinates to output vector
+ for(size_t i = 0; i < old_points.size(); ++i)
+ {
+ const InternalKeyPoint &new_keypoint = new_points_internal.at(i);
+
+ new_points.at(i).x = roundf(new_keypoint.x);
+ new_points.at(i).y = roundf(new_keypoint.y);
+ new_points.at(i).tracking_status = new_keypoint.tracking_status ? 1 : 0;
+ }
+
+ return new_points;
+}
+
+template std::vector<KeyPoint> optical_flow(const SimpleTensor<uint8_t> &old_input, const SimpleTensor<uint8_t> &new_input,
+ const OpticalFlowParameters ¶ms, size_t num_levels,
+ const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates,
+ BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/OpticalFlow.h b/tests/validation/reference/OpticalFlow.h
new file mode 100644
index 0000000..ad6e2a9
--- /dev/null
+++ b/tests/validation/reference/OpticalFlow.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_OPTICAL_FLOW_H__
+#define __ARM_COMPUTE_TEST_OPTICAL_FLOW_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/Types.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<KeyPoint> optical_flow(const SimpleTensor<T> &old_input, const SimpleTensor<T> &new_input,
+ const OpticalFlowParameters &params, size_t num_levels,
+ const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates,
+ BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_OPTICAL_FLOW_H__ */
diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp
index 4a12ca6..bbb2e8d 100644
--- a/tests/validation/reference/Permute.cpp
+++ b/tests/validation/reference/Permute.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,7 +42,7 @@
permute(dst_shape, perm);
// Create reference
- SimpleTensor<T> dst{ dst_shape, src.data_type() };
+ SimpleTensor<T> dst{ dst_shape, src.data_type(), src.num_channels(), src.fixed_point_position(), src.quantization_info() };
// Compute reference
for(int i = 0; i < src.num_elements(); ++i)
@@ -57,9 +57,13 @@
return dst;
}
+template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm);
template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
+template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm);
template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm);
+template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
+template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index c14ab98..6973454 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -24,6 +24,7 @@
#include "PoolingLayer.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
@@ -35,30 +36,16 @@
{
namespace reference
{
-namespace
-{
-TensorShape calculate_output_shape(TensorShape shape, const PoolingLayerInfo &info)
-{
- TensorShape dst_shape = shape;
- const int pool_size_x = info.is_global_pooling() ? shape.x() : info.pool_size().width;
- const int pool_size_y = info.is_global_pooling() ? shape.y() : info.pool_size().height;
- const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(shape.x(),
- shape.y(),
- pool_size_x,
- pool_size_y,
- info.pad_stride_info());
- dst_shape.set(0, scaled_dims.first);
- dst_shape.set(1, scaled_dims.second);
-
- return dst_shape;
-}
-} // namespace
+using namespace arm_compute::misc::shape_calculator;
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
{
ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
+ // Create reference
+ SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type(), src.fixed_point_position()), info), src.data_type(), 1, src.fixed_point_position() };
+
const int pool_size_x = info.is_global_pooling() ? src.shape().x() : info.pool_size().width;
const int pool_size_y = info.is_global_pooling() ? src.shape().y() : info.pool_size().height;
PoolingType type = info.pool_type();
@@ -74,9 +61,6 @@
const auto h_src = static_cast<int>(src.shape()[1]);
const int upper_dims = src.shape().total_size() / (w_src * h_src);
- // Create reference
- SimpleTensor<T> dst{ calculate_output_shape(src.shape(), info), src.data_type(), 1, src.fixed_point_position() };
-
const auto w_dst = static_cast<int>(dst.shape()[0]);
const auto h_dst = static_cast<int>(dst.shape()[1]);
@@ -173,6 +157,10 @@
{
ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
+ const auto w_src = static_cast<int>(src.shape()[0]);
+ const auto h_src = static_cast<int>(src.shape()[1]);
+ const int upper_dims = src.shape().total_size() / (w_src * h_src);
+
const int pool_size_x = info.is_global_pooling() ? src.shape().x() : info.pool_size().width;
const int pool_size_y = info.is_global_pooling() ? src.shape().y() : info.pool_size().height;
PoolingType type = info.pool_type();
@@ -184,12 +172,8 @@
int pad_bottom = info.pad_stride_info().pad_bottom();
bool exclude_padding = info.exclude_padding();
- const auto w_src = static_cast<int>(src.shape()[0]);
- const auto h_src = static_cast<int>(src.shape()[1]);
- const int upper_dims = src.shape().total_size() / (w_src * h_src);
-
// Create reference
- SimpleTensor<T> dst{ calculate_output_shape(src.shape(), info), src.data_type(), 1, src.fixed_point_position() };
+ SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type(), src.fixed_point_position()), info), src.data_type(), 1, src.fixed_point_position() };
const auto w_dst = static_cast<int>(dst.shape()[0]);
const auto h_dst = static_cast<int>(dst.shape()[1]);
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 0cc96ab..f8a8b88 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,9 @@
*/
#include "Scale.h"
+
#include "Utils.h"
-#include "arm_compute/core/utils/misc/utility.h"
+#include "arm_compute/core/utils/misc/Utility.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
diff --git a/tests/validation/reference/Utils.h b/tests/validation/reference/Utils.h
index 2aa77c6..0e98bbe 100644
--- a/tests/validation/reference/Utils.h
+++ b/tests/validation/reference/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,11 +62,13 @@
{
const int x = coord.x();
const int y = coord.y();
+ const int z = coord.z();
const int width = src.shape().x();
const int height = src.shape().y();
+ const int depth = src.shape().z();
- // If coordinates beyond range of tensor's width or height
- if(x < 0 || y < 0 || x >= width || y >= height)
+ // If coordinates beyond range of tensor's width, height or depth
+ if(x < 0 || y < 0 || z < 0 || x >= width || y >= height || z >= depth)
{
if(border_mode == BorderMode::REPLICATE)
{
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp
new file mode 100644
index 0000000..fe79b4a
--- /dev/null
+++ b/tests/validation/reference/WidthConcatenateLayer.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "WidthConcatenateLayer.h"
+
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs)
+{
+ // Create reference
+ std::vector<TensorShape> shapes;
+
+ for(const auto &src : srcs)
+ {
+ shapes.emplace_back(src.shape());
+ }
+
+ DataType dst_type = srcs.empty() ? DataType::UNKNOWN : srcs[0].data_type();
+ TensorShape dst_shape = calculate_width_concatenate_shape(shapes);
+ SimpleTensor<T> dst(dst_shape, dst_type);
+
+ // Compute reference
+ int width_offset = 0;
+ const int width_out = dst.shape().x();
+
+ // Set output tensor to 0
+ std::fill_n(dst.data(), dst.num_elements(), 0);
+
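+ // Tensors are stored row-major (x fastest), so row r of slice d starts at element
+ // (d * height + r) * width in the source and maps to element
+ // (d * height + r) * width_out + width_offset in the wider output.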
+ for(const auto &src : srcs)
+ {
+ ARM_COMPUTE_ERROR_ON(width_offset >= width_out);
+
+ const int width = src.shape().x();
+ const int height = src.shape().y();
+ const int depth = src.shape().z();
+
+ const T *src_ptr = src.data();
+ T *dst_ptr = dst.data();
+
+ for(int d = 0; d < depth; ++d)
+ {
+ for(int r = 0; r < height; ++r)
+ {
+ int offset = d * height + r;
+ std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
+ src_ptr += width;
+ }
+ }
+
+ width_offset += width;
+ }
+
+ return dst;
+}
+
+template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs);
+template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs);
+template SimpleTensor<qint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<qint8_t>> &srcs);
+template SimpleTensor<qint16_t> widthconcatenate_layer(const std::vector<SimpleTensor<qint16_t>> &srcs);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/WidthConcatenateLayer.h
new file mode 100644
index 0000000..237e72b
--- /dev/null
+++ b/tests/validation/reference/WidthConcatenateLayer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */
diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
new file mode 100644
index 0000000..194a78e
--- /dev/null
+++ b/tests/validation/reference/Winograd.cpp
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Winograd.h"
+
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Utils.h"
+
+#include "arm_compute/core/Types.h"
+
+#include <algorithm>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T>
+void initialize_matrix_transform(SimpleTensor<T> &src, const Size2D &output_tile_size, const Size2D &kernel_size, WinogradTransformType winograd_transform_type)
+{
+ // Winograd input transform matrices
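+ // For a Winograd F(m x m, r x r) configuration, the input transform matrix has
+ // dimensions (m + r - 1) x (m + r - 1); e.g. F(2x2, 3x3) below uses a 4x4 matrix.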
+ static const float imatrix2x2_3x3[] =
+ {
+ 1.0f, 0.0f, -1.0f, 0.0f,
+ 0.0f, 1.0f, 1.0f, 0.0f,
+ 0.0f, -1.0f, 1.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, -1.0f
+ };
+
+ static const float imatrix4x4_3x3[] =
+ {
+ 4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,
+ 0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f,
+ 0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f,
+ 0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f,
+ 0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,
+ };
+
+ static const float imatrix4x4_5x5[] =
+ {
+ 1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,
+ 0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,
+ 0.f, -1.f, 1.f, 17.f / 4.f, -17.f / 4.f, -1.f, 1.f, 0.f,
+ 0.f, 1.f / 2.f, 1.f / 4.f, -5.f / 2.f, -5.f / 4.f, 2.f, 1.f, 0.f,
+ 0.f, -1.f / 2.f, 1.f / 4.f, 5.f / 2.f, -5.f / 4.f, -2.f, 1.f, 0.f,
+ 0.f, 2.f, 4.f, -5.f / 2.f, -5.f, 1.f / 2.f, 1.f, 0.f,
+ 0.f, -2.f, 4.f, 5.f / 2.f, -5.f, -1.f / 2.f, 1.f, 0.f,
+ 0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f
+ };
+
+ // ------------------------------------------
+
+ // Winograd filter transform matrices
+ static const float fmatrix2x2_3x3[] =
+ {
+ 1.0f, 0.0f, 0.0f,
+ 0.5f, 0.5f, 0.5f,
+ 0.5f, -0.5f, 0.5f,
+ 0.0f, 0.0f, 1.0f
+ };
+
+ static const float fmatrix4x4_3x3[] =
+ {
+ 0.25f, 0.0f, 0.0f,
+ -1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,
+ -1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f,
+ 1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f,
+ 1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f,
+ 0.0f, 0.0f, 1.0f
+ };
+
+ static const float fmatrix4x4_5x5[] =
+ {
+ 1.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+ -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,
+ -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f,
+ 1.0f / 90.0f, 1.0f / 45.0f, 2.0f / 45.0f, 4.0f / 45.0f, 8.0f / 45.0f,
+ 1.0f / 90.0f, -1.0f / 45.0f, 2.0f / 45.0f, -4.0f / 45.0f, 8.0f / 45.0f,
+ 4.0f / 45.0f, 2.0f / 45.0f, 1.0f / 45.0f, 1.0f / 90.0f, 1.0f / 180.0f,
+ 4.0f / 45.0f, -2.0f / 45.0f, 1.0f / 45.0f, -1.0f / 90.0f, 1.0f / 180.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f
+
+ };
+
+ // ------------------------------------------
+
+ // Winograd output transform matrices
+ static const float omatrix2x2_3x3[] =
+ {
+ 1.0f, 1.0f, 1.0f, 0.0f,
+ 0.0f, 1.0f, -1.0f, -1.0f
+ };
+
+ static const float omatrix4x4_3x3[] =
+ {
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
+ 0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,
+ 0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f,
+ 0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f
+ };
+
+ static const float omatrix4x4_5x5[] =
+ {
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 8.0f, 0.0f,
+ 0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f, 0.0f,
+ 0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 2.0f, 2.0f, 0.0f,
+ 0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f, -1.0f, 1.0f
+ };
+
+ // ------------------------------------------
+
+ using WinogradKey = std::tuple<std::pair<int, int>, std::pair<int, int>, WinogradTransformType>;
+
+ // Key = (Output tile size, Kernel size, Winograd transform type)
+ static std::map<WinogradKey, const float *> matrix_map =
+ {
+ { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
+ { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
+ { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
+ { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
+ };
+
+ // Find transformation matrix
+ std::map<WinogradKey, const float *>::iterator it;
+
+ it = matrix_map.find(WinogradKey(std::pair<int, int>(output_tile_size.width, output_tile_size.height),
+ std::pair<int, int>(kernel_size.width, kernel_size.height),
+ winograd_transform_type));
+
+ float const *matrix_values = nullptr;
+ if(it != matrix_map.end())
+ {
+ // Get matrix pointer
+ matrix_values = it->second;
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Winograd configuration not supported");
+ }
+
+ // Copy values
+ std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]);
+}
+} // namespace
+
+template <typename T>
+SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)
+{
+ ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW);
+
+ const PadStrideInfo conv_info = winograd_info.convolution_info;
+ const Size2D output_tile_size = winograd_info.output_tile_size;
+ const Size2D kernel_size = winograd_info.kernel_size;
+
+ SimpleTensor<T> out{ output_shape, in.data_type() };
+
+ // Calculate dimensions for the tile
+ const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1;
+ const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1;
+
+ TensorShape tile_dims(tile_w, tile_h);
+
+ // Simple tensor for the input tile
+ SimpleTensor<T> src_tile{ tile_dims, in.data_type() };
+
+ // Simple tensor for the temporary tile
+ SimpleTensor<T> tmp_tile{ tile_dims, in.data_type() };
+
+ // Simple tensor for the output tile
+ SimpleTensor<T> dst_tile{ tile_dims, in.data_type() };
+
+ // Simple tensor for the transformation matrix
+ SimpleTensor<T> matrix{ tile_dims, in.data_type() };
+
+ // Simple tensor for the transformation matrix transposed
+ SimpleTensor<T> matrix_transposed{ tile_dims, in.data_type() };
+
+ // Initialize matrix for the input transform
+ initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);
+
+ // Transpose matrix
+ transpose_matrix(matrix, matrix_transposed);
+
+ const int in_w = in.shape().x();
+ const int in_h = in.shape().y();
+ const int in_d = in.shape().z();
+ const int out_d = out.shape().z();
+ const int num_batches = in.shape().total_size() / (in_w * in_h * in_d);
+ const int num_tiles_x = std::ceil((in_w - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
+ const int num_tiles_y = std::ceil((in_h - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
+ const int step_x = output_tile_size.width;
+ const int step_y = output_tile_size.height;
+
+ ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y()));
+
+ for(int b = 0; b < num_batches; ++b)
+ {
+ for(int z = 0; z < in_d; ++z)
+ {
+ for(int y = 0; y < num_tiles_y; ++y)
+ {
+ for(int x = 0; x < num_tiles_x; ++x)
+ {
+ int xi = x * step_x - conv_info.pad_left();
+ int yi = y * step_y - conv_info.pad_top();
+
+ // Get the tile from the input tensor
+ get_tile(in, src_tile, Coordinates(xi, yi, z, b));
+
+ // Compute the transformation
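+ // dst_tile = M * src_tile * M^T, where M is the input transform matrix initialized
+ // above (B^T in the usual Winograd notation), evaluated as two matrix products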
+ matrix_multiply(matrix, src_tile, tmp_tile);
+ matrix_multiply(tmp_tile, matrix_transposed, dst_tile);
+
+ // Store the output tile across the channels
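+ // Layout of the transformed output: element i of the tile goes to plane i,
+ // x indexes the input channel and y the tile number (row-major over the tiles)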
+ for(int i = 0; i < out_d; ++i)
+ {
+ int xo = z;
+ int yo = x + y * num_tiles_x;
+ out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i];
+ }
+ }
+ }
+ }
+ }
+
+ return out;
+}
+
+template <typename T>
+SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only NCHW data format is supported");
+
+ // Create reference
+ SimpleTensor<T> out{ output_shape, in.data_type(), 1 };
+
+ const Size2D output_tile_size = winograd_info.output_tile_size;
+ const Size2D kernel_size = winograd_info.kernel_size;
+
+ TensorShape kernel_tile_dims(kernel_size.width, kernel_size.height);
+
+ // Calculate dimensions for the tile
+ const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1;
+ const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1;
+ const unsigned int input_tile_area = input_tile_w * input_tile_h;
+
+ // Simple tensor for the input tile
+ SimpleTensor<T> input_tile{ kernel_tile_dims, in.data_type(), 1 };
+
+ // Simple tensor for the transformation matrix
+ SimpleTensor<T> trans_matrix{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };
+
+ // Simple tensor for the transformation matrix transpose
+ SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_w, kernel_tile_dims[0]), in.data_type(), 1 };
+
+ // Simple tensor for the temporary tile
+ SimpleTensor<T> tmp_tile{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };
+
+ // Simple tensor for the output tile
+ SimpleTensor<T> transf_tile{ TensorShape(input_tile_w, input_tile_w), in.data_type(), 1 };
+
+ // Initialize matrix for the filter transform
+ initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);
+
+ // Transpose the transformation matrix
+ transpose_matrix(trans_matrix, trans_matrix_transposed);
+
+ const int num_channels = in.shape()[2];
+ const int num_filters = in.shape()[3];
+ const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters);
+
+ for(int n = 0; n < num_batches; ++n)
+ {
+ for(int w = 0; w < num_filters; ++w)
+ {
+ for(int z = 0; z < num_channels; ++z)
+ {
+ // Load the tile from the input tensor
+ get_tile(in, input_tile, Coordinates(0, 0, z, w, n));
+
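+ // transf_tile = G * input_tile * G^T, where G is the filter transform matrix
+ // initialized above, evaluated as the two matrix products below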
+ // First transformation
+ matrix_multiply(trans_matrix, input_tile, tmp_tile);
+
+ // Second transformation
+ matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile);
+
+ // Store the transformed tile across the channels: tile element i is
+ // written to plane i of the output at offset (filter w, channel z)
+ const int output_offset = w + z * num_filters;
+
+ for(unsigned int i = 0; i < input_tile_area; ++i)
+ {
+ out[output_offset + i * num_filters * num_channels] = transf_tile[i];
+ }
+ }
+ }
+ }
+
+ return out;
+}
+
+template <typename T>
+SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const SimpleTensor<T> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(winograd_info.output_data_layout != DataLayout::NCHW, "Only NCHW data format is supported");
+
+ const PadStrideInfo conv_info = winograd_info.convolution_info;
+ const Size2D input_dimensions = winograd_info.input_dimensions;
+ const Size2D output_tile_size = winograd_info.output_tile_size;
+ const Size2D kernel_size = winograd_info.kernel_size;
+
+ // Create reference
+ SimpleTensor<T> out{ output_shape, in.data_type(), 1 };
+
+ // Calculate dimensions for the tiles
+ const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1;
+ const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1;
+ const unsigned int out_tile_w = output_tile_size.width;
+ const unsigned int out_tile_h = output_tile_size.height;
+
+ ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h));
+ ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]);
+
+ // Compute tile dimensions
+ // Input tile dimensions
+ TensorShape in_tile_dims(in_tile_w, in_tile_h);
+
+ // Output tile dimensions
+ TensorShape out_tile_dims(output_tile_size.width, output_tile_size.height);
+
+ // Transformation matrix dimensions
+ TensorShape tr_tile_dims(in_tile_w, output_tile_size.width);
+
+ // Create tensors
+ // Simple tensor for the input tile
+ SimpleTensor<T> input_tile{ in_tile_dims, in.data_type(), 1 };
+
+ // Simple tensor for the transformation matrix
+ SimpleTensor<T> trans_matrix{ tr_tile_dims, in.data_type(), 1 };
+
+ // Simple tensor for the transformation matrix transpose
+ SimpleTensor<T> trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 };
+
+ // Simple tensor for the temporary tile
+ SimpleTensor<T> tmp_tile{ tr_tile_dims, in.data_type(), 1 };
+
+ // Simple tensor for the output tile
+ SimpleTensor<T> output_tile{ out_tile_dims, in.data_type(), 1 };
+
+ // Initialize matrix for the output transform
+ initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);
+
+ // Transpose the transformation matrix
+ transpose_matrix(trans_matrix, trans_matrix_transposed);
+
+ const int w_in = in.shape()[0];
+ const int h_in = in.shape()[1];
+ const int c_in = in.shape()[2];
+ const int w_out = out.shape()[0];
+ const int h_out = out.shape()[1];
+ const int c_out = out.shape()[2];
+ const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);
+
+ // Input strides
+ const int stridey_in = w_in;
+ const int stridez_in = stridey_in * h_in;
+ const int stridew_in = stridez_in * c_in;
+
+ // Output strides
+ const int stridey_out = w_out;
+ const int stridez_out = stridey_out * h_out;
+ const int stridew_out = stridez_out * c_out;
+
+ // Compute number of elements to process in the X and Y direction
+ const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
+ const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
+ const int num_tiles_x = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));
+ const int num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));
+
+ ARM_COMPUTE_UNUSED(num_tiles_y);
+ ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y));
+
+ for(int n = 0; n < num_batches; ++n)
+ {
+ for(int y = 0; y < h_in; ++y)
+ {
+ for(int x = 0; x < w_in; ++x)
+ {
+ // Load the input tile across the channels of the input tensor
+ for(int z = 0; z < c_in; ++z)
+ {
+ input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];
+ }
+
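+ // output_tile = M * input_tile * M^T, where M is the output transform matrix
+ // initialized above (A^T in the usual notation), evaluated as the two products below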
+ // First transformation
+ matrix_multiply(trans_matrix, input_tile, tmp_tile);
+
+ // Second transformation
+ matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile);
+
+ // Store the output tile
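+ // Inverse of the mapping used by the input transform: y indexes the tile
+ // (row-major over num_tiles_x) and x indexes the output channel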
+ const int xo = (y % num_tiles_x) * out_tile_w;
+ const int yo = (y / num_tiles_x) * out_tile_h;
+ const int zo = x;
+
+ const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);
+
+ for(int yi = 0; yi < static_cast<int>(out_tile_h); ++yi)
+ {
+ for(int xi = 0; xi < static_cast<int>(out_tile_w); ++xi)
+ {
+ // Check out-of-bound writes
+ if((xo + xi < w_out) && (yo + yi < h_out))
+ {
+ out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * out_tile_w];
+
+ // Add bias
+ out[output_offset + yi * stridey_out + xi] += b[zo];
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return out;
+}
+
+template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const SimpleTensor<float> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Winograd.h b/tests/validation/reference/Winograd.h
new file mode 100644
index 0000000..b74c2c3
--- /dev/null
+++ b/tests/validation/reference/Winograd.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_WINOGRAD_H__
+#define __ARM_COMPUTE_TEST_WINOGRAD_H__
+
+#include "arm_compute/core/TensorShape.h"
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Winograd transform type */
+enum class WinogradTransformType
+{
+ INPUT, /**< Winograd input transform */
+ FILTER, /**< Winograd filter transform */
+ OUTPUT /**< Winograd output transform */
+};
+
+template <typename T>
+SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+
+template <typename T>
+SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+
+template <typename T>
+SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const SimpleTensor<T> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_WINOGRAD_H__ */