arm_compute v18.01

Change-Id: I9bfa178c2e38bfd5fc812e62aab6760d87748e05
diff --git a/tests/validation/reference/Convolution.cpp b/tests/validation/reference/Convolution.cpp
new file mode 100644
index 0000000..777e2df
--- /dev/null
+++ b/tests/validation/reference/Convolution.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+
+#include "Convolution.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height)
+{
+    // Reference for a width x height convolution: every element of src is filtered with the
+    // coefficients in conv, the response is divided by scale and the quotient saturated to [0, 255].
+    // scale is the divisor of every response, so zero would be a division by zero.
+    ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "scale must be non-zero");
+
+    SimpleTensor<T>       dst(src.shape(), src.data_type());
+    SimpleTensor<int32_t> sum(src.shape(), src.data_type()); // unscaled filter responses
+
+    for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
+    {
+        const Coordinates id = index2coord(src.shape(), element_idx);
+        apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value);
+
+        // Read the response back once. Negative responses map to 0 before dividing, so the
+        // unsigned division below only ever sees non-negative values.
+        const int32_t  response = tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value);
+        const uint32_t scaled   = (response < 0) ? 0u : (static_cast<uint32_t>(response) / scale);
+
+        // Clamp the quotient to the upper bound before narrowing it to T
+        dst[element_idx] = static_cast<T>((scaled > 255) ? 255 : scaled);
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> convolution(const SimpleTensor<uint8_t> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+                                           const unsigned int width, const unsigned int height);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Convolution.h b/tests/validation/reference/Convolution.h
new file mode 100644
index 0000000..ea9f4e4
--- /dev/null
+++ b/tests/validation/reference/Convolution.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CONVOLUTION_H__
+#define __ARM_COMPUTE_TEST_CONVOLUTION_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CONVOLUTION_H__ */
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 1066411..567fac0 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -210,7 +210,7 @@
 
     acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
     acc += output_offset;
-    acc = clamp<int32_t>(acc, 0, 255);
+    acc = utility::clamp<int32_t>(acc, 0, 255);
 
     // Store the result
     *out_ptr = acc;
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 82c2188..0cf1087 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,26 +39,27 @@
                                     const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
 {
     // Create reference
+    const int   stride_x     = info.stride().first;
+    const int   stride_y     = info.stride().second;
     TensorShape scaled_shape = src.shape();
-    scaled_shape.set(0, output_shape.x());
-    scaled_shape.set(1, output_shape.y());
+    int         out_x        = src.shape().x() + (src.shape().x() - 1) * (stride_x - 1) + a.first + 2 * info.pad().first;
+    int         out_y        = src.shape().y() + (src.shape().y() - 1) * (stride_y - 1) + a.second + 2 * info.pad().second;
+    scaled_shape.set(0, out_x);
+    scaled_shape.set(1, out_y);
     SimpleTensor<T> scaled{ scaled_shape, src.data_type(), 1, src.fixed_point_position() };
 
-    const int          width_in      = src.shape().x();
-    const int          height_in     = src.shape().y();
-    const int          width_scaled  = scaled.shape().x();
-    const int          height_scaled = scaled.shape().y();
-    const int          num_2d_slices = src.shape().total_size() / (width_in * height_in);
-    const float        width_ratio   = static_cast<float>(width_in) / static_cast<float>(width_scaled);
-    const float        height_ratio  = static_cast<float>(height_in) / static_cast<float>(height_scaled);
-    const int          ax            = a.first;  // The number of zeros added to right edge of the input.
-    const int          ay            = a.second; // The number of zeros added to bottom edge of the input.
-    const unsigned int kernel_size   = weights.shape().x();
-    ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
-    const int transposed_convolution_padx = kernel_size - info.pad().first - 1;
-    const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
-    const int stridex                     = info.stride().first;
-    const int stridey                     = info.stride().second;
+    const int width_in      = src.shape().x();
+    const int height_in     = src.shape().y();
+    const int width_scaled  = scaled.shape().x();
+    const int height_scaled = scaled.shape().y();
+    const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
+    const int ax            = a.first;  // The number of zeros added to right edge of the input.
+    const int ay            = a.second; // The number of zeros added to top edge of the input.
+    ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1));
+
+    ARM_COMPUTE_ERROR_ON_MSG(ax > stride_x - 1, "ax must be smaller than stride_x");
+    ARM_COMPUTE_ERROR_ON_MSG(ay > stride_y - 1, "ay must be smaller than stride_y");
+
     for(int j = 0; j < scaled.num_elements(); ++j)
     {
         scaled[j] = T(0);
@@ -68,34 +69,23 @@
     {
         const int offset_slice_in  = slice * width_in * height_in;
         const int offset_slice_out = slice * width_scaled * height_scaled;
-        for(int yi = ay; yi < height_scaled; yi += stridey)
+        const int start_x          = info.pad().first;
+        const int start_y          = ay + info.pad().second;
+        const int end_y            = height_scaled - info.pad().second;
+        const int end_x            = width_scaled - ax - info.pad().first;
+
+        for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
         {
-            for(int xi = transposed_convolution_padx; xi < width_scaled; xi += stridex)
+            for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
             {
-                const float x_src     = (xi + 0.5f) * width_ratio - 0.5f;
-                const float y_src     = (yi + 0.5f) * height_ratio - 0.5f;
-                T          *out       = scaled.data() + offset_slice_out + xi + yi * width_scaled;
-                const bool  in_bounds = x_src > -1 && y_src > -1 && x_src < width_in && y_src < height_in;
-                const bool  in_axy    = xi < transposed_convolution_padx || xi >= (width_scaled - ax)  // this is checking if the x coordinate is in the padded left/right area
-                                        || yi < ay || yi >= (height_scaled - transposed_convolution_pady); // like above but top and bottom padding in the upscaled XY plane
-                if(!in_axy)
-                {
-                    if(in_bounds)
-                    {
-                        const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
-                        const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
-                        const T *in          = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
-                        *out                  = *in;
-                    }
-                    else
-                    {
-                        *out = T(0);
-                    }
-                }
+                const T *in  = src.data() + offset_slice_in + in_y * width_in + in_x;
+                T       *out = scaled.data() + offset_slice_out + xi + yi * width_scaled;
+                *out         = *in;
             }
         }
     }
-    const PadStrideInfo conv_info(1, 1, 1, 1, DimensionRoundingType::CEIL);
+
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
     return convolution_layer(scaled, weights, bias, output_shape, conv_info);
 }
 
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index 8222e32..c0bc1fa 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@
  * bias             Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
  * output_shape     Output tensor shape. The output has the same number of dimensions as the @p input.
  * info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- * a                The number of zeros added to right edge of the input.
+ * a                The number of zeros added to right and top edges of the input.
  *
  */
 template <typename T>
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 0e88d3d..6ca347f 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -89,14 +89,15 @@
                     Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
                     size_t      filter_offset = filter_plane * z;
 
-                    T val = 0;
+                    T val(0);
                     for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
                     {
                         for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
                         {
                             coords.set(0, i);
                             coords.set(1, j);
-                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f);
+                            T border_value(0);
+                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
                             ++filter_offset;
                         }
                     }
@@ -155,17 +156,17 @@
             {
                 for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
                 {
-                    Coordinates coords(x, y, z);
+                    Coordinates coords(x, y, z, r);
                     int         filter_offset = filter_plane * z;
 
-                    uint32_t val = 0;
+                    int32_t val = 0;
                     for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
                     {
                         for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
                         {
                             coords.set(0, i);
                             coords.set(1, j);
-                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
+                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
                             uint8_t w_val  = *(weights.data() + filter_offset);
                             val += (in_val + input_offset) * (w_val + weights_offset);
                             ++filter_offset;
@@ -189,6 +190,9 @@
 
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
                                                    const PadStrideInfo &conv_info);
+
+template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
+                                                  const PadStrideInfo &conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/Derivative.cpp b/tests/validation/reference/Derivative.cpp
new file mode 100644
index 0000000..0ef8fc2
--- /dev/null
+++ b/tests/validation/reference/Derivative.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Derivative.h"
+
+#include "Utils.h"
+#include "tests/Types.h"
+
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+const std::array<int8_t, 9> derivative_3_x{ { 0, 0, 0, -1, 0, 1, 0, 0, 0 } }; // 3x3 coefficients used for the x gradient
+const std::array<int8_t, 9> derivative_3_y{ { 0, -1, 0, 0, 0, 0, 0, 1, 0 } }; // 3x3 coefficients used for the y gradient
+// Compile-time lookup from an element type to its DataType; only the specialisations below exist.
+template <typename T>
+struct data_type;
+// int16_t elements are tagged as DataType::S16
+template <>
+struct data_type<int16_t>
+{
+    const static DataType value = DataType::S16;
+};
+} // namespace
+
+template <typename T, typename U>
+std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension)
+{
+    // Reference 3x3 derivative: returns the pair (x gradient, y gradient) of src; only the requested one(s) are written.
+    const unsigned int filter_size = 3;
+    const TensorShape  filter_shape{ filter_size, filter_size };
+
+    // Both outputs take the shape and channel count of src and the DataType associated with T
+    SimpleTensor<T> grad_x(src.shape(), data_type<T>::value, src.num_channels());
+    SimpleTensor<T> grad_y(src.shape(), data_type<T>::value, src.num_channels());
+
+    // Elements outside this region are skipped (presumably a filter_size / 2 frame when the border is UNDEFINED)
+    ValidRegion region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2));
+
+    for(int idx = 0; idx < src.num_elements(); ++idx)
+    {
+        Coordinates pos = index2coord(src.shape(), idx);
+        if(!is_in_valid_region(region, pos))
+        {
+            continue;
+        }
+
+        // Run the x filter, the y filter, or both, depending on the requested gradient
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                apply_2d_spatial_filter(pos, src, grad_x, filter_shape, derivative_3_x.data(), 1.f, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                apply_2d_spatial_filter(pos, src, grad_y, filter_shape, derivative_3_y.data(), 1.f, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                apply_2d_spatial_filter(pos, src, grad_x, filter_shape, derivative_3_x.data(), 1.f, border_mode, constant_border_value);
+                apply_2d_spatial_filter(pos, src, grad_y, filter_shape, derivative_3_y.data(), 1.f, border_mode, constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
+    }
+
+    return std::make_pair(grad_x, grad_y);
+}
+
+template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> derivative(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value,
+                                                                            GradientDimension gradient_dimension);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Derivative.h b/tests/validation/reference/Derivative.h
new file mode 100644
index 0000000..27664a7
--- /dev/null
+++ b/tests/validation/reference/Derivative.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_DERIVATIVE_H__
+#define __ARM_COMPUTE_TEST_DERIVATIVE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename U>
+std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_DERIVATIVE_H__ */
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index c24881e..5384715 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -138,7 +138,7 @@
 
         acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
         acc += output_offset;
-        acc = clamp<int32_t>(acc, 0, 255);
+        acc = utility::clamp<int32_t>(acc, 0, 255);
 
         // Store the result
         dst_ptr[y] = static_cast<uint8_t>(acc);
diff --git a/tests/validation/reference/GEMMTranspose1xW.h b/tests/validation/reference/GEMMTranspose1xW.h
new file mode 100644
index 0000000..d6a2e89
--- /dev/null
+++ b/tests/validation/reference/GEMMTranspose1xW.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMM.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference 1xW transposition: packs W-element-wide column strips of @p in into the rows of the result.
+ *
+ * Only the x and y extents of @p in are read; the result has shape (height * W, ceil(width / W)).
+ */
+template <typename T>
+SimpleTensor<T> gemm_transpose_1xW(const SimpleTensor<T> &in)
+{
+    // W is the number of elements of type T that fit in 16 bytes
+    const int     W         = 16 / sizeof(T);
+    const int32_t in_height = in.shape().y();
+    const int32_t in_width  = in.shape().x();
+    // ceil(width / W) in integer arithmetic: exact for any width and independent of <cmath>
+    const TensorShape shape_out(static_cast<size_t>(in.shape().y() * W), static_cast<size_t>((in.shape().x() + W - 1) / W));
+    SimpleTensor<T>   out(shape_out, in.data_type());
+    const int32_t     out_width     = out.shape().x();
+    const T          *in_base_addr  = in.data();
+    T                *out_base_addr = out.data();
+
+    for(int x = 0; x < in_width; x += W)
+    {
+        for(int y = 0; y < in_height; y++)
+        {
+            const T *in_addr  = (in_base_addr + x + y * in_width);
+            T       *out_addr = (out_base_addr + y * W + (x / W) * out_width);
+
+            for(int k = 0; k < W; ++k)
+            {
+                // If the input width is not multiple of W, we fill the reference with 0s
+                out_addr[k] = ((x + k) >= in_width) ? T(0) : in_addr[k];
+            }
+        }
+    }
+
+    return out;
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGDescriptor.cpp b/tests/validation/reference/HOGDescriptor.cpp
new file mode 100644
index 0000000..369ac74
--- /dev/null
+++ b/tests/validation/reference/HOGDescriptor.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "HOGDescriptor.h"
+
+#include "Derivative.h"
+#include "Magnitude.h"
+#include "Phase.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T>
+void hog_orientation_compute(const SimpleTensor<T> &mag, const SimpleTensor<T> &phase, std::vector<T> &bins, const HOGInfo &hog_info)
+{
+    const size_t num_bins    = hog_info.num_bins();
+    const size_t cell_height = hog_info.cell_size().height;
+    const size_t cell_width  = hog_info.cell_size().width;
+    // Adds the magnitudes of one cell to bins; phase_scale turns a phase sample into a fractional bin index.
+    float phase_scale = (PhaseType::SIGNED == hog_info.phase_type() ? num_bins / 360.0f : num_bins / 180.0f);
+    phase_scale *= (PhaseType::SIGNED == hog_info.phase_type() ? 360.0f / 255.0f : 1.0f);
+    // mag and phase hold the cell row by row, cell_width elements per row; row_idx is the offset of the current row.
+    int row_idx = 0;
+    for(size_t yc = 0; yc < cell_height; ++yc)
+    {
+        for(size_t xc = 0; xc < cell_width; xc++)
+        {
+            const float mag_value   = mag[(row_idx + xc)];
+            const float phase_value = phase[(row_idx + xc)] * phase_scale + 0.5f;
+            const float w1          = phase_value - floor(phase_value);
+
+            // The quantised phase is the histogram index [0, num_bins - 1]
+            // Check limit of histogram index. If hidx == num_bins, hidx = 0
+            const auto hidx = static_cast<unsigned int>(phase_value) % num_bins;
+
+            // Weighted vote between 2 bins
+            bins[hidx] += mag_value * (1.0f - w1);
+            bins[(hidx + 1) % num_bins] += mag_value * w1;
+        }
+        // Columns are bounded by cell_width (not cell_height) so non-square cells are covered exactly once
+        row_idx += cell_width;
+    }
+}
+
+template <typename T>
+void hog_block_normalization_compute(SimpleTensor<T> &block, SimpleTensor<T> &desc, const HOGInfo &hog_info, size_t block_idx)
+{
+    // Normalises the bins held in block in place and copies them to the element of desc at block_idx.
+    const HOGNormType norm_kind = hog_info.normalization_type();
+    const bool        use_l1    = (norm_kind == HOGNormType::L1_NORM);
+    const int         bin_count = desc.num_channels();
+    const Coordinates dst_coord = index2coord(desc.shape(), block_idx);
+
+    // First pass: accumulate |v| for L1_NORM and v * v for every other normalisation type
+    float accum = 0.0f;
+    for(int b = 0; b < bin_count; ++b)
+    {
+        const float v = block[b];
+        accum += use_l1 ? std::fabs(v) : v * v;
+    }
+
+    // Initial scale derived from the accumulated sum
+    float norm_scale = 1.0f / (std::sqrt(accum) + bin_count * 0.1f);
+
+    if(norm_kind == HOGNormType::L2HYS_NORM)
+    {
+        // Scale every bin, clip it at l2_hyst_threshold, then derive a new scale from the clipped bins
+        accum = 0.0f;
+        for(int b = 0; b < bin_count; ++b)
+        {
+            const float scaled  = block[b] * norm_scale;
+            const float clipped = fmin(scaled, hog_info.l2_hyst_threshold());
+            accum += clipped * clipped;
+            block[b] = clipped;
+        }
+
+        // We use the same constants of OpenCV
+        norm_scale = 1.0f / (std::sqrt(accum) + 1e-3f);
+    }
+
+    // Apply the final scale and write each bin to desc through a float pointer
+    for(int b = 0; b < bin_count; ++b)
+    {
+        block[b] *= norm_scale;
+        reinterpret_cast<float *>(desc(dst_coord))[b] = block[b];
+    }
+}
+} // namespace
+
+template <typename T, typename U, typename V>
+void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info)
+{
+    const size_t cell_width   = hog_info.cell_size().width;
+    const size_t cell_height  = hog_info.cell_size().height;
+    const size_t shape_width  = hog_space.shape().x() * hog_info.cell_size().width;
+    const size_t shape_height = hog_space.shape().y() * hog_info.cell_size().height;
+    // Computes one histogram of num_bins values per cell of mag/phase and stores the histograms in hog_space.
+    SimpleTensor<V> mag_cell(TensorShape(cell_width, cell_height), DataType::F32);
+    SimpleTensor<V> phase_cell(TensorShape(cell_width, cell_height), DataType::F32);
+    // mag_cell / phase_cell are scratch copies of one cell. cell_idx counts cells; the offsets locate the current cell.
+    int cell_idx = 0;
+    int y_offset = 0;
+    int x_offset = 0;
+
+    // Traverse shape one cell at a time (sx / sy only bound the iteration)
+    for(auto sy = cell_height - 1; sy < shape_height; sy += cell_height)
+    {
+        x_offset = 0;
+        for(auto sx = cell_width - 1; sx < shape_width; sx += cell_width)
+        {
+            int row_idx  = 0;
+            int elem_idx = 0;
+
+            // Traverse cell: copy its elements from mag/phase into the scratch tensors
+            for(auto y = 0u; y < cell_height; ++y)
+            {
+                for(auto x = 0u; x < cell_width; ++x)
+                {
+                    int shape_idx        = x + row_idx + x_offset + y_offset;
+                    mag_cell[elem_idx]   = mag[shape_idx];
+                    phase_cell[elem_idx] = phase[shape_idx];
+                    elem_idx++;
+                }
+                // Rows of mag/phase are shape_width elements apart
+                row_idx += shape_width;
+            }
+
+            // Partition magnitude values into bins based on phase values
+            std::vector<V> bins(hog_info.num_bins());
+            hog_orientation_compute(mag_cell, phase_cell, bins, hog_info);
+            // Store this cell's histogram at slot cell_idx of hog_space, num_bins values per slot
+            for(size_t i = 0; i < hog_info.num_bins(); ++i)
+            {
+                hog_space[cell_idx * hog_info.num_bins() + i] = bins[i];
+            }
+
+            x_offset += cell_width;
+            cell_idx++;
+        }
+        // Move down by one row of cells (cell_height rows of shape_width elements)
+        y_offset += (cell_height * shape_width);
+    }
+}
+
+template <typename T> // T: element type shared by the HOG space and descriptor tensors
+void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info)
+{
+    const Size2D cells_per_block        = hog_info.num_cells_per_block();
+    const Size2D cells_per_block_stride = hog_info.num_cells_per_block_stride();
+
+    const size_t block_width         = hog_info.block_size().width;
+    const size_t block_height        = hog_info.block_size().height;
+    const size_t block_stride_width  = hog_info.block_stride().width;
+    const size_t block_stride_height = hog_info.block_stride().height;
+    const size_t shape_width         = hog_space.shape().x() * hog_info.cell_size().width;
+    const size_t shape_height        = hog_space.shape().y() * hog_info.cell_size().height;
+
+    const size_t num_bins     = hog_info.num_bins();
+    const size_t num_channels = cells_per_block.area() * num_bins; // Number of histogram values gathered per block
+
+    SimpleTensor<T> block(TensorShape{ 1u, 1u }, DataType::F32, num_channels); // Scratch storage for one block
+
+    int block_idx      = 0; // Running block counter passed to hog_block_normalization_compute
+    int block_y_offset = 0; // hog_space index at which the current row of blocks starts
+
+    // Traverse shape: sy/sx track the last row/column covered by the block, hence block_height for sy and block_width for sx
+    for(auto sy = block_height - 1; sy < shape_height; sy += block_stride_height)
+    {
+        int block_x_offset = 0;
+        for(auto sx = block_width - 1; sx < shape_width; sx += block_stride_width)
+        {
+            int cell_y_offset = 0;
+            int elem_idx      = 0;
+
+            // Traverse block: copy the bins of every cell of the block into the scratch tensor
+            for(auto y = 0u; y < cells_per_block.height; ++y)
+            {
+                int cell_x_offset = 0;
+                for(auto x = 0u; x < cells_per_block.width; ++x)
+                {
+                    for(auto bin = 0u; bin < num_bins; ++bin)
+                    {
+                        int idx         = bin + cell_x_offset + cell_y_offset + block_x_offset + block_y_offset;
+                        block[elem_idx] = hog_space[idx];
+                        elem_idx++;
+                    }
+
+                    cell_x_offset += num_bins; // Next cell of the same cell row
+                }
+
+                cell_y_offset += hog_space.shape().x() * num_bins; // Next cell row of hog_space
+            }
+
+            // Normalize block and write to descriptor
+            hog_block_normalization_compute(block, desc, hog_info, block_idx);
+
+            block_x_offset += cells_per_block_stride.width * num_bins;
+            block_idx++;
+        }
+
+        block_y_offset += cells_per_block_stride.height * num_bins * hog_space.shape().x();
+    }
+}
+
+template <typename T, typename U> // T: descriptor element type, U: source element type
+SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info)
+{
+    // Source dimensions drive the size of both the descriptor and the HOG space
+    const auto src_width  = src.shape().x();
+    const auto src_height = src.shape().y();
+
+    // Create tensor info for HOG descriptor
+    const TensorInfo descriptor_info(hog_info, src_width, src_height);
+    SimpleTensor<T>  descriptor(descriptor_info.tensor_shape(), DataType::F32, descriptor_info.num_channels());
+
+    // Create HOG space tensor (num_cells_x, num_cells_y)
+    const TensorShape cell_grid(src_width / hog_info.cell_size().width,
+                                src_height / hog_info.cell_size().height);
+
+    // For each cell a histogram with a num_bins is created
+    const TensorInfo cell_grid_info(cell_grid, hog_info.num_bins(), DataType::F32);
+    SimpleTensor<T>  hog_space(cell_grid_info.tensor_shape(), DataType::F32, cell_grid_info.num_channels());
+
+    // Calculate derivative
+    SimpleTensor<int16_t> dx;
+    SimpleTensor<int16_t> dy;
+    std::tie(dx, dy) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY);
+
+    // Calculate magnitude and phase
+    const SimpleTensor<int16_t> grad_mag   = magnitude(dx, dy, MagnitudeType::L2NORM);
+    const SimpleTensor<uint8_t> grad_phase = phase(dx, dy, hog_info.phase_type());
+
+    // For each cell create histogram based on magnitude and phase
+    hog_orientation_binning(grad_mag, grad_phase, hog_space, hog_info);
+
+    // Normalize histograms based on block size
+    hog_block_normalization(descriptor, hog_space, hog_info);
+
+    return descriptor;
+}
+
+template SimpleTensor<float> hog_descriptor(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, const HOGInfo &hog_info); // Explicit instantiation: T = float, U = uint8_t
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGDescriptor.h b/tests/validation/reference/HOGDescriptor.h
new file mode 100644
index 0000000..e886445
--- /dev/null
+++ b/tests/validation/reference/HOGDescriptor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__
+#define __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename U> // T: element type of the returned tensor, U: element type of src
+SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info); // Declaration only: the template body is not part of this header
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__ */
diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.cpp b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
new file mode 100644
index 0000000..2442943
--- /dev/null
+++ b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NormalizePlanarYUVLayer.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+// Reference NormalizePlanarYUV layer for floating point types: out = (in - mean[c]) / sd[c], c being the index along dimension 2
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type *>
+SimpleTensor<T> normalize_planar_yuv_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &sd)
+{
+    const auto width     = static_cast<int>(src.shape()[0]);
+    const auto height    = static_cast<int>(src.shape()[1]);
+    const auto channels  = static_cast<int>(src.shape()[2]);
+    const int  num_upper = src.shape().total_size() / (width * height * channels);
+
+    SimpleTensor<T> dst(src.shape(), src.data_type());
+
+    // The loops visit the elements in linear index order, so a single running index addresses both src and dst
+    for(int upper = 0, idx = 0; upper < num_upper; ++upper)
+    {
+        for(int ch = 0; ch < channels; ++ch)
+        {
+            for(int y = 0; y < height; ++y)
+            {
+                for(int x = 0; x < width; ++x, ++idx)
+                {
+                    dst[idx] = (src[idx] - mean[ch]) / sd[ch];
+                }
+            }
+        }
+    }
+    return dst;
+}
+
+template SimpleTensor<half> normalize_planar_yuv_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &mean, const SimpleTensor<half> &sd); // Explicit instantiation: T = half
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.h b/tests/validation/reference/NormalizePlanarYUVLayer.h
new file mode 100644
index 0000000..c8740a3
--- /dev/null
+++ b/tests/validation/reference/NormalizePlanarYUVLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__
+#define __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr> // Overload is only viable when is_floating_point<T>::value is true
+SimpleTensor<T> normalize_planar_yuv_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &sd); // Declaration only: the template body is not part of this header
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__ */
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 727325f..0cc96ab 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -22,10 +22,9 @@
  * SOFTWARE.
  */
 
-#include "arm_compute/core/Helpers.h"
-
 #include "Scale.h"
 #include "Utils.h"
+#include "arm_compute/core/utils/misc/utility.h"
 #include "support/ToolchainSupport.h"
 
 namespace arm_compute
@@ -119,8 +118,8 @@
                     }
                     else if(border_mode == BorderMode::REPLICATE)
                     {
-                        id.set(0, clamp(static_cast<int>(x_src), 0, width - 1));
-                        id.set(1, clamp(static_cast<int>(y_src), 0, height - 1));
+                        id.set(0, utility::clamp<int>(x_src, 0, width - 1));
+                        id.set(1, utility::clamp<int>(y_src, 0, height - 1));
                         out[element_idx] = in[coord2index(in.shape(), id)];
                     }
                 }