arm_compute v19.02

Change-Id: I853a3ecf38f206da13c1b03640c8adf73c20477c
diff --git a/tests/validation/reference/Comparisons.cpp b/tests/validation/reference/Comparisons.cpp
new file mode 100644
index 0000000..a83c365
--- /dev/null
+++ b/tests/validation/reference/Comparisons.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Comparisons.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T>
+uint8_t compare_op(ComparisonOperation op, T src1, T src2)
+{
+    // Scalar reference for a single comparison: evaluates the relation selected by
+    // `op` on (src1, src2) and encodes the outcome as a byte mask, 255 when the
+    // relation holds and 0 when it does not.
+    constexpr uint8_t mask_true  = 255;
+    constexpr uint8_t mask_false = 0;
+
+    switch(op)
+    {
+        case ComparisonOperation::Equal:
+            return (src1 == src2) ? mask_true : mask_false;
+        case ComparisonOperation::NotEqual:
+            return (src1 != src2) ? mask_true : mask_false;
+        case ComparisonOperation::GreaterEqual:
+            return (src1 >= src2) ? mask_true : mask_false;
+        case ComparisonOperation::Greater:
+            return (src1 > src2) ? mask_true : mask_false;
+        case ComparisonOperation::LessEqual:
+            return (src1 <= src2) ? mask_true : mask_false;
+        case ComparisonOperation::Less:
+            return (src1 < src2) ? mask_true : mask_false;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported operation");
+    }
+    // Only reachable if the error macro returns; report "false" in that case.
+    return mask_false;
+}
+
+template <size_t dim> // Recursive traversal helper: walks dimension (dim - 1) of dst and hands the lower dimensions to BroadcastUnroll<dim - 1>.
+struct BroadcastUnroll
+{
+    template <typename T>
+    static void unroll(ComparisonOperation    op,
+                       const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<uint8_t> &dst,
+                       Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst) // the three coordinates are updated in place
+    {
+        const bool src1_is_broadcast = (src1.shape()[dim - 1] != dst.shape()[dim - 1]); // input extent differs from the output extent in this dimension
+        const bool src2_is_broadcast = (src2.shape()[dim - 1] != dst.shape()[dim - 1]);
+
+        id_src1.set(dim - 1, 0); // start all three cursors at index 0 of this dimension
+        id_src2.set(dim - 1, 0);
+        id_dst.set(dim - 1, 0);
+
+        for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1]) // the dst coordinate advances on every iteration
+        {
+            BroadcastUnroll < dim - 1 >::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
+
+            id_src1[dim - 1] += !src1_is_broadcast; // adds 1 unless broadcast, so a broadcast input stays at index 0
+            id_src2[dim - 1] += !src2_is_broadcast;
+        }
+    }
+};
+
+template <> // Recursion terminator: no dimension left to iterate, so handle the single element the coordinates point at.
+struct BroadcastUnroll<0>
+{
+    template <typename T>
+    static void unroll(ComparisonOperation    op,
+                       const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<uint8_t> &dst,
+                       Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        dst[coord2index(dst.shape(), id_dst)] = compare_op(op, src1[coord2index(src1.shape(), id_src1)], src2[coord2index(src2.shape(), id_src2)]); // each coordinate is linearised against its own tensor's shape
+    }
+};
+} // namespace
+
+template <typename T>
+SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2)
+{
+    // Element-wise reference comparison: the U8 output takes the broadcast shape of the two inputs.
+    SimpleTensor<uint8_t> result(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
+    Coordinates           pos_lhs, pos_rhs, pos_out; // per-tensor cursors updated by the unrolled traversal
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, result, pos_lhs, pos_rhs, pos_out);
+    return result;
+}
+
+template <>
+SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2)
+{
+    // uint8_t specialisation: the storage type is shared by U8 and QASYMM8, so src1's data type selects the path.
+    SimpleTensor<uint8_t> result(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
+    Coordinates           pos_lhs, pos_rhs, pos_out;
+
+    if(src1.data_type() != DataType::QASYMM8)
+    {
+        // Any other data type (U8): compare the stored values directly.
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, result, pos_lhs, pos_rhs, pos_out);
+        return result;
+    }
+
+    // QASYMM8: convert both operands to float tensors first and compare those.
+    const SimpleTensor<float> lhs_real = convert_from_asymmetric(src1);
+    const SimpleTensor<float> rhs_real = convert_from_asymmetric(src2);
+
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, lhs_real, rhs_real, result, pos_lhs, pos_rhs, pos_out);
+    return result;
+}
+
+template SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2); // explicit instantiation for half inputs
+template SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2); // explicit instantiation for float inputs
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Comparisons.h b/tests/validation/reference/Comparisons.h
new file mode 100644
index 0000000..4e054ad
--- /dev/null
+++ b/tests/validation/reference/Comparisons.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_COMPARISONS_H__
+#define __ARM_COMPUTE_TEST_COMPARISONS_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference comparison of src1 and src2 under `op`; the result is a uint8_t tensor.
+SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_COMPARISONS_H__ */
diff --git a/tests/validation/reference/ComputeAllAnchors.cpp b/tests/validation/reference/ComputeAllAnchors.cpp
index 48f4767..3f04980 100644
--- a/tests/validation/reference/ComputeAllAnchors.cpp
+++ b/tests/validation/reference/ComputeAllAnchors.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/reference/ComputeAllAnchors.h b/tests/validation/reference/ComputeAllAnchors.h
index b21bf3c..8fa5eab 100644
--- a/tests/validation/reference/ComputeAllAnchors.h
+++ b/tests/validation/reference/ComputeAllAnchors.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 5ca3b44..9167924 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,7 +35,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape,
-                                    const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
+                                    const PadStrideInfo &info)
 {
     // Create reference
     const int stride_x           = info.stride().first;
@@ -45,8 +45,8 @@
     const int weights_upper_dims = weights.shape().total_size() / (weights_width * weights_height);
 
     // Find the upsampled dimensions
-    unsigned int out_x = (src.shape().x() - 1) * stride_x + a.first + 1;
-    unsigned int out_y = (src.shape().y() - 1) * stride_y + a.second + 1;
+    unsigned int out_x = (src.shape().x() - 1) * stride_x + 1;
+    unsigned int out_y = (src.shape().y() - 1) * stride_y + 1;
 
     // Find the padding needed for the convolution with stride 1 in order to match output shape
     unsigned int padx = output_shape.x() - (out_x - weights_width + 1);
@@ -64,13 +64,8 @@
     const int width_scaled  = scaled.shape().x();
     const int height_scaled = scaled.shape().y();
     const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
-    const int ax            = a.first;  // The number of zeros added to right edge of the input.
-    const int ay            = a.second; // The number of zeros added to top edge of the input.
     ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1));
 
-    ARM_COMPUTE_ERROR_ON_MSG(ax > stride_x - 1, "ax must be smaller than stride_x");
-    ARM_COMPUTE_ERROR_ON_MSG(ay > stride_y - 1, "ay must be smaller than stride_y");
-
     if(src.data_type() == DataType::QASYMM8)
     {
         const uint8_t quantized_zero = src.quantization_info().offset;
@@ -100,9 +95,9 @@
         const int offset_slice_in  = slice * width_in * height_in;
         const int offset_slice_out = slice * width_scaled * height_scaled;
         const int start_x          = padx / 2;
-        const int start_y          = ay + pady / 2;
+        const int start_y          = pady / 2;
         const int end_y            = height_scaled - pady / 2;
-        const int end_x            = width_scaled - ax - padx / 2;
+        const int end_x            = width_scaled - padx / 2;
 
         for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
         {
@@ -120,11 +115,11 @@
 }
 
 template SimpleTensor<uint8_t> deconvolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
-                                                   const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
+                                                   const PadStrideInfo &info);
 template SimpleTensor<float> deconvolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
-                                                 const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
+                                                 const PadStrideInfo &info);
 template SimpleTensor<half> deconvolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
-                                                const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
+                                                const PadStrideInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index 95fb416..21583e3 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,8 +46,7 @@
  *
  */
 template <typename T, typename TB>
-SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                    const std::pair<unsigned int, unsigned int> &a);
+SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
index 90fbd91..139675d 100644
--- a/tests/validation/reference/DepthConcatenateLayer.cpp
+++ b/tests/validation/reference/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,7 +34,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs)
+SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst)
 {
     // Create reference
     std::vector<TensorShape> shapes;
@@ -44,20 +44,40 @@
         shapes.emplace_back(src.shape());
     }
 
-    DataType        dst_type  = srcs.empty() ? DataType::UNKNOWN : srcs[0].data_type();
-    TensorShape     dst_shape = calculate_depth_concatenate_shape(shapes);
-    SimpleTensor<T> dst(dst_shape, dst_type);
-
     // Compute reference
-    int       depth_offset = 0;
-    const int width_out    = dst.shape().x();
-    const int height_out   = dst.shape().y();
-    const int depth_out    = dst.shape().z();
-    const int out_stride_z = width_out * height_out;
-    const int batches      = dst.shape().total_size_upper(3);
-
-    // Set output tensor to 0
-    std::fill_n(dst.data(), dst.num_elements(), 0);
+    int       depth_offset                = 0;
+    const int width_out                   = dst.shape().x();
+    const int height_out                  = dst.shape().y();
+    const int depth_out                   = dst.shape().z();
+    const int out_stride_z                = width_out * height_out;
+    const int batches                     = dst.shape().total_size_upper(3);
+    auto have_different_quantization_info = [&](const SimpleTensor<T> &tensor) // true when `tensor` and dst disagree on quantization info
+    {
+        return tensor.quantization_info() != dst.quantization_info();
+    };
+    if(!srcs.empty() && srcs[0].data_type() == DataType::QASYMM8 && std::any_of(srcs.cbegin(), srcs.cend(), have_different_quantization_info))
+    {
+        for(int b = 0; b < batches; ++b)
+        {
+            // Input tensors can have smaller width and height than the output, so for each output's slice we need to requantize 0 (as this is the value
+            // used in NEFillBorderKernel by NEDepthConcatenateLayer) using the corresponding quantization info for that particular slice/input tensor.
+            int slice = 0;
+            for(const auto &src : srcs)
+            {
+                auto       ptr_slice = static_cast<T *>(dst(Coordinates(0, 0, slice, b)));
+                const auto num_elems_in_slice(out_stride_z * src.shape().z()); // this batch only: a whole-tensor count divided by depth would overrun when batches > 1
+                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [&src, &dst](T) // by reference: a by-value capture copies both tensor objects
+                {
+                    return dst.quantization_info().quantize(src.quantization_info().dequantize(0), RoundingPolicy::TO_NEAREST_UP);
+                });
+                slice += src.shape().z();
+            }
+        }
+    }
+    else
+    {
+        std::fill_n(dst.data(), dst.num_elements(), 0); // no requantization required: plain zero fill
+    }
 
     for(const auto &src : srcs)
     {
@@ -80,8 +100,20 @@
             {
                 for(int r = 0; r < height; ++r)
                 {
-                    std::copy(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out);
-                    src_ptr += width;
+                    // Write one input row into dst: requantize when the quantization infos differ, otherwise copy it verbatim.
+                    if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info())
+                    {
+                        std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [&src, &dst](T t)
+                        {
+                            const float dequantized_input = src.quantization_info().dequantize(t);
+                            return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP);
+                        });
+                    }
+                    else
+                    {
+                        std::copy(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out);
+                    }
+                    src_ptr += width;
                 }
             }
         }
@@ -92,9 +124,9 @@
     return dst;
 }
 
-template SimpleTensor<uint8_t> depthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs);
-template SimpleTensor<float> depthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs);
-template SimpleTensor<half> depthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs);
+template SimpleTensor<uint8_t> depthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst);
+template SimpleTensor<float> depthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst);
+template SimpleTensor<half> depthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthConcatenateLayer.h b/tests/validation/reference/DepthConcatenateLayer.h
index 3c486a8..8a78441 100644
--- a/tests/validation/reference/DepthConcatenateLayer.h
+++ b/tests/validation/reference/DepthConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs);
+SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index fd2e0ae..6d9f98d 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -25,6 +25,9 @@
 
 #include "tests/validation/Helpers.h"
 
+#include "arm_compute/core/utils/misc/Rounding.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
+
 #include "tests/Types.h"
 
 namespace arm_compute
@@ -35,17 +38,24 @@
 {
 namespace reference
 {
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
 {
     SimpleTensor<T2> result(src.shape(), dt_out);
 
     // Up-casting
-    if(src.data_type() <= dt_out)
+    if(element_size_from_data_type(src.data_type()) < element_size_from_data_type(dt_out))
     {
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            result[i] = src[i] << shift;
+            if(is_data_type_quantized(src.data_type()))
+            {
+                result[i] = scvt_f32_qasymm8(src[i], src.quantization_info().scale, src.quantization_info().offset);
+            }
+            else
+            {
+                result[i] = src[i] << shift;
+            }
         }
     }
     // Down-casting
@@ -54,48 +64,118 @@
         for(int i = 0; i < src.num_elements(); ++i)
         {
             T1 val    = src[i] >> shift;
-            result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
+            result[i] = (policy == ConvertPolicy::SATURATE) ? utils::cast::saturate_cast<T2>(val) : static_cast<T2>(val);
         }
     }
     return result;
 }
 
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
 {
     SimpleTensor<T2> result(src.shape(), dt_out);
+    ARM_COMPUTE_ERROR_ON(shift != 0);
+    ARM_COMPUTE_UNUSED(policy, shift);
 
-    const uint32_t scale = 1 << shift;
-
-    // Up-casting
-    if(src.data_type() <= dt_out)
+    if(!is_floating_point<T2>::value)
     {
+        // Always saturate on floats
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            result[i] = src[i] * static_cast<T2>(scale);
+            if(is_data_type_quantized(dt_out))
+            {
+                T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
+                result[i] = sqcvt_qasymm8_f32(val, src.quantization_info().scale, src.quantization_info().offset);
+            }
+            else
+            {
+                T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
+                result[i] = utils::cast::saturate_cast<T2>(val);
+            }
         }
     }
-    // Down-casting
     else
     {
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            T1 val    = src[i] / static_cast<T1>(scale);
-            result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
+            result[i] = static_cast<T2>(src[i]);
         }
     }
     return result;
 }
 
+// U8
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S8
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U16
 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S16
 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
-template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// F16
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<float> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// F32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthConvertLayer.h b/tests/validation/reference/DepthConvertLayer.h
index 5d97c73..2113593 100644
--- a/tests/validation/reference/DepthConvertLayer.h
+++ b/tests/validation/reference/DepthConvertLayer.h
@@ -35,10 +35,10 @@
 {
 namespace reference
 {
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/ElementWiseUnary.cpp b/tests/validation/reference/ElementWiseUnary.cpp
new file mode 100644
index 0000000..ae7f256
--- /dev/null
+++ b/tests/validation/reference/ElementWiseUnary.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ElementWiseUnary.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference element-wise unary operation: evaluates @p op on every element of @p src into a new tensor with the same shape and data type. */
+template <typename T>
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op)
+{
+    SimpleTensor<T> result(src.shape(), src.data_type());
+    for(int idx = 0; idx < src.num_elements(); ++idx)
+    {
+        if(op == ElementWiseUnary::RSQRT)
+        {
+            result[idx] = 1.f / std::sqrt(src[idx]); // reciprocal square root
+        }
+        else if(op == ElementWiseUnary::EXP)
+        {
+            result[idx] = std::exp(src[idx]);
+        }
+        else
+        {
+            ARM_COMPUTE_ERROR("Not implemented"); // raised per element, so an empty tensor never reaches it
+        }
+    }
+    return result;
+}
+
+template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op);
+template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/ElementWiseUnary.h b/tests/validation/reference/ElementWiseUnary.h
new file mode 100644
index 0000000..9c6fe14
--- /dev/null
+++ b/tests/validation/reference/ElementWiseUnary.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_H__
+#define __ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // T: element type shared by the input tensor and the returned tensor
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op); // reference implementation: applies op to each element of src
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_H__ */
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
new file mode 100644
index 0000000..6d533ed
--- /dev/null
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ElementwiseOperations.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+/** Evaluate one arithmetic operation on a pair of scalar operands.
+ *
+ * Both operands are first converted to common_promoted_signed_type<T>::intermediate_type and the
+ * operation is carried out in that type. Only ADD, SUB and DIV results go through saturate_cast
+ * when @p convert_policy is ConvertPolicy::SATURATE; every other result is narrowed with static_cast.
+ * @param[in] op             Operation to evaluate; a value not listed below triggers ARM_COMPUTE_ERROR.
+ * @param[in] src1           First (left-hand) operand.
+ * @param[in] src2           Second (right-hand) operand.
+ * @param[in] convert_policy Conversion policy used when narrowing ADD, SUB and DIV results.
+ * @return The result converted back to T.
+ */
+template <typename T>
+T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy)
+{
+    using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
+
+    const auto lhs = static_cast<intermediate_type>(src1);
+    const auto rhs = static_cast<intermediate_type>(src2);
+
+    intermediate_type val;
+    switch(op)
+    {
+        case ArithmeticOperation::ADD:
+            val = lhs + rhs;
+            break;
+        case ArithmeticOperation::SUB:
+            val = lhs - rhs;
+            break;
+        case ArithmeticOperation::MIN:
+            val = std::min(lhs, rhs);
+            break;
+        case ArithmeticOperation::MAX:
+            val = std::max(lhs, rhs);
+            break;
+        case ArithmeticOperation::SQUARED_DIFF:
+            val = lhs - rhs; // the difference is stored in the intermediate type before being squared
+            val = val * val;
+            break;
+        case ArithmeticOperation::DIV:
+            val = lhs / rhs;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Not handled");
+    }
+
+    const bool saturating_op = (op == ArithmeticOperation::ADD) || (op == ArithmeticOperation::SUB) || (op == ArithmeticOperation::DIV);
+    return (saturating_op && convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T>(val) : static_cast<T>(val);
+}
+
+/** Loops over dimension (dim - 1) of dst and recurses into BroadcastUnroll<dim - 1>; an input whose extent differs from dst's on that dimension stays at coordinate 0. */
+template <size_t dim>
+struct BroadcastUnroll
+{
+    template <typename T>
+    static void unroll(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy, Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        constexpr size_t axis   = dim - 1;
+        const auto       extent = dst.shape()[axis];
+        const int        step1  = (src1.shape()[axis] == extent) ? 1 : 0; // 0 keeps src1 pinned while it is broadcast along this axis
+        const int        step2  = (src2.shape()[axis] == extent) ? 1 : 0; // same for src2
+        id_src1.set(axis, 0);
+        id_src2.set(axis, 0);
+        id_dst.set(axis, 0);
+        for(size_t n = 0; n < extent; ++n)
+        {
+            BroadcastUnroll<axis>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+            id_src1[axis] += step1;
+            id_src2[axis] += step2;
+            ++id_dst[axis];
+        }
+    }
+};
+
+/** Recursion end: combines the pair of input elements addressed by the coordinates and stores the result at the addressed element of dst. */
+template <>
+struct BroadcastUnroll<0>
+{
+    template <typename T>
+    static void unroll(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy, Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        dst[coord2index(dst.shape(), id_dst)] = arithm_op(op, src1[coord2index(src1.shape(), id_src1)], src2[coord2index(src2.shape(), id_src2)], convert_policy);
+    }
+};
+} // namespace
+
+/** Reference arithmetic operation that fills the caller-provided @p dst (inputs are broadcast against its shape) and returns a copy of it. */
+template <typename T>
+SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy)
+{
+    Coordinates src1_pos, src2_pos, dst_pos;
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, src1_pos, src2_pos, dst_pos);
+
+    return dst;
+}
+
+/** Specialisation for uint8_t tensors.
+ *  - dst is DataType::QASYMM8: the inputs go through convert_from_asymmetric, the operation runs on float
+ *    tensors and the result is written back with convert_to_asymmetric using dst's quantization info.
+ *  - otherwise (DataType::U8): the operation runs directly on the stored values. */
+template <>
+SimpleTensor<uint8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, SimpleTensor<uint8_t> &dst, ConvertPolicy convert_policy)
+{
+    Coordinates id_src1, id_src2, id_dst;
+
+    if(dst.data_type() != DataType::QASYMM8)
+    {
+        // DataType::U8
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+        return dst;
+    }
+
+    // QASYMM8: compute on float copies of the inputs, then convert the float result back
+    const SimpleTensor<float> src1_f32 = convert_from_asymmetric(src1);
+    const SimpleTensor<float> src2_f32 = convert_from_asymmetric(src2);
+    SimpleTensor<float>       dst_f32(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
+
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_f32, src2_f32, dst_f32, convert_policy, id_src1, id_src2, id_dst);
+
+    dst = convert_to_asymmetric(dst_f32, dst.quantization_info());
+    return dst;
+}
+
+template SimpleTensor<int32_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, SimpleTensor<int32_t> &dst,
+                                                    ConvertPolicy convert_policy); // explicit instantiations of the destination-tensor overload; uint8_t is covered by the specialisation above
+template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst,
+                                                    ConvertPolicy convert_policy);
+template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, SimpleTensor<int8_t> &dst,
+                                                   ConvertPolicy convert_policy);
+template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, SimpleTensor<half> &dst, ConvertPolicy convert_policy);
+template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, SimpleTensor<float> &dst, ConvertPolicy convert_policy);
+
+/** Convenience overload that allocates the destination itself (broadcast shape of the inputs, @p dst_data_type); QASYMM8 is rejected because the quantized output tensor must be passed in. */
+template <typename T>
+SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(dst_data_type == DataType::QASYMM8, "For QASYMM8, the quantized output tensor should be passed directly.");
+    const TensorShape out_shape = TensorShape::broadcast_shape(src1.shape(), src2.shape());
+    SimpleTensor<T>   out(out_shape, dst_data_type);
+    return arithmetic_operation<T>(op, src1, src2, out, convert_policy);
+}
+
+template SimpleTensor<int32_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, DataType dst_data_type,
+                                                    ConvertPolicy convert_policy); // explicit instantiations of the overload that takes the destination data type
+template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type,
+                                                    ConvertPolicy convert_policy);
+template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/ElementwiseOperations.h b/tests/validation/reference/ElementwiseOperations.h
new file mode 100644
index 0000000..7518ec8
--- /dev/null
+++ b/tests/validation/reference/ElementwiseOperations.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_H__
+#define __ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // overload that writes into the caller-provided dst tensor; convert_policy defaults to WRAP
+SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy = ConvertPolicy::WRAP);
+// Overload that takes the destination data type instead of a destination tensor; convert_policy defaults to WRAP
+template <typename T>
+SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy = ConvertPolicy::WRAP);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_H__ */
diff --git a/tests/validation/reference/GEMMReshapeLHSMatrix.cpp b/tests/validation/reference/GEMMReshapeLHSMatrix.cpp
new file mode 100644
index 0000000..431d656
--- /dev/null
+++ b/tests/validation/reference/GEMMReshapeLHSMatrix.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMMReshapeLHSMatrix.h"
+
+#include "arm_compute/core/Types.h"
+
+#include "tests/validation/Helpers.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference implementation of the GEMM LHS matrix reshape: copies in into out tile by tile
+SimpleTensor<T> gemm_reshape_lhs_matrix(const SimpleTensor<T> &in, const TensorShape &output_shape, const GEMMLHSMatrixInfo &lhs_info)
+{
+    ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 3);
+
+    SimpleTensor<T> out{ output_shape, in.data_type() };
+
+    // Initialize the output tensor with zero
+    std::memset(&out[0], 0, out.num_elements() * sizeof(T));
+
+    const unsigned int K = in.shape()[0];
+    const unsigned int M = in.shape()[1];
+    const unsigned int B = in.shape()[2];
+
+    const unsigned int num_tiles_x = std::ceil(K / static_cast<float>(lhs_info.k0)); // tiles of width k0 needed to cover K, rounded up
+    const unsigned int num_tiles_y = std::ceil(M / static_cast<float>(lhs_info.m0)); // tiles of height m0 needed to cover M, rounded up
+
+    const TensorShape tile_dims(lhs_info.k0, lhs_info.m0);
+    const TensorShape tile_dims_transposed(lhs_info.m0, lhs_info.k0);
+
+    // Simple tensor for the input tile
+    SimpleTensor<T> src_tile{ tile_dims, in.data_type() };
+
+    // Simple tensor for the transposed input tile
+    SimpleTensor<T> src_tile_transposed{ tile_dims_transposed, in.data_type() };
+
+    // Simple tensor to use when storing the values
+    SimpleTensor<T> *tile_to_use = lhs_info.transpose ? &src_tile_transposed : &src_tile;
+    // offset_output_x scales (y % v0) in the destination offset; step_output_x is the distance in out between consecutive rows of the stored tile
+    const unsigned int offset_output_x = lhs_info.interleave ? tile_to_use->shape()[0] : tile_to_use->shape()[0] * tile_to_use->shape()[1];
+    const unsigned int step_output_x   = lhs_info.interleave ? tile_to_use->shape()[0] * lhs_info.v0 : tile_to_use->shape()[0];
+
+    for(unsigned int z = 0; z < B; ++z)
+    {
+        for(unsigned int y = 0; y < num_tiles_y; ++y)
+        {
+            for(unsigned int x = 0; x < num_tiles_x; ++x)
+            {
+                // Get the tile from the input tensor
+                get_tile<T>(in, src_tile, Coordinates(x * lhs_info.k0, y * lhs_info.m0, z, 0));
+
+                if(lhs_info.transpose)
+                {
+                    // Transpose matrix
+                    transpose_matrix<T>(src_tile, src_tile_transposed);
+                }
+
+                // Store: linear offset in out at which the first row of this tile is written
+                const unsigned int offset_output = (x * lhs_info.k0 * lhs_info.m0 * lhs_info.v0) + ((y % lhs_info.v0) * offset_output_x) + ((y / lhs_info.v0) * out.shape()[0]) + (z * out.shape()[0] * out.shape()[1]);
+
+                for(unsigned int i = 0; i < tile_to_use->shape()[1]; ++i)
+                {
+                    const unsigned int offset_tile = i * tile_to_use->shape()[0];
+
+                    // Copy per row
+                    std::copy(&(*tile_to_use)[offset_tile], &(*tile_to_use)[offset_tile + tile_to_use->shape()[0]], &out[offset_output + i * step_output_x]);
+                }
+            }
+        }
+    }
+
+    return out;
+}
+template SimpleTensor<int> gemm_reshape_lhs_matrix(const SimpleTensor<int> &in, const TensorShape &output_shape, const GEMMLHSMatrixInfo &lhs_info);
+template SimpleTensor<short> gemm_reshape_lhs_matrix(const SimpleTensor<short> &in, const TensorShape &output_shape, const GEMMLHSMatrixInfo &lhs_info);
+template SimpleTensor<char> gemm_reshape_lhs_matrix(const SimpleTensor<char> &in, const TensorShape &output_shape, const GEMMLHSMatrixInfo &lhs_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/reference/GEMMReshapeLHSMatrix.h b/tests/validation/reference/GEMMReshapeLHSMatrix.h
new file mode 100644
index 0000000..c0328db
--- /dev/null
+++ b/tests/validation/reference/GEMMReshapeLHSMatrix.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GEMMRESHAPELHSMATRIX_H__
+#define __ARM_COMPUTE_TEST_GEMMRESHAPELHSMATRIX_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // T: element type shared by the input tensor and the returned tensor
+SimpleTensor<T> gemm_reshape_lhs_matrix(const SimpleTensor<T> &in, const TensorShape &output_shape, const GEMMLHSMatrixInfo &lhs_info); // returns a tensor of shape output_shape holding the reshaped LHS matrix
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_GEMMRESHAPELHSMATRIX_H__ */
\ No newline at end of file
diff --git a/tests/validation/reference/GEMMReshapeRHSMatrix.cpp b/tests/validation/reference/GEMMReshapeRHSMatrix.cpp
new file mode 100644
index 0000000..0224c5c
--- /dev/null
+++ b/tests/validation/reference/GEMMReshapeRHSMatrix.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMMReshapeRHSMatrix.h"
+
+#include "arm_compute/core/Types.h"
+
+#include "tests/validation/Helpers.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference implementation of the GEMM RHS matrix reshape: copies in into out tile by tile
+SimpleTensor<T> gemm_reshape_rhs_matrix(const SimpleTensor<T> &in, const TensorShape &output_shape, const GEMMRHSMatrixInfo &rhs_info)
+{
+    ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 3);
+
+    SimpleTensor<T> out{ output_shape, in.data_type() };
+
+    // Initialize the output tensor with zero
+    std::memset(&out[0], 0, out.num_elements() * sizeof(T));
+
+    const unsigned int N = in.shape()[0];
+    const unsigned int K = in.shape()[1];
+    const unsigned int B = in.shape()[2];
+
+    const unsigned int num_tiles_x = std::ceil(N / static_cast<float>(rhs_info.n0)); // tiles of width n0 needed to cover N, rounded up
+    const unsigned int num_tiles_y = std::ceil(K / static_cast<float>(rhs_info.k0)); // tiles of height k0 needed to cover K, rounded up
+
+    const TensorShape tile_dims(rhs_info.n0, rhs_info.k0);
+    const TensorShape tile_dims_transposed(rhs_info.k0, rhs_info.n0);
+
+    // Simple tensor for the input tile
+    SimpleTensor<T> src_tile{ tile_dims, in.data_type() };
+
+    // Simple tensor for the transposed input tile
+    SimpleTensor<T> src_tile_transposed{ tile_dims_transposed, in.data_type() };
+
+    // Simple tensor to use when storing the values
+    SimpleTensor<T> *tile_to_use = rhs_info.transpose ? &src_tile_transposed : &src_tile;
+    // offset_output_x scales (x % h0) in the destination offset; step_output_x is the distance in out between consecutive rows of the stored tile
+    const unsigned int offset_output_x = rhs_info.interleave ? tile_to_use->shape()[0] : tile_to_use->shape()[0] * tile_to_use->shape()[1];
+    const unsigned int step_output_x   = rhs_info.interleave ? tile_to_use->shape()[0] * rhs_info.h0 : tile_to_use->shape()[0];
+
+    for(unsigned int z = 0; z < B; ++z)
+    {
+        for(unsigned int y = 0; y < num_tiles_y; ++y)
+        {
+            for(unsigned int x = 0; x < num_tiles_x; ++x)
+            {
+                // Get the tile from the input tensor
+                get_tile<T>(in, src_tile, Coordinates(x * rhs_info.n0, y * rhs_info.k0, z, 0));
+
+                if(rhs_info.transpose)
+                {
+                    // Transpose matrix
+                    transpose_matrix<T>(src_tile, src_tile_transposed);
+                }
+
+                // Store: linear offset in out at which the first row of this tile is written
+                const unsigned int offset_output = (y * rhs_info.k0 * rhs_info.n0 * rhs_info.h0) + ((x % rhs_info.h0) * offset_output_x) + ((x / rhs_info.h0) * out.shape()[0]) + (z * out.shape()[0] * out.shape()[1]);
+
+                for(unsigned int i = 0; i < tile_to_use->shape()[1]; ++i)
+                {
+                    const unsigned int offset_tile = i * tile_to_use->shape()[0];
+
+                    // Copy per row
+                    std::copy(&(*tile_to_use)[offset_tile], &(*tile_to_use)[offset_tile + tile_to_use->shape()[0]], &out[offset_output + i * step_output_x]);
+                }
+            }
+        }
+    }
+
+    return out;
+}
+template SimpleTensor<int> gemm_reshape_rhs_matrix(const SimpleTensor<int> &in, const TensorShape &output_shape, const GEMMRHSMatrixInfo &rhs_info);
+template SimpleTensor<short> gemm_reshape_rhs_matrix(const SimpleTensor<short> &in, const TensorShape &output_shape, const GEMMRHSMatrixInfo &rhs_info);
+template SimpleTensor<char> gemm_reshape_rhs_matrix(const SimpleTensor<char> &in, const TensorShape &output_shape, const GEMMRHSMatrixInfo &rhs_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/reference/GEMMReshapeRHSMatrix.h b/tests/validation/reference/GEMMReshapeRHSMatrix.h
new file mode 100644
index 0000000..8edcfd6
--- /dev/null
+++ b/tests/validation/reference/GEMMReshapeRHSMatrix.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GEMMRESHAPERHSMATRIX_H__
+#define __ARM_COMPUTE_TEST_GEMMRESHAPERHSMATRIX_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // T: element type shared by the input tensor and the returned tensor
+SimpleTensor<T> gemm_reshape_rhs_matrix(const SimpleTensor<T> &in, const TensorShape &output_shape, const GEMMRHSMatrixInfo &rhs_info); // returns a tensor of shape output_shape holding the reshaped RHS matrix
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_GEMMRESHAPERHSMATRIX_H__ */
\ No newline at end of file
diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp
new file mode 100644
index 0000000..ab5ea2f
--- /dev/null
+++ b/tests/validation/reference/Gather.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Gather.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference gather: each destination element is read from @p src at the same coordinates,
+ *  except along @p actual_axis, where the source coordinate is looked up in @p indices.
+ *  The destination shape comes from compute_gather_shape and keeps the data type of @p src. */
+template <typename T>
+SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis)
+{
+    const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_gather_shape(src.shape(), indices.shape(), actual_axis);
+    SimpleTensor<T>   dst(dst_shape, src.data_type());
+    const auto       *lookup = static_cast<const uint32_t *>(indices.data());
+
+    // Visit every coordinate of the destination tensor
+    Window win;
+    win.use_tensor_dimensions(dst_shape);
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        Coordinates src_coords;
+        for(unsigned int d = 0; d < id.num_dimensions(); ++d)
+        {
+            // Only the gather axis is remapped through the index table
+            const int coord = (d == actual_axis) ? static_cast<int>(lookup[id[d]]) : id[d];
+            src_coords.set(d, coord);
+        }
+        const T &picked = *reinterpret_cast<const T *>(src(src_coords));
+        *reinterpret_cast<T *>(dst(id)) = picked;
+    });
+
+    return dst;
+}
+
+template SimpleTensor<float> gather(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis);
+template SimpleTensor<half> gather(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis);
+template SimpleTensor<uint16_t> gather(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis);
+template SimpleTensor<uint8_t> gather(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/reference/Gather.h b/tests/validation/reference/Gather.h
new file mode 100644
index 0000000..54e1cb8
--- /dev/null
+++ b/tests/validation/reference/Gather.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GATHER_H__
+#define __ARM_COMPUTE_TEST_GATHER_H__
+
+#include "arm_compute/core/Types.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference gather: output coordinates are remapped through indices along actual_axis; the result has src's data type
+SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_TEST_GATHER_H__ */
diff --git a/tests/validation/reference/L2NormalizeLayer.cpp b/tests/validation/reference/L2NormalizeLayer.cpp
index fcd6226..43885b2 100644
--- a/tests/validation/reference/L2NormalizeLayer.cpp
+++ b/tests/validation/reference/L2NormalizeLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,7 +54,7 @@
     SimpleTensor<T> dst{ src.shape(), src.data_type() };
 
     // Reduce across given axis
-    SimpleTensor<T> sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE);
+    SimpleTensor<T> sum = reduction_operation<T, T>(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE);
 
     // Compute reference
     const int upper_dims     = src.shape().total_size_upper(axis + 1);
diff --git a/tests/validation/reference/NonMaxSuppression.cpp b/tests/validation/reference/NonMaxSuppression.cpp
new file mode 100644
index 0000000..5b7980d
--- /dev/null
+++ b/tests/validation/reference/NonMaxSuppression.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NonMaxSuppression.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+using CandidateBox = std::pair<int /* index */, float /* score */>;
+using Box          = std::tuple<float, float, float, float>;
+
+inline float get_elem_by_coordinate(const SimpleTensor<float> &tensor, Coordinates coord) // reads the float stored at `coord`
+{
+    return *static_cast<const float *>(tensor(coord)); // cast the element pointer returned by tensor(coord) and dereference it
+}
+
+inline Box get_box(const SimpleTensor<float> &boxes, size_t id) // packs the four values stored for box `id` into a Box tuple
+{
+    return std::make_tuple(
+               get_elem_by_coordinate(boxes, Coordinates(0, id)), // tuple element 0 <- boxes(0, id)
+               get_elem_by_coordinate(boxes, Coordinates(1, id)), // tuple element 1 <- boxes(1, id)
+               get_elem_by_coordinate(boxes, Coordinates(2, id)), // tuple element 2 <- boxes(2, id)
+               get_elem_by_coordinate(boxes, Coordinates(3, id))); // tuple element 3 <- boxes(3, id)
+}
+
+// Returns (min(b0, b2), min(b1, b3)), i.e. the lower bound per axis. NOTE(review): the name suggests (y, x) order - confirm against the box layout.
+inline std::pair<float, float> get_min_yx(Box b)
+{
+    return std::make_pair(
+               std::min<float>(std::get<0>(b), std::get<2>(b)), // first axis: elements 0 and 2
+               std::min<float>(std::get<1>(b), std::get<3>(b))); // second axis: elements 1 and 3
+}
+// Returns (max(b0, b2), max(b1, b3)), i.e. the upper bound per axis. NOTE(review): the name suggests (y, x) order - confirm against the box layout.
+inline std::pair<float, float> get_max_yx(Box b)
+{
+    return std::make_pair(
+               std::max<float>(std::get<0>(b), std::get<2>(b)), // first axis: elements 0 and 2
+               std::max<float>(std::get<1>(b), std::get<3>(b))); // second axis: elements 1 and 3
+}
+
+inline float compute_size(const std::pair<float, float> &min, const std::pair<float, float> &max) // area of the rectangle spanned by `min` and `max`
+{
+    return (max.first - min.first) * (max.second - min.second); // product of the two per-axis extents
+}
+
+inline float compute_intersection(const std::pair<float, float> &b0_min, const std::pair<float, float> &b0_max,
+                                  const std::pair<float, float> &b1_min, const std::pair<float, float> &b1_max, float b0_size, float b1_size) // despite the name, returns the intersection-over-union ratio of the two boxes
+{
+    const float inter = std::max<float>(std::min<float>(b0_max.first, b1_max.first) - std::max<float>(b0_min.first, b1_min.first), 0.0) * std::max<float>(std::min<float>(b0_max.second,
+                        b1_max.second)
+                        - std::max<float>(b0_min.second, b1_min.second),
+                        0.0); // overlap area: per-axis overlap (clamped at 0 when the boxes are disjoint on that axis), multiplied together
+    return inter / (b0_size + b1_size - inter); // denominator is the union area; the only caller in view (reject_box) guarantees both sizes are > 0
+}
+
+inline bool reject_box(Box b0, Box b1, float threshold) // true when the overlap ratio of b0 and b1 is strictly above threshold
+{
+    const auto  b0_min  = get_min_yx(b0);
+    const auto  b0_max  = get_max_yx(b0);
+    const auto  b1_min  = get_min_yx(b1);
+    const auto  b1_max  = get_max_yx(b1);
+    const float b0_size = compute_size(b0_min, b0_max);
+    const float b1_size = compute_size(b1_min, b1_max);
+    if(b0_size <= 0.f || b1_size <= 0.f) // a box with non-positive area never causes a rejection
+    {
+        return false;
+    }
+    else
+    {
+        const float box_weight = compute_intersection(b0_min, b0_max, b1_min, b1_max, b0_size, b1_size); // value is inter / (b0_size + b1_size - inter)
+        return box_weight > threshold;
+    }
+}
+
+inline std::vector<CandidateBox> get_candidates(const SimpleTensor<float> &scores, float threshold) // returns (index, score) pairs whose score is strictly above threshold, highest score first
+{
+    std::vector<CandidateBox> candidates_vector;
+    for(int i = 0; i < scores.num_elements(); ++i)
+    {
+        if(scores[i] > threshold) // scores equal to the threshold are dropped
+        {
+            const auto cb = CandidateBox({ i, scores[i] });
+            candidates_vector.push_back(cb);
+        }
+    }
+    std::stable_sort(candidates_vector.begin(), candidates_vector.end(), [](const CandidateBox bb0, const CandidateBox bb1)
+    {
+        return bb0.second > bb1.second; // must be strict: std::stable_sort requires a strict weak ordering ('>=' is undefined behaviour); equal scores keep ascending index order
+    });
+    return candidates_vector;
+}
+
+inline bool is_box_selected(const CandidateBox &cb, const SimpleTensor<float> &bboxes, std::vector<int> &selected_boxes, float threshold) // true if candidate cb is not rejected by any already selected box
+{
+    for(int j = selected_boxes.size() - 1; j >= 0; --j) // walk the selected boxes from the most recently added back to the first
+    {
+        const auto selected_box_jth   = get_box(bboxes, selected_boxes[j]);
+        const auto candidate_box      = get_box(bboxes, cb.first); // does not depend on j; it is re-read on every iteration
+        const bool candidate_rejected = reject_box(candidate_box, selected_box_jth, threshold);
+        if(candidate_rejected)
+        {
+            return false; // a single rejecting box is enough
+        }
+    }
+    return true; // also reached when selected_boxes is empty
+}
+} // namespace
+
+SimpleTensor<int> non_max_suppression(const SimpleTensor<float> &bboxes, const SimpleTensor<float> &scores, SimpleTensor<int> &indices,
+                                      unsigned int max_output_size, float score_threshold, float nms_threshold) // greedy reference NMS: writes the kept box indices into `indices` and returns it
+{
+    const size_t                    num_boxes         = bboxes.shape().y(); // box count is taken from the y extent of bboxes
+    const size_t                    output_size       = std::min(static_cast<size_t>(max_output_size), num_boxes); // never keep more boxes than exist
+    const std::vector<CandidateBox> candidates_vector = get_candidates(scores, score_threshold); // candidates above score_threshold, ordered by get_candidates
+    std::vector<int>                selected;
+    for(const auto c : candidates_vector)
+    {
+        if(selected.size() == output_size)
+        {
+            break; // output quota reached
+        }
+        if(is_box_selected(c, bboxes, selected, nms_threshold))
+        {
+            selected.push_back(c.first); // keep the candidate's box index
+        }
+    }
+    std::copy_n(selected.begin(), selected.size(), indices.data()); // only the first selected.size() entries are overwritten; NOTE(review): assumes indices holds at least that many elements
+    return indices; // returned by value, i.e. a copy of the caller's tensor
+}
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/NonMaxSuppression.h b/tests/validation/reference/NonMaxSuppression.h
new file mode 100644
index 0000000..0418412
--- /dev/null
+++ b/tests/validation/reference/NonMaxSuppression.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NON_MAX_SUPPRESION_H__
+#define __ARM_COMPUTE_TEST_NON_MAX_SUPPRESION_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+SimpleTensor<int> non_max_suppression(const SimpleTensor<float> &bboxes, const SimpleTensor<float> &scores, SimpleTensor<int> &indices,
+                                      unsigned int max_output_size, float score_threshold, float nms_threshold); // reference NMS: fills the leading entries of `indices` with the kept box indices and returns a copy of it
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_NON_MAX_SUPPRESION_H__ */
diff --git a/tests/validation/reference/NormalizationLayer.cpp b/tests/validation/reference/NormalizationLayer.cpp
index e6ca233..d57e6f1 100644
--- a/tests/validation/reference/NormalizationLayer.cpp
+++ b/tests/validation/reference/NormalizationLayer.cpp
@@ -56,7 +56,7 @@
     // IN_MAP_1D and CROSS_MAP normalize over a single axis only
     int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
 
-    if(type == NormType::CROSS_MAP)
+    if(info.is_cross_map())
     {
         // Remove also depth from upper dimensions since it is the dimension we
         // want to use for normalization
diff --git a/tests/validation/reference/PadLayer.cpp b/tests/validation/reference/PadLayer.cpp
index 0a3b38d..b9a93dd 100644
--- a/tests/validation/reference/PadLayer.cpp
+++ b/tests/validation/reference/PadLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,8 +65,8 @@
         const size_t m = coord[4];
         const size_t n = coord[5];
 
-        std::array<size_t, TensorShape::num_max_dimensions> dims   = { 0, 1, 2, 3, 4, 5 };
-        std::array<size_t, TensorShape::num_max_dimensions> coords = { i, j, k, l, m, n };
+        std::array<size_t, TensorShape::num_max_dimensions> dims   = { { 0, 1, 2, 3, 4, 5 } };
+        std::array<size_t, TensorShape::num_max_dimensions> coords = { { i, j, k, l, m, n } };
         auto is_padding_area = [&](size_t i)
         {
             return (coords[i] < paddings_extended[i].first || coords[i] > orig_shape[i] + paddings_extended[i].first - 1);
diff --git a/tests/validation/reference/Range.cpp b/tests/validation/reference/Range.cpp
new file mode 100644
index 0000000..c24512f
--- /dev/null
+++ b/tests/validation/reference/Range.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T> // writes num_of_elements values into dst: the first is start, each following one is the previous plus step
+void generate_range(SimpleTensor<T> &dst, float start, const size_t num_of_elements, float step)
+{
+    float val = start; // running value, accumulated in float regardless of T
+    for(size_t index = 0; index < num_of_elements; index++)
+    {
+        dst[index] = static_cast<T>(val); // converted to the element type on store
+        val += step; // repeated addition rather than start + index * step
+    }
+}
+} // namespace
+
+template <typename T> // generic reference for range: fills dst in place and returns it by value
+SimpleTensor<T> range(SimpleTensor<T> &dst, float start, const size_t num_of_elements, float step)
+{
+    generate_range(dst, start, num_of_elements, step);
+    return dst; // copy of the tensor that was just filled
+}
+
+template <> // uint8_t specialization: adds a separate path for tensors whose data type is QASYMM8
+SimpleTensor<uint8_t> range(SimpleTensor<uint8_t> &dst, float start, const size_t num_of_elements, float step)
+{
+    if(dst.data_type() == DataType::QASYMM8)
+    {
+        SimpleTensor<float> dst_tmp{ dst.shape(), DataType::F32, 1 }; // the sequence is generated in a temporary float tensor first
+        generate_range(dst_tmp, start, num_of_elements, step);
+        return convert_to_asymmetric(dst_tmp, dst.quantization_info()); // converted using dst's quantization info; dst itself is not written on this path
+    }
+    generate_range(dst, start, num_of_elements, step); // any other data type: values are cast straight to uint8_t
+    return dst;
+}
+template SimpleTensor<float> range(SimpleTensor<float> &dst, float start, const size_t num_of_elements, float step);
+template SimpleTensor<half> range(SimpleTensor<half> &dst, float start, const size_t num_of_elements, float step);
+template SimpleTensor<int8_t> range(SimpleTensor<int8_t> &dst, float start, const size_t num_of_elements, float step);
+template SimpleTensor<uint16_t> range(SimpleTensor<uint16_t> &dst, float start, const size_t num_of_elements, float step);
+template SimpleTensor<int16_t> range(SimpleTensor<int16_t> &dst, float start, const size_t num_of_elements, float step);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Range.h b/tests/validation/reference/Range.h
new file mode 100644
index 0000000..d31166b
--- /dev/null
+++ b/tests/validation/reference/Range.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_RANGE_H__
+#define __ARM_COMPUTE_TEST_RANGE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // reference for range: produces num_elements values beginning at start and advancing by step
+SimpleTensor<T> range(SimpleTensor<T> &dst, float start, size_t num_elements, float step);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_RANGE_H__ */
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index 2f103a6..fb7a6d6 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,19 +38,46 @@
 {
 namespace
 {
-template <typename T>
-T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op, int stride)
+template <typename T, typename OT>
+OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
 {
-    using type = typename std::remove_cv<T>::type;
-    auto res   = type(0);
+    using type = typename std::remove_cv<OT>::type;
+    auto res   = (op == ReductionOperation::PROD) ? type(1) : type(0);
 
     if(std::is_integral<type>::value)
     {
-        uint32_t int_res = 0;
+        auto int_res = static_cast<uint32_t>(res);
         for(int i = 0; i < reduce_elements; ++i)
         {
-            auto elem = static_cast<uint32_t>(*(ptr + stride * i));
-            int_res += (op == ReductionOperation::SUM_SQUARE) ? elem * elem : elem;
+            auto elem = *(ptr + stride * i);
+
+            switch(op)
+            {
+                case ReductionOperation::ARG_IDX_MIN:
+                    if(*(ptr + stride * static_cast<uint32_t>(int_res)) > elem)
+                    {
+                        int_res = static_cast<uint32_t>(i);
+                    }
+                    break;
+                case ReductionOperation::ARG_IDX_MAX:
+                    if(*(ptr + stride * static_cast<uint32_t>(int_res)) < elem)
+                    {
+                        int_res = static_cast<uint32_t>(i);
+                    }
+                    break;
+                case ReductionOperation::SUM_SQUARE:
+                    int_res += elem * elem;
+                    break;
+                case ReductionOperation::MEAN_SUM:
+                case ReductionOperation::SUM:
+                    int_res += elem;
+                    break;
+                case ReductionOperation::PROD:
+                    int_res *= elem;
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Operation not supported");
+            }
         }
         if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
         {
@@ -63,23 +90,50 @@
         for(int i = 0; i < reduce_elements; ++i)
         {
             auto elem = *(ptr + stride * i);
-            res += (op == ReductionOperation::SUM_SQUARE) ? elem * elem : elem;
+            switch(op)
+            {
+                case ReductionOperation::ARG_IDX_MIN:
+                    if(*(ptr + stride * static_cast<uint32_t>(res)) > elem)
+                    {
+                        res = static_cast<uint32_t>(i);
+                    }
+                    break;
+                case ReductionOperation::ARG_IDX_MAX:
+                    if(*(ptr + stride * static_cast<uint32_t>(res)) < elem)
+                    {
+                        res = static_cast<uint32_t>(i);
+                    }
+                    break;
+                case ReductionOperation::SUM_SQUARE:
+                    res += elem * elem;
+                    break;
+                case ReductionOperation::MEAN_SUM:
+                case ReductionOperation::SUM:
+                    res += elem;
+                    break;
+                case ReductionOperation::PROD:
+                    res *= elem;
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Operation not supported");
+            }
         }
         if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
         {
             res /= reduce_elements;
         }
     }
-
     return res;
 }
 } // namespace
 
-template <typename T>
-SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+template <typename T, typename OT>
+SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
 {
     // Create reference
-    SimpleTensor<T>    dst{ dst_shape, src.data_type(), 1, src.quantization_info() };
+    const bool         is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
+    DataType           output_data_type = is_arg_min_max ? DataType::U32 : src.data_type();
+    SimpleTensor<OT>   dst{ dst_shape, output_data_type, 1, src.quantization_info() };
     const unsigned int src_width    = src.shape().x();
     const unsigned int src_height   = src.shape().y();
     const unsigned int src_depth    = src.shape().z();
@@ -94,8 +148,7 @@
             for(unsigned int du = 0; du < upper_dims; ++du)
             {
                 const T *src_row_ptr = src.data() + du * reduce_elems;
-                auto     res         = reduce_operation(src_row_ptr, reduce_elems, op, 1);
-                dst[du]              = res;
+                dst[du]              = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
             }
         }
         break;
@@ -109,8 +162,7 @@
                     const int in_offset   = du * src_height * src_width + x;
                     const int out_offset  = du * src_width + x;
                     const T *src_row_ptr = src.data() + in_offset;
-                    auto      res         = reduce_operation(src_row_ptr, reduce_elems, op, src_width);
-                    dst[out_offset]       = res;
+                    dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
                 }
             }
         }
@@ -127,8 +179,7 @@
                         const int in_offset   = du * src_depth * src_height * src_width + y * src_width + x;
                         const int out_offset  = du * src_width * src_height + y * src_width + x;
                         const T *src_row_ptr = src.data() + in_offset;
-                        auto      res         = reduce_operation(src_row_ptr, reduce_elems, op, src_height * src_width);
-                        dst[out_offset]       = res;
+                        dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_height * src_width);
                     }
                 }
             }
@@ -148,8 +199,7 @@
                             const int in_offset   = du * src_batch * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                             const int out_offset  = du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                             const T *src_row_ptr = src.data() + in_offset;
-                            auto      res         = reduce_operation(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
-                            dst[out_offset]       = res;
+                            dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
                         }
                     }
                 }
@@ -163,9 +213,34 @@
     return dst;
 }
 
+template <typename T, typename OT> // T: input element type, OT: output element type
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+{
+    return compute_reduction_operation<T, OT>(src, dst_shape, axis, op); // the generic case forwards unchanged
+}
+
+template <> // specialization for uint8_t input and uint8_t output
+SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+{
+    if(src.data_type() == DataType::QASYMM8 && op != ReductionOperation::MEAN_SUM) // QASYMM8 input with any operation except MEAN_SUM
+    {
+        SimpleTensor<float> src_f = convert_from_asymmetric(src); // presumably dequantizes to float - helper is defined elsewhere
+        SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op); // reduce on the float tensor
+        return convert_to_asymmetric(dst_f, src.quantization_info()); // convert back using the input's quantization info
+    }
+    else
+    {
+        return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op); // other data types, or MEAN_SUM: reduce directly on the uint8_t values
+    }
+}
+
 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+
+template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
index 859b57a..9f7050f 100644
--- a/tests/validation/reference/ReductionOperation.h
+++ b/tests/validation/reference/ReductionOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,10 +35,10 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template <typename T, typename OT>
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_FLOOR_H__ */
+#endif /* __ARM_COMPUTE_TEST_REDUCTION_OPERATION_H__ */
diff --git a/tests/validation/reference/Reverse.cpp b/tests/validation/reference/Reverse.cpp
new file mode 100644
index 0000000..1662dc2
--- /dev/null
+++ b/tests/validation/reference/Reverse.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Reverse.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference reverse: returns a copy of src mirrored along every dimension (0-3) listed in axis
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis)
+{
+    ARM_COMPUTE_ERROR_ON(axis.shape().num_dimensions() > 1); // axis must be a 1D list ...
+    ARM_COMPUTE_ERROR_ON(axis.shape().x() > 4); // ... holding at most four entries
+
+    // Create reference
+    SimpleTensor<T> dst{ src.shape(), src.data_type(), src.num_channels(), src.quantization_info() };
+
+    const unsigned int width   = src.shape()[0];
+    const unsigned int height  = src.shape()[1];
+    const unsigned int depth   = src.shape()[2];
+    const unsigned int batches = src.shape()[3];
+
+    std::array<bool, 4> to_reverse = { { false, false, false, false } }; // one flag per dimension 0..3
+    for(int i = 0; i < axis.num_elements(); ++i)
+    {
+        to_reverse.at(axis[i]) = true; // bounds-checked: an axis value >= 4 throws std::out_of_range instead of writing past the array
+    }
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        const Coordinates  src_coord = index2coord(src.shape(), i);
+        const unsigned int dst_x     = to_reverse[0] ? width - src_coord[0] - 1 : src_coord[0]; // mirrored position is extent - coord - 1
+        const unsigned int dst_y     = to_reverse[1] ? height - src_coord[1] - 1 : src_coord[1];
+        const unsigned int dst_z     = to_reverse[2] ? depth - src_coord[2] - 1 : src_coord[2];
+        const unsigned int dst_w     = to_reverse[3] ? batches - src_coord[3] - 1 : src_coord[3];
+
+        dst[coord2index(src.shape(), Coordinates(dst_x, dst_y, dst_z, dst_w))] = src[i]; // dst has the same shape as src, so src.shape() is used for the destination index too
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &axis);
+template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &axis);
+template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Reverse.h b/tests/validation/reference/Reverse.h
new file mode 100644
index 0000000..a5ff7b0
--- /dev/null
+++ b/tests/validation/reference/Reverse.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_REVERSE_H__
+#define __ARM_COMPUTE_TEST_REVERSE_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_REVERSE_H__ */
diff --git a/tests/validation/reference/Select.cpp b/tests/validation/reference/Select.cpp
new file mode 100644
index 0000000..91b6b66
--- /dev/null
+++ b/tests/validation/reference/Select.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Select.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> select(const SimpleTensor<uint8_t> &c, const SimpleTensor<T> &x, const SimpleTensor<T> &y)
+{
+    // Check if condition has the same rank as x: if so the selection is element-wise, otherwise c is used as a 1D vector of per-slice flags
+    const bool is_same_rank = (c.shape().num_dimensions() == x.shape().num_dimensions());
+
+    // Check shapes: x and y must match; c must either match x or be a 1D vector as long as the outermost dimension of x
+    ARM_COMPUTE_ERROR_ON(x.shape() != y.shape());
+    ARM_COMPUTE_ERROR_ON(is_same_rank && (x.shape() != c.shape()));
+    ARM_COMPUTE_ERROR_ON(!is_same_rank && ((c.shape().num_dimensions() > 1) || (c.shape().x() != x.shape()[x.shape().num_dimensions() - 1])));
+
+    // Create reference
+    SimpleTensor<T> dst{ x.shape(), x.data_type(), 1 };
+
+    // Run select core: a non-zero condition value picks x, zero picks y
+    if(is_same_rank)
+    {
+        for(int i = 0; i < x.num_elements(); ++i)
+        {
+            dst[i] = c[i] > 0 ? x[i] : y[i];
+        }
+    }
+    else
+    {
+        T *output_ptr = dst.data();
+        // Each entry of c selects one contiguous chunk of inner_size elements, copied as a whole from x or y
+        const int outer_size = c.num_elements();
+        const int inner_size = x.num_elements() / outer_size;
+        size_t    offset     = 0;
+
+        for(int i = 0; i < outer_size; ++i)
+        {
+            const T *input_ptr = c[i] > 0 ? x.data() : y.data();
+            memcpy(output_ptr + offset, input_ptr + offset, inner_size * sizeof(T));
+            offset += inner_size;
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> select(const SimpleTensor<uint8_t> &c, const SimpleTensor<uint8_t> &x, const SimpleTensor<uint8_t> &y);
+template SimpleTensor<half> select(const SimpleTensor<uint8_t> &c, const SimpleTensor<half> &x, const SimpleTensor<half> &y);
+template SimpleTensor<float> select(const SimpleTensor<uint8_t> &c, const SimpleTensor<float> &x, const SimpleTensor<float> &y);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Select.h b/tests/validation/reference/Select.h
new file mode 100644
index 0000000..394f367
--- /dev/null
+++ b/tests/validation/reference/Select.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_SELECT_H__
+#define __ARM_COMPUTE_TEST_SELECT_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> select(const SimpleTensor<uint8_t> &c, const SimpleTensor<T> &x, const SimpleTensor<T> &y);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_SELECT_H__ */
diff --git a/tests/validation/reference/SliceOperations.cpp b/tests/validation/reference/SliceOperations.cpp
index 04b5b98..40ca9de 100644
--- a/tests/validation/reference/SliceOperations.cpp
+++ b/tests/validation/reference/SliceOperations.cpp
@@ -24,6 +24,7 @@
 #include "SliceOperations.h"
 
 #include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 namespace arm_compute
 {
@@ -50,11 +51,8 @@
     // Get source shape
     const TensorShape &src_shape = src.shape();
 
-    // Get actual end
-    Coordinates ends_abs = slice_absolute_end_coords(src_shape, ends);
-
     // Get destination shape
-    TensorShape dst_shape = compute_slice_output_shape(src_shape, starts, ends_abs);
+    TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_slice_shape(src_shape, starts, ends);
 
     // Create destination tensor
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
@@ -98,20 +96,24 @@
     // Get source shape
     const TensorShape &src_shape = src.shape();
 
-    // Get actual start, end coordinates and strides
-    const Coordinates final_strides = strided_slice_strides(src_shape, strides);
-    const Coordinates starts_abs    = strided_slice_absolute_start_coords(src_shape, starts, final_strides, begin_mask);
-    const Coordinates ends_abs      = strided_slice_absolute_end_coords(src_shape, starts_abs, ends, final_strides, end_mask, shrink_axis_mask);
-
     // Get destination shape
-    const TensorShape dst_shape = compute_strided_slice_output_shape(src_shape, starts_abs, ends_abs, final_strides);
+    const TensorShape dst_shape = compute_strided_slice_output_shape(src_shape, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
 
     // Create destination tensor
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
 
+    // Get coordinates
+    Coordinates starts_abs, ends_abs, final_strides;
+    std::tie(starts_abs, ends_abs, final_strides) = calculate_strided_slice_coords(src_shape,
+                                                                                   starts, ends, strides,
+                                                                                   begin_mask, end_mask, shrink_axis_mask);
+
     // Perform strided slice
-    Window win;
-    win.use_tensor_dimensions(dst_shape);
+    unsigned int idx = 0;
+    Window       win;
+    win.use_tensor_dimensions(compute_strided_slice_output_shape(src_shape,
+                                                                 starts, ends, strides,
+                                                                 begin_mask, end_mask, shrink_axis_mask, true));
     execute_window_loop(win, [&](const Coordinates & id)
     {
         Coordinates offset;
@@ -119,7 +121,7 @@
         {
             offset.set(i, starts_abs[i] + id[i] * final_strides[i]);
         }
-        *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset));
+        dst.data()[idx++] = *reinterpret_cast<const T *>(src(offset));
     });
 
     return dst;
diff --git a/tests/validation/reference/SpaceToBatch.cpp b/tests/validation/reference/SpaceToBatch.cpp
index 979ab94..c635d4a 100644
--- a/tests/validation/reference/SpaceToBatch.cpp
+++ b/tests/validation/reference/SpaceToBatch.cpp
@@ -39,38 +39,52 @@
 {
     SimpleTensor<T> result(dst_shape, src.data_type());
 
-    auto width_out  = static_cast<int>(dst_shape[0]);
-    auto height_out = static_cast<int>(dst_shape[1]);
-    auto z_out      = static_cast<int>(dst_shape[2]);
+    const auto width_out  = static_cast<int>(dst_shape[0]);
+    const auto height_out = static_cast<int>(dst_shape[1]);
+    const auto batch_out  = static_cast<int>(dst_shape[3]);
+
+    const auto width_in  = static_cast<int>(src.shape()[0]);
+    const auto height_in = static_cast<int>(src.shape()[1]);
+    const auto batch_in  = static_cast<int>(src.shape()[3]);
+
+    const auto channel = static_cast<int>(src.shape()[2]);
+
+    const auto block_width  = block_shape[0];
+    const auto block_height = block_shape[1];
+
+    const auto padding_left = paddings[0];
+    const auto padding_top  = paddings[2];
 
     int out_pos = 0;
-    for(int batch = 0; batch < static_cast<int>(dst_shape[3]); ++batch)
+    for(int outB = 0; outB < batch_out; ++outB)
     {
-        for(int z = 0; z < z_out; ++z)
+        unsigned int inB = outB % batch_in;
+
+        int shift_w = (outB / batch_in) % block_width;
+        int shift_h = (outB / batch_in) / block_width;
+
+        for(int c = 0; c < channel; ++c)
         {
-            for(int y = 0; y < height_out; ++y)
+            for(int outH = 0; outH < height_out; ++outH)
             {
-                for(int x = 0; x < width_out; ++x)
+                for(int outW = 0; outW < width_out; ++outW)
                 {
-                    if(x < paddings[0] || x > width_out - paddings[1] - 1
-                       || y < paddings[2] || y > height_out - paddings[3] - 1)
+                    const auto in_pos = ((inB * channel + c) * height_in + ((outH * block_height + shift_h) - padding_top)) * width_in + (outW * block_width + shift_w) - padding_left;
+
+                    if(outH * block_height + shift_h < padding_top || outH * block_height + shift_h >= padding_top + height_in || outW * block_width + shift_w < padding_left
+                       || outW * block_width + shift_w >= padding_left + width_in)
                     {
                         result[out_pos] = 0;
                     }
                     else
                     {
-                        const int r      = dst_shape[3] / (block_shape[0] * block_shape[1]);
-                        const int in_x   = (block_shape[0] * (x - paddings[0]) + (batch / r) % block_shape[0]);
-                        const int in_y   = (block_shape[1] * (y - paddings[2]) + (batch / r) / block_shape[0]);
-                        int       in_pos = in_x + src.shape()[0] * in_y + z * src.shape()[0] * src.shape()[1] + (batch % r) * src.shape()[0] * src.shape()[1] * src.shape()[2];
-                        result[out_pos]  = src[in_pos];
+                        result[out_pos] = src[in_pos];
                     }
                     ++out_pos;
                 }
             }
         }
     }
-
     return result;
 }
 
diff --git a/tests/validation/reference/StackLayer.cpp b/tests/validation/reference/StackLayer.cpp
new file mode 100644
index 0000000..50e440c
--- /dev/null
+++ b/tests/validation/reference/StackLayer.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "StackLayer.h"
+
+#include "arm_compute/core/Types.h"
+
+#include "tests/validation/Helpers.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> stack_layer(const std::vector<SimpleTensor<T>> &in, const TensorShape &output_shape, DataType data_type, unsigned int axis)
+{
+    ARM_COMPUTE_ERROR_ON(output_shape.num_dimensions() > 5); // only 5 output coordinates are mapped below
+    ARM_COMPUTE_ERROR_ON(in.size() < 2);
+    ARM_COMPUTE_ERROR_ON(axis > in[0].shape().num_dimensions()); // axis equal to the input rank is accepted
+
+    SimpleTensor<T> out{ output_shape, data_type };
+
+    const int width       = in[0].shape()[0];
+    const int height      = in[0].shape()[1];
+    const int depth       = in[0].shape()[2];
+    const int batch_size  = in[0].shape()[3];
+    const int num_tensors = in.size();
+
+    // Array to store the input coordinates (the extents above are read from in[0] and used for every input)
+    // i_coordinates[0] = xi, i_coordinates[1] = yi, i_coordinates[2] = zi
+    // i_coordinates[3] = bi, i_coordinates[4] = i (index of the input tensor), i_coordinates[5] = 0
+    // i_coordinates[5] is never written: output coordinates that are not mapped to an input coordinate point to it and stay at zero
+    int i_coordinates[6] = { 0 };
+
+    // Array of pointers used to map the output coordinates to the input ones according to the axis
+    // Every entry initially points to i_coordinates[5], which is always zero
+    int *o_coordinates[5] = { &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5] };
+
+    // The output coordinate along the stacking axis follows the index of the input tensor
+    o_coordinates[axis] = &i_coordinates[4];
+
+    unsigned int k_shift = 0;
+
+    // Map the remaining output coordinates: input dimension k feeds output dimension k, shifted by one from the axis onwards
+    for(unsigned int k = 0; k < in[0].shape().num_dimensions(); ++k)
+    {
+        if(k == axis)
+        {
+            k_shift++;
+        }
+
+        o_coordinates[k + k_shift] = &i_coordinates[k];
+    }
+
+    // Aliases for the input coordinates: these are the counters of the loops below
+    int &xi = i_coordinates[0];
+    int &yi = i_coordinates[1];
+    int &zi = i_coordinates[2];
+    int &bi = i_coordinates[3];
+    int &i  = i_coordinates[4];
+
+    // Aliases for the output coordinates: being references into i_coordinates, they change together with the loop counters
+    int &xo = *(o_coordinates[0]);
+    int &yo = *(o_coordinates[1]);
+    int &zo = *(o_coordinates[2]);
+    int &bo = *(o_coordinates[3]);
+    int &wo = *(o_coordinates[4]);
+
+    // Stack tensors: visit every element of every input and write it at the mapped output coordinates
+    for(; i < num_tensors; ++(i))
+    {
+        bi = 0;
+        for(; bi < batch_size; ++(bi))
+        {
+            zi = 0;
+            for(; zi < depth; ++(zi))
+            {
+                yi = 0;
+                for(; yi < height; ++(yi))
+                {
+                    xi = 0;
+                    for(; xi < width; ++(xi))
+                    {
+                        *(reinterpret_cast<T *>(out(Coordinates(xo, yo, zo, bo, wo)))) = *(reinterpret_cast<const T *>(in[i](Coordinates(xi, yi, zi, bi))));
+                    }
+                }
+            }
+        }
+    }
+
+    return out;
+}
+template SimpleTensor<int> stack_layer(const std::vector<SimpleTensor<int>> &in, const TensorShape &output_shape, DataType data_type, unsigned int axis);
+template SimpleTensor<short> stack_layer(const std::vector<SimpleTensor<short>> &in, const TensorShape &output_shape, DataType data_type, unsigned int axis);
+template SimpleTensor<char> stack_layer(const std::vector<SimpleTensor<char>> &in, const TensorShape &output_shape, DataType data_type, unsigned int axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/StackLayer.h b/tests/validation/reference/StackLayer.h
new file mode 100644
index 0000000..453f176
--- /dev/null
+++ b/tests/validation/reference/StackLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_STACK_LAYER_H__
+#define __ARM_COMPUTE_TEST_STACK_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> stack_layer(const std::vector<SimpleTensor<T>> &in, const TensorShape &output_shape, DataType data_type, unsigned int axis);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_STACK_LAYER_H__ */
diff --git a/tests/validation/reference/Tile.cpp b/tests/validation/reference/Tile.cpp
new file mode 100644
index 0000000..e87e515
--- /dev/null
+++ b/tests/validation/reference/Tile.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Tile.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> tile(const SimpleTensor<T> &src, const Multiples &multiples)
+{
+    // Create reference: the output shape is computed from the source shape and the multiples
+    const auto in_shape  = src.shape();
+    const auto out_shape = misc::shape_calculator::compute_tiled_shape(src.shape(), multiples);
+
+    SimpleTensor<T> dst{ out_shape, src.data_type() };
+
+    // Each output element repeats the source element found by wrapping its coordinates around the source extents
+    for(int out_idx = 0; out_idx < dst.num_elements(); ++out_idx)
+    {
+        Coordinates out_coord = index2coord(out_shape, out_idx);
+
+        const size_t wrapped_x = static_cast<size_t>(out_coord.x()) % in_shape[0];
+        const size_t wrapped_y = static_cast<size_t>(out_coord.y()) % in_shape[1];
+        const size_t wrapped_z = static_cast<size_t>(out_coord.z()) % in_shape[2];
+        const size_t wrapped_w = static_cast<size_t>(out_coord[3]) % in_shape[3];
+
+        Coordinates src_coord{ wrapped_x, wrapped_y, wrapped_z, wrapped_w };
+        const int   in_idx = coord2index(in_shape, src_coord);
+        dst[out_idx]       = src[in_idx];
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> tile(const SimpleTensor<uint8_t> &src, const Multiples &multiples);
+template SimpleTensor<int8_t> tile(const SimpleTensor<int8_t> &src, const Multiples &multiples);
+template SimpleTensor<uint16_t> tile(const SimpleTensor<uint16_t> &src, const Multiples &multiples);
+template SimpleTensor<int16_t> tile(const SimpleTensor<int16_t> &src, const Multiples &multiples);
+template SimpleTensor<uint32_t> tile(const SimpleTensor<uint32_t> &src, const Multiples &multiples);
+template SimpleTensor<int32_t> tile(const SimpleTensor<int32_t> &src, const Multiples &multiples);
+template SimpleTensor<half> tile(const SimpleTensor<half> &src, const Multiples &multiples);
+template SimpleTensor<float> tile(const SimpleTensor<float> &src, const Multiples &multiples);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Tile.h b/tests/validation/reference/Tile.h
new file mode 100644
index 0000000..b8d15f6
--- /dev/null
+++ b/tests/validation/reference/Tile.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TILE_H__
+#define __ARM_COMPUTE_TEST_TILE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> tile(const SimpleTensor<T> &src, const Multiples &multiples);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_TILE_H__ */
diff --git a/tests/validation/reference/Unstack.cpp b/tests/validation/reference/Unstack.cpp
new file mode 100644
index 0000000..3474c15
--- /dev/null
+++ b/tests/validation/reference/Unstack.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Unstack.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+inline Coordinates expand_coordinates(Coordinates in_coord, size_t axis, size_t slice, size_t num_dimensions)
+{
+    /*
+        Rebuild the coordinates used to read from the input tensor out of the coordinates of an element inside one slice:
+        the slice index is inserted at position axis and the coordinates from that position onwards move up by one.
+
+        Example with an input tensor of shape (X, Y, Z, W):
+            axis == 0 -> X slices, each of shape (Y, Z, W)
+            axis == 1 -> Y slices, each of shape (X, Z, W)
+    */
+    Coordinates full_coord;
+    full_coord.set_num_dimensions(num_dimensions);
+    full_coord.set(axis, slice);
+
+    size_t dim = 0;
+    for(; dim < axis; ++dim)
+    {
+        full_coord.set(dim, in_coord[dim]);
+    }
+    for(dim = axis + 1; dim < num_dimensions; ++dim)
+    {
+        full_coord.set(dim, in_coord[dim - 1]);
+    }
+    return full_coord;
+}
+
+template <typename T>
+SimpleTensor<T> get_slice(const SimpleTensor<T> &input_tensor, size_t axis, size_t slice)
+{
+    // The slice has the shape of the input tensor with the dimension at axis removed
+    const size_t input_rank  = input_tensor.shape().num_dimensions();
+    TensorShape  slice_shape = input_tensor.shape();
+    slice_shape.remove_dimension(axis);
+    SimpleTensor<T> slice_tensor{ slice_shape, input_tensor.data_type() };
+
+    // Visit every element of the slice and fetch its value from the matching position of the input tensor
+    Window slice_window;
+    slice_window.use_tensor_dimensions(slice_shape);
+    execute_window_loop(slice_window, [&](const Coordinates & slice_coord)
+    {
+        const T *src_element = reinterpret_cast<const T *>(input_tensor(expand_coordinates(slice_coord, axis, slice, input_rank)));
+        *reinterpret_cast<T *>(slice_tensor(slice_coord)) = *src_element;
+    });
+
+    return slice_tensor;
+}
+} // namespace
+
+template <typename T>
+std::vector<SimpleTensor<T>> unstack(const SimpleTensor<T> &input_tensor, std::vector<SimpleTensor<T>> &output_tensors, int axis)
+{
+    // Wrap negative axis values around the number of dimensions of the input; entry k of output_tensors receives slice k
+    const unsigned int axis_u = wrap_around(axis, static_cast<int>(input_tensor.shape().num_dimensions()));
+    ARM_COMPUTE_ERROR_ON(axis_u >= input_tensor.shape().num_dimensions());
+    size_t slice_idx = 0;
+    for(auto &output : output_tensors)
+    {
+        const SimpleTensor<T> current_slice = get_slice(input_tensor, axis_u, slice_idx++);
+        output                              = copy_tensor<T>(current_slice);
+    }
+    return output_tensors;
+}
+
+template std::vector<SimpleTensor<float>> unstack(const SimpleTensor<float> &input_tensor, std::vector<SimpleTensor<float>> &output_tensors, int axis);
+template std::vector<SimpleTensor<half>> unstack(const SimpleTensor<half> &input_tensor, std::vector<SimpleTensor<half>> &output_tensors, int axis);
+template std::vector<SimpleTensor<uint8_t>> unstack(const SimpleTensor<uint8_t> &input_tensor, std::vector<SimpleTensor<uint8_t>> &output_tensors, int axis);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Unstack.h b/tests/validation/reference/Unstack.h
new file mode 100644
index 0000000..56e3778
--- /dev/null
+++ b/tests/validation/reference/Unstack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_UNSTACK_H__
+#define __ARM_COMPUTE_TEST_UNSTACK_H__
+
+#include "tests/SimpleTensor.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<SimpleTensor<T>> unstack(const SimpleTensor<T> &input_tensor, std::vector<SimpleTensor<T>> &output_tensors, int axis);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_UNSTACK_H__ */
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp
index 6be171b..3854339 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/WidthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,7 +34,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs)
+SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst)
 {
     // Create reference
     std::vector<TensorShape> shapes;
@@ -44,10 +44,6 @@
         shapes.emplace_back(src.shape());
     }
 
-    DataType        dst_type  = srcs.empty() ? DataType::UNKNOWN : srcs[0].data_type();
-    TensorShape     dst_shape = calculate_width_concatenate_shape(shapes);
-    SimpleTensor<T> dst(dst_shape, dst_type);
-
     // Compute reference
     int       width_offset = 0;
     const int width_out    = dst.shape().x();
@@ -74,21 +70,32 @@
                 for(int r = 0; r < height; ++r)
                 {
                     const int offset = u * height * depth + d * height + r;
-                    std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
-                    src_ptr += width;
+                    if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info()) // Requantize only when src is QASYMM8 and its quantization info differs from dst's
+                    {
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&src, &dst](T t) // Capture by reference: a by-value capture copy-constructs src and dst for every row, and the lambda never outlives this call
+                        {
+                            const float dequantized_input = src.quantization_info().dequantize(t);
+                            return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP);
+                        });
+                        src_ptr += width;
+                    }
+                    else
+                    {
+                        std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out); // No requantization needed: copy the row as-is
+                        src_ptr += width;
+                    }
                 }
             }
         }
-
         width_offset += width;
     }
 
     return dst;
 }
 
-template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs);
-template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs);
-template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs);
+template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst); // Explicit instantiations of the (srcs, dst) overload: float, half and uint8_t
+template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst);
+template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/WidthConcatenateLayer.h
index 237e72b..0f1f428 100644
--- a/tests/validation/reference/WidthConcatenateLayer.h
+++ b/tests/validation/reference/WidthConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs);
+SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst); // dst is supplied by the caller rather than allocated inside the function; the function returns dst by value
 } // namespace reference
 } // namespace validation
 } // namespace test