arm_compute v19.08
diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp
index 9887e42..f573d12 100644
--- a/tests/validation/reference/ActivationLayer.cpp
+++ b/tests/validation/reference/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,10 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info)
+SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
 {
+    ARM_COMPUTE_UNUSED(oq_info);
+
     // Create reference
     SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
 
@@ -53,16 +55,29 @@
 }
 
 template <>
-SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info)
+SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
 {
+    const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
+
     SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
     SimpleTensor<float>   dst_tmp = activation_layer<float>(src_tmp, info);
-    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, dst_qinfo);
     return dst;
 }
 
-template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info);
-template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info);
+template <>
+SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+{
+    const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
+
+    SimpleTensor<float>   src_tmp = convert_from_symmetric(src);
+    SimpleTensor<float>   dst_tmp = activation_layer<float>(src_tmp, info);
+    SimpleTensor<int16_t> dst     = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo);
+    return dst;
+}
+
+template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
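
Note: the new QSYMM16 specialization mirrors the existing QASYMM8 path: dequantize to float, run the float reference, then requantize with the output quantization info (falling back to the input's when none is supplied). A minimal usage sketch, with hypothetical scale values, built only from the SimpleTensor/QuantizationInfo constructors already shown in this patch:

    // Hypothetical scales; QSYMM16 uses a zero offset
    SimpleTensor<int16_t> src{ TensorShape(8U), DataType::QSYMM16, 1, QuantizationInfo(1.f / 32768.f, 0) };
    ActivationLayerInfo   act(ActivationLayerInfo::ActivationFunction::LOGISTIC);
    // Requantize the result with an output scale different from the input's
    SimpleTensor<int16_t> dst = reference::activation_layer<int16_t>(src, act, QuantizationInfo(1.f / 128.f, 0));
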
diff --git a/tests/validation/reference/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h
index 77b3530..5beca7c 100644
--- a/tests/validation/reference/ActivationLayer.h
+++ b/tests/validation/reference/ActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -75,6 +75,9 @@
         case ActivationLayerInfo::ActivationFunction::TANH:
             ret = a * std::tanh(b * x);
             break;
+        case ActivationLayerInfo::ActivationFunction::IDENTITY:
+            ret = x;
+            break;
         default:
             ARM_COMPUTE_ERROR("Unsupported activation function");
             break;
@@ -84,7 +87,7 @@
 }
 
 template <typename T>
-SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info);
+SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp
index a6205af..abd4f31 100644
--- a/tests/validation/reference/ArithmeticOperations.cpp
+++ b/tests/validation/reference/ArithmeticOperations.cpp
@@ -124,8 +124,32 @@
     }
 }
 
-template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst,
-                                                    ConvertPolicy convert_policy);
+template <>
+SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst, ConvertPolicy convert_policy)
+{
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
+    if(dst.data_type() == DataType::QSYMM16)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_symmetric<int16_t>(src1);
+        SimpleTensor<float> src2_tmp = convert_from_symmetric<int16_t>(src2);
+        SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
+
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
+
+        dst = convert_to_symmetric<int16_t>(dst_tmp, dst.quantization_info());
+        return dst;
+    }
+    else
+    {
+        // DataType::S16
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+        return dst;
+    }
+}
+
 template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, SimpleTensor<int8_t> &dst, ConvertPolicy convert_policy);
 template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, SimpleTensor<half> &dst, ConvertPolicy convert_policy);
 template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, SimpleTensor<float> &dst, ConvertPolicy convert_policy);
@@ -133,7 +157,7 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
 {
-    ARM_COMPUTE_ERROR_ON_MSG(dst_data_type == DataType::QASYMM8, "For QASYMM8, the quantized output tensor should be passed directly.");
+    ARM_COMPUTE_ERROR_ON_MSG(is_data_type_quantized(dst_data_type), "For quantized input data types, the quantized output tensor should be passed directly.");
 
     SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst_data_type);
     arithmetic_operation<T>(op, src1, src2, dst, convert_policy);
diff --git a/tests/validation/reference/ChannelExtract.cpp b/tests/validation/reference/ChannelExtract.cpp
index 6f17fc0..fc7ae7d 100644
--- a/tests/validation/reference/ChannelExtract.cpp
+++ b/tests/validation/reference/ChannelExtract.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,7 +61,7 @@
             const auto *src_pixel = reinterpret_cast<const T *>(src(src_coord));
             auto       *dst_pixel = reinterpret_cast<T *>(dst(dst_coord));
 
-            dst_pixel[0] = src_pixel[channel_idx];
+            dst_pixel[0] = src_pixel[channel_idx]; // NOLINT
         }
     }
 
diff --git a/tests/validation/reference/ColorConvertHelper.h b/tests/validation/reference/ColorConvertHelper.h
index b2ae6f2..abd1f5d 100644
--- a/tests/validation/reference/ColorConvertHelper.h
+++ b/tests/validation/reference/ColorConvertHelper.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -9,14 +9,14 @@
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
- *asymm_int_mult
+ *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, asymm_int_multDAMAGES OR OTHER
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
@@ -70,7 +70,7 @@
             auto       *gvec_pixel = reinterpret_cast<T *>(gvec(vec_coord));
             auto       *bvec_pixel = reinterpret_cast<T *>(bvec(vec_coord));
 
-            rvec_pixel[0] = src_pixel[0];
+            rvec_pixel[0] = src_pixel[0]; // NOLINT
             gvec_pixel[0] = src_pixel[1];
             bvec_pixel[0] = src_pixel[2];
         }
diff --git a/tests/validation/reference/ConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp
index af818a5..aa74ca2 100644
--- a/tests/validation/reference/ConcatenateLayer.cpp
+++ b/tests/validation/reference/ConcatenateLayer.cpp
@@ -72,10 +72,13 @@
                     const int offset = u * height * depth + d * height + r;
                     if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info())
                     {
-                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [src, dst](T t)
+                        const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+                        const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
+
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
                         {
-                            const float dequantized_input = src.quantization_info().dequantize(t);
-                            return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP);
+                            const float dequantized_input = dequantize_qasymm8(t, iq_info);
+                            return quantize_qasymm8(dequantized_input, oq_info);
                         });
                         src_ptr += width;
                     }
@@ -124,6 +127,16 @@
             dst = reference::permute<T>(dst, PermutationVector(2U, 1U, 0U));
             return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(2U, 1U, 0U));
         }
+        case 3:
+        {
+            for(auto &t : srcs)
+            {
+                t = reference::permute<T>(t, PermutationVector(3U, 2U, 1U, 0U));
+            }
+            dst      = reference::permute<T>(dst, PermutationVector(3U, 2U, 1U, 0U));
+            auto ret = reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(3U, 2U, 1U, 0U));
+            return ret;
+        }
         default:
         {
             ARM_COMPUTE_ERROR("Not supported");
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 2e5fefd..30be25f 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -9,14 +9,14 @@
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
- *asymm_int_mult
+ *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, asymm_int_multDAMAGES OR OTHER
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
@@ -101,12 +101,16 @@
     const TB *b_ptr   = bias.data() + b_offset;
     T        *out_ptr = out.data() + o_offset;
 
-    const int   input_offset   = -in.quantization_info().offset;
-    const float input_scale    = in.quantization_info().scale;
-    const int   weights_offset = -weights.quantization_info().offset;
-    const float weights_scale  = weights.quantization_info().scale;
-    const int   output_offset  = out.quantization_info().offset;
-    const float output_scale   = out.quantization_info().scale;
+    const UniformQuantizationInfo iq_info = in.quantization_info().uniform();
+    const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = out.quantization_info().uniform();
+
+    const int   input_offset   = -iq_info.offset;
+    const float input_scale    = iq_info.scale;
+    const int   weights_offset = -wq_info.offset;
+    const float weights_scale  = wq_info.scale;
+    const int   output_offset  = oq_info.offset;
+    const float output_scale   = oq_info.scale;
 
     int         output_multiplier = 0;
     int         output_shift      = 0;
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
index 8cfce97..f25a031 100644
--- a/tests/validation/reference/CropResize.cpp
+++ b/tests/validation/reference/CropResize.cpp
@@ -59,8 +59,8 @@
             case InterpolationPolicy::NEAREST_NEIGHBOR:
             {
                 //Calculate the source coords of the nearest pixel by flooring the scaled x_src/y_src coords
-                float x_src = (idw + 0.5f) * wr;
-                float y_src = (idh + 0.5f) * hr;
+                float x_src = std::floor(idw * wr);
+                float y_src = std::floor(idh * hr);
                 in_id.set(1, x_src);
                 in_id.set(2, y_src);
 
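
Note: for the NEAREST_NEIGHBOR path the reference now floors the scaled output index instead of sampling at a half-pixel offset. A worked example, assuming wr and hr are the width/height scale ratios as the surrounding code suggests: with wr = 2.0, output column idw = 1 previously produced the source coordinate (1 + 0.5) * 2.0 = 3.0, whereas it now produces floor(1 * 2.0) = 2.0.
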
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 9167924..af59830 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -68,7 +68,7 @@
 
     if(src.data_type() == DataType::QASYMM8)
     {
-        const uint8_t quantized_zero = src.quantization_info().offset;
+        const uint8_t quantized_zero = src.quantization_info().uniform().offset;
         std::fill_n(scaled.data(), scaled.num_elements(), quantized_zero);
     }
     else
diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
index 22271a0..d6e6e78 100644
--- a/tests/validation/reference/DepthConcatenateLayer.cpp
+++ b/tests/validation/reference/DepthConcatenateLayer.cpp
@@ -55,6 +55,7 @@
     {
         return tensor.quantization_info() != dst.quantization_info();
     };
+
     if(srcs[0].data_type() == DataType::QASYMM8 && std::any_of(srcs.cbegin(), srcs.cend(), have_different_quantization_info))
     {
         for(int b = 0; b < batches; ++b)
@@ -64,11 +65,14 @@
             int slice = 0;
             for(const auto &src : srcs)
             {
-                auto       ptr_slice = static_cast<T *>(dst(Coordinates(0, 0, slice, b)));
-                const auto num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z());
-                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T)
+                auto                          ptr_slice = static_cast<T *>(dst(Coordinates(0, 0, slice, b)));
+                const auto                    num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z());
+                const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+                const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
+
+                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [&](T)
                 {
-                    return dst.quantization_info().quantize(src.quantization_info().dequantize(0), RoundingPolicy::TO_NEAREST_UP);
+                    return quantize_qasymm8(dequantize_qasymm8(0, iq_info), oq_info);
                 });
                 slice += src.shape().z();
             }
@@ -102,10 +106,12 @@
                 {
                     if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info())
                     {
-                        std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [src, dst](T t)
+                        const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+                        const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
+                        std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [&](T t)
                         {
-                            const float dequantized_input = src.quantization_info().dequantize(t);
-                            return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP);
+                            const float dequantized_input = dequantize_qasymm8(t, iq_info);
+                            return quantize_qasymm8(dequantized_input, oq_info);
                         });
                         src_ptr += width;
                     }
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index 6d9f98d..7da0011 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,14 +48,7 @@
     {
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            if(is_data_type_quantized(src.data_type()))
-            {
-                result[i] = scvt_f32_qasymm8(src[i], src.quantization_info().scale, src.quantization_info().offset);
-            }
-            else
-            {
-                result[i] = src[i] << shift;
-            }
+            result[i] = src[i] << shift;
         }
     }
     // Down-casting
@@ -82,16 +75,8 @@
         // Always saturate on floats
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            if(is_data_type_quantized(dt_out))
-            {
-                T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
-                result[i] = sqcvt_qasymm8_f32(val, src.quantization_info().scale, src.quantization_info().offset);
-            }
-            else
-            {
-                T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
-                result[i] = utils::cast::saturate_cast<T2>(val);
-            }
+            T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
+            result[i] = utils::cast::saturate_cast<T2>(val);
         }
     }
     else
diff --git a/tests/validation/reference/DepthToSpaceLayer.cpp b/tests/validation/reference/DepthToSpaceLayer.cpp
new file mode 100644
index 0000000..4135ce5
--- /dev/null
+++ b/tests/validation/reference/DepthToSpaceLayer.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "DepthToSpaceLayer.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+// Depth to Space
+template <typename T>
+SimpleTensor<T> depth_to_space(const SimpleTensor<T> &src, const TensorShape &dst_shape, int32_t block_shape)
+{
+    ARM_COMPUTE_ERROR_ON(block_shape <= 0);
+    SimpleTensor<T> result(dst_shape, src.data_type());
+
+    int        in_pos     = 0;
+    const auto width_in   = static_cast<int>(src.shape()[0]);
+    const auto height_in  = static_cast<int>(src.shape()[1]);
+    const auto channel_in = static_cast<int>(src.shape()[2]);
+    const auto batch_in   = static_cast<int>(src.shape()[3]);
+    const int  r          = channel_in / (block_shape * block_shape);
+
+    for(int b = 0; b < batch_in; ++b)
+    {
+        for(int z = 0; z < channel_in; ++z)
+        {
+            for(int y = 0; y < height_in; ++y)
+            {
+                for(int x = 0; x < width_in; ++x)
+                {
+                    const int out_x   = (block_shape * x + (z / r) % block_shape);
+                    const int out_y   = (block_shape * y + (z / r) / block_shape);
+                    const int out_pos = out_x + dst_shape[0] * out_y + (z % r) * dst_shape[0] * dst_shape[1] + b * dst_shape[0] * dst_shape[1] * dst_shape[2];
+                    result[out_pos]   = src[in_pos];
+                    ++in_pos;
+                }
+            }
+        }
+    }
+
+    return result;
+}
+template SimpleTensor<float> depth_to_space(const SimpleTensor<float> &src, const TensorShape &dst_shape, int32_t block_shape);
+template SimpleTensor<half> depth_to_space(const SimpleTensor<half> &src, const TensorShape &dst_shape, int32_t block_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
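
Note: the index arithmetic above redistributes groups of r = channel_in / (block_shape * block_shape) input channels into block_shape x block_shape spatial tiles. A worked example of the mapping (integer division throughout): with block_shape = 2 and 8 input channels, r = 2, so the input element at (x, y, z) = (1, 0, 5) lands at out_x = 2 * 1 + (5 / 2) % 2 = 2, out_y = 2 * 0 + (5 / 2) / 2 = 1, in output channel z % r = 1.
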
diff --git a/tests/validation/reference/DepthToSpaceLayer.h b/tests/validation/reference/DepthToSpaceLayer.h
new file mode 100644
index 0000000..3989401
--- /dev/null
+++ b/tests/validation/reference/DepthToSpaceLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_DEPTH_TO_SPACE_LAYER_H__
+#define __ARM_COMPUTE_TEST_DEPTH_TO_SPACE_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> depth_to_space(const SimpleTensor<T> &src, const TensorShape &dst_shape, int32_t block_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_DEPTH_TO_SPACE_LAYER_H__ */
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 90ecffb..b1d2b92 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -50,7 +50,7 @@
  */
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
+                                      unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
 {
     ARM_COMPUTE_UNUSED(out_quant_info);
 
@@ -126,26 +126,23 @@
 
 template <>
 SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
+                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
 {
     // if no explicit quantization has been set, use the same as src
-    if(out_quant_info == QuantizationInfo(0.0f, 0))
-    {
-        out_quant_info = src.quantization_info();
-    }
-    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, out_quant_info };
+    const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
+    SimpleTensor<uint8_t>   dst{ dst_shape, src.data_type(), 1, dst_qinfo };
 
     // Create reference
-    const int   input_offset   = -src.quantization_info().offset;
-    const float input_scale    = src.quantization_info().scale;
-    const int   weights_offset = -weights.quantization_info().offset;
-    const float weights_scale  = weights.quantization_info().scale;
-    const int   output_offset  = dst.quantization_info().offset;
-    const float output_scale   = dst.quantization_info().scale;
+    const int   input_offset   = -src.quantization_info().uniform().offset;
+    const float input_scale    = src.quantization_info().uniform().scale;
+    const int   weights_offset = -weights.quantization_info().uniform().offset;
+    const float weights_scale  = weights.quantization_info().uniform().scale;
+    const int   output_offset  = dst_qinfo.uniform().offset;
+    const float output_scale   = dst_qinfo.uniform().scale;
 
-    int         output_multiplier;
-    int         output_shift;
-    const float multiplier = input_scale * weights_scale / output_scale;
+    int         output_multiplier = 0;
+    int         output_shift      = 0;
+    const float multiplier        = input_scale * weights_scale / output_scale;
     arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
 
     // Compute reference
@@ -224,10 +221,10 @@
 }
 
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
-                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
+                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info);
 
 template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
-                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
+                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
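
Note: the requantization setup follows the usual fixed-point convention: the real multiplier input_scale * weights_scale / output_scale is decomposed by calculate_quantized_multiplier_less_than_one into a normalized Q31 multiplier and a right shift. A worked example under that assumption (values are hypothetical): with input_scale = 0.5, weights_scale = 0.25 and output_scale = 1.0, the real multiplier is 0.125 = 0.5 * 2^-2, so output_shift becomes 2 and output_multiplier the Q31 encoding of 0.5 (roughly 2^30).
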
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
index ac70de0..ee323fa 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -37,7 +37,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), QuantizationInfo out_quant_info = QuantizationInfo(0.0f, 0));
+                                      unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), const QuantizationInfo &out_quant_info = QuantizationInfo(0.0f, 0));
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index df50c14..cceee04 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -23,6 +23,8 @@
  */
 #include "DequantizationLayer.h"
 
+#include "Permute.h"
+
 namespace arm_compute
 {
 namespace test
@@ -31,24 +33,89 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src)
+namespace
 {
-    const DataType          dst_data_type     = std::is_same<T, float>::value ? DataType::F32 : DataType::F16;
-    const QuantizationInfo &quantization_info = src.quantization_info();
+template <typename TOut>
+TOut dequantize(int8_t val, const UniformQuantizationInfo qinfo)
+{
+    return static_cast<TOut>(dequantize_qsymm8(val, qinfo));
+}
+template <typename TOut>
+TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo)
+{
+    return static_cast<TOut>(dequantize_qasymm8(val, qinfo));
+}
+template <typename TOut>
+TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo)
+{
+    return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
+}
 
-    SimpleTensor<T> dst{ src.shape(), dst_data_type };
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> dequantization_layer_nchw(const SimpleTensor<TIn> &src)
+{
+    const DataType src_data_type = src.data_type();
+    const DataType dst_data_type = std::is_same<TOut, float>::value ? DataType::F32 : DataType::F16;
 
-    for(int i = 0; i < src.num_elements(); ++i)
+    SimpleTensor<TOut> dst{ src.shape(), dst_data_type };
+
+    if(src_data_type == DataType::QSYMM8_PER_CHANNEL)
     {
-        dst[i] = static_cast<T>(quantization_info.dequantize(src[i]));
+        const int WH = src.shape().x() * src.shape().y();
+        const int C  = src.shape().z();
+        const int N  = src.shape().total_size() / (WH * C);
+
+        const std::vector<float> qscales = src.quantization_info().scale();
+
+        for(int n = 0; n < N; ++n)
+        {
+            for(int c = 0; c < C; ++c)
+            {
+                const size_t                  idx           = n * C * WH + c * WH;
+                const UniformQuantizationInfo channel_qinfo = { qscales[c], 0 };
+
+                // Dequantize slice
+                for(int s = 0; s < WH; ++s)
+                {
+                    dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo);
+                }
+            }
+        }
+    }
+    else
+    {
+        const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
+        ARM_COMPUTE_ERROR_ON(quantization_info.offset != 0 && src_data_type == DataType::QSYMM8);
+
+        for(int i = 0; i < src.num_elements(); ++i)
+        {
+            dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info));
+        }
     }
 
     return dst;
 }
+} // namespace
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
+{
+    if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::QSYMM8_PER_CHANNEL)
+    {
+        SimpleTensor<TIn> src_nchw = reference::permute<TIn>(src, PermutationVector(1U, 2U, 0U));
+        return reference::permute<TOut>(dequantization_layer_nchw<TOut>(src_nchw), PermutationVector(2U, 0U, 1U));
+    }
+    else
+    {
+        return dequantization_layer_nchw<TOut>(src);
+    }
+}
 
 template SimpleTensor<half> dequantization_layer(const SimpleTensor<uint8_t> &src);
 template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src);
+template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src);
+template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
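
Note: for QSYMM8_PER_CHANNEL the code above applies one scale per channel with a zero offset, i.e. value = q * scale[c]. For example, with qscales = { 0.1f, 0.2f }, an int8 value of -5 in channel 1 dequantizes to -5 * 0.2f = -1.0f. The NHWC case is handled by permuting to NCHW first, so that each channel is a contiguous slice, and permuting the result back.
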
diff --git a/tests/validation/reference/DequantizationLayer.h b/tests/validation/reference/DequantizationLayer.h
index 1d0e54b..8c78084 100644
--- a/tests/validation/reference/DequantizationLayer.h
+++ b/tests/validation/reference/DequantizationLayer.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src);
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ElementWiseUnary.cpp b/tests/validation/reference/ElementWiseUnary.cpp
index ae7f256..06beb2a 100644
--- a/tests/validation/reference/ElementWiseUnary.cpp
+++ b/tests/validation/reference/ElementWiseUnary.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,21 @@
             case ElementWiseUnary::EXP:
                 dst[i] = std::exp(src[i]);
                 break;
+            case ElementWiseUnary::NEG:
+                dst[i] = -src[i];
+                break;
+            case ElementWiseUnary::LOG:
+                dst[i] = std::log(src[i]);
+                break;
+            case ElementWiseUnary::ABS:
+                dst[i] = std::abs(src[i]);
+                break;
+            case ElementWiseUnary::SIN:
+                dst[i] = std::sin(src[i]);
+                break;
+            case ElementWiseUnary::ROUND:
+                dst[i] = std::nearbyint(src[i]);
+                break;
             default:
                 ARM_COMPUTE_ERROR("Not implemented");
         }
@@ -56,6 +71,7 @@
 
 template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op);
 template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op);
+template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, ElementWiseUnary op);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index 2ffb0fa..d5a37a0 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -43,38 +43,58 @@
 
     intermediate_type val;
 
-    if(op == ArithmeticOperation::ADD)
+    switch(op)
     {
-        val = static_cast<intermediate_type>(src1) + static_cast<intermediate_type>(src2);
+        case ArithmeticOperation::ADD:
+        {
+            val = static_cast<intermediate_type>(src1) + static_cast<intermediate_type>(src2);
+            break;
+        }
+        case ArithmeticOperation::SUB:
+        {
+            val = static_cast<intermediate_type>(src1) - static_cast<intermediate_type>(src2);
+            break;
+        }
+        case ArithmeticOperation::MIN:
+        {
+            val = std::min(static_cast<intermediate_type>(src1), static_cast<intermediate_type>(src2));
+            break;
+        }
+        case ArithmeticOperation::MAX:
+        {
+            val = std::max(static_cast<intermediate_type>(src1), static_cast<intermediate_type>(src2));
+            break;
+        }
+        case ArithmeticOperation::SQUARED_DIFF:
+        {
+            intermediate_type tmp = (static_cast<intermediate_type>(src1) - static_cast<intermediate_type>(src2));
+            val                   = tmp * tmp;
+            break;
+        }
+        case ArithmeticOperation::DIV:
+        {
+            val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
+            break;
+        }
+        case ArithmeticOperation::POWER:
+        {
+            val = std::pow(static_cast<intermediate_type>(src1), static_cast<intermediate_type>(src2));
+            break;
+        }
+        case ArithmeticOperation::PRELU:
+        {
+            const T x     = static_cast<intermediate_type>(src1);
+            const T alpha = static_cast<intermediate_type>(src2);
+            val           = (x > 0 ? x : alpha * x);
+            break;
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Not handled");
+        }
     }
-    else if(op == ArithmeticOperation::SUB)
-    {
-        val = static_cast<intermediate_type>(src1) - static_cast<intermediate_type>(src2);
-    }
-    else if(op == ArithmeticOperation::MIN)
-    {
-        val = std::min(static_cast<intermediate_type>(src1), static_cast<intermediate_type>(src2));
-    }
-    else if(op == ArithmeticOperation::MAX)
-    {
-        val = std::max(static_cast<intermediate_type>(src1), static_cast<intermediate_type>(src2));
-    }
-    else if(op == ArithmeticOperation::SQUARED_DIFF)
-    {
-        intermediate_type tmp = (static_cast<intermediate_type>(src1) - static_cast<intermediate_type>(src2));
-        val                   = tmp * tmp;
-    }
-    else if(op == ArithmeticOperation::DIV)
-    {
-        val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR("Not handled");
-    }
-
     T result;
-    if(op == ArithmeticOperation::ADD || op == ArithmeticOperation::SUB || op == ArithmeticOperation::DIV)
+    if(op == ArithmeticOperation::ADD || op == ArithmeticOperation::SUB || op == ArithmeticOperation::DIV || op == ArithmeticOperation::POWER)
     {
         result = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T>(val) : static_cast<T>(val);
     }
@@ -164,10 +184,39 @@
     }
 }
 
+template <>
+SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst, ConvertPolicy convert_policy)
+{
+    if(dst.data_type() == DataType::QSYMM16)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_symmetric<int16_t>(src1);
+        SimpleTensor<float> src2_tmp = convert_from_symmetric<int16_t>(src2);
+        SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
+
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
+
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
+
+        dst = convert_to_symmetric<int16_t>(dst_tmp, dst.quantization_info());
+        return dst;
+    }
+    else
+    {
+        // DataType::S16
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
+
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+
+        return dst;
+    }
+}
+
 template SimpleTensor<int32_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, SimpleTensor<int32_t> &dst,
                                                     ConvertPolicy convert_policy);
-template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst,
-                                                    ConvertPolicy convert_policy);
 template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, SimpleTensor<int8_t> &dst,
                                                    ConvertPolicy convert_policy);
 template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, SimpleTensor<half> &dst, ConvertPolicy convert_policy);
@@ -176,7 +225,7 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
 {
-    ARM_COMPUTE_ERROR_ON_MSG(dst_data_type == DataType::QASYMM8, "For QASYMM8, the quantized output tensor should be passed directly.");
+    ARM_COMPUTE_ERROR_ON_MSG(is_data_type_quantized(dst_data_type), "For quantized data types, the quantized output tensor should be passed directly.");
 
     SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst_data_type);
     arithmetic_operation<T>(op, src1, src2, dst, convert_policy);
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index 07ddf6d..cd84b9c 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -67,12 +67,16 @@
     const TB *bias_ptr    = bias.data();
     T        *dst_ptr     = dst.data() + offset_dst;
 
-    const int   input_offset   = -src.quantization_info().offset;
-    const float input_scale    = src.quantization_info().scale;
-    const int   weights_offset = -weights.quantization_info().offset;
-    const float weights_scale  = weights.quantization_info().scale;
-    const int   output_offset  = dst.quantization_info().offset;
-    const float output_scale   = dst.quantization_info().scale;
+    const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+    const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
+
+    const int   input_offset   = -iq_info.offset;
+    const float input_scale    = iq_info.scale;
+    const int   weights_offset = -wq_info.offset;
+    const float weights_scale  = wq_info.scale;
+    const int   output_offset  = oq_info.offset;
+    const float output_scale   = oq_info.scale;
 
     int         output_multiplier = 0;
     int         output_shift      = 0;
diff --git a/tests/validation/reference/FuseBatchNormalization.cpp b/tests/validation/reference/FuseBatchNormalization.cpp
new file mode 100644
index 0000000..df12b25
--- /dev/null
+++ b/tests/validation/reference/FuseBatchNormalization.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "FuseBatchNormalization.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+void fuse_batch_normalization_dwc_layer(const SimpleTensor<T> &w, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, SimpleTensor<T> &w_fused, SimpleTensor<T> &b_fused, const SimpleTensor<T> &b,
+                                        const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon)
+{
+    const auto *w_data = w.data();
+    const auto *b_data = b.data();
+
+    auto *w_fused_data = w_fused.data();
+    auto *b_fused_data = b_fused.data();
+
+    const unsigned int width  = w.shape()[0];
+    const unsigned int height = w.shape()[1];
+    const unsigned int dim2   = w.shape()[2];
+
+    for(unsigned int b = 0; b < dim2; ++b)
+    {
+        const auto mean_val  = mean.data()[b];
+        const auto var_val   = var.data()[b];
+        const auto beta_val  = beta.data()[b];
+        const auto gamma_val = gamma.data()[b];
+
+        for(unsigned int i = 0; i < width * height; ++i)
+        {
+            unsigned int index = i + b * width * height;
+
+            w_fused_data[index] = (gamma_val * (w_data[index])) / sqrt(var_val + epsilon);
+        }
+
+        b_fused_data[b] = (b_data[b] - mean_val) / sqrt(var_val + epsilon) * gamma_val + beta_val;
+    }
+}
+
+template <typename T>
+void fuse_batch_normalization_conv_layer(const SimpleTensor<T> &w, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, SimpleTensor<T> &w_fused, SimpleTensor<T> &b_fused,
+                                         const SimpleTensor<T> &b,
+                                         const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon)
+{
+    const auto *w_data = w.data();
+    const auto *b_data = b.data();
+
+    auto *w_fused_data = w_fused.data();
+    auto *b_fused_data = b_fused.data();
+
+    const unsigned int width  = w.shape()[0];
+    const unsigned int height = w.shape()[1];
+    const unsigned int dim2   = w.shape()[2];
+    const unsigned int dim3   = w.shape()[3];
+
+    for(unsigned int b = 0; b < dim3; ++b)
+    {
+        const auto mean_val  = mean.data()[b];
+        const auto var_val   = var.data()[b];
+        const auto beta_val  = beta.data()[b];
+        const auto gamma_val = gamma.data()[b];
+
+        for(unsigned int i = 0; i < width * height * dim2; ++i)
+        {
+            unsigned int index = i + b * width * height * dim2;
+
+            w_fused_data[index] = (gamma_val * (w_data[index])) / sqrt(var_val + epsilon);
+        }
+
+        b_fused_data[b] = (b_data[b] - mean_val) / sqrt(var_val + epsilon) * gamma_val + beta_val;
+    }
+}
+
+template void fuse_batch_normalization_dwc_layer(const SimpleTensor<float> &w, const SimpleTensor<float> &mean, const SimpleTensor<float> &var, SimpleTensor<float> &w_fused,
+                                                 SimpleTensor<float> &b_fused, const SimpleTensor<float> &b, const SimpleTensor<float> &beta, const SimpleTensor<float> &gamma, float epsilon);
+template void fuse_batch_normalization_dwc_layer(const SimpleTensor<half> &w, const SimpleTensor<half> &mean, const SimpleTensor<half> &var, SimpleTensor<half> &w_fused, SimpleTensor<half> &b_fused,
+                                                 const SimpleTensor<half> &b, const SimpleTensor<half> &beta, const SimpleTensor<half> &gamma, float epsilon);
+template void fuse_batch_normalization_conv_layer(const SimpleTensor<float> &w, const SimpleTensor<float> &mean, const SimpleTensor<float> &var, SimpleTensor<float> &w_fused,
+                                                  SimpleTensor<float> &b_fused, const SimpleTensor<float> &b, const SimpleTensor<float> &beta, const SimpleTensor<float> &gamma, float epsilon);
+template void fuse_batch_normalization_conv_layer(const SimpleTensor<half> &w, const SimpleTensor<half> &mean, const SimpleTensor<half> &var, SimpleTensor<half> &w_fused, SimpleTensor<half> &b_fused,
+                                                  const SimpleTensor<half> &b, const SimpleTensor<half> &beta, const SimpleTensor<half> &gamma, float epsilon);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
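
Note: both helpers fold the batch-normalization statistics into the preceding convolution, per output channel:

    w_fused = (gamma * w) / sqrt(var + epsilon)
    b_fused = (bias - mean) / sqrt(var + epsilon) * gamma + beta

(bias is the tensor named b in the code above), so that convolving with w_fused and adding b_fused reproduces batch normalization applied to the original convolution output.
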
diff --git a/tests/validation/reference/FuseBatchNormalization.h b/tests/validation/reference/FuseBatchNormalization.h
new file mode 100644
index 0000000..1575fc0
--- /dev/null
+++ b/tests/validation/reference/FuseBatchNormalization.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_H__
+#define __ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+void fuse_batch_normalization_dwc_layer(const SimpleTensor<T> &w, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, SimpleTensor<T> &w_fused, SimpleTensor<T> &b_fused, const SimpleTensor<T> &b,
+                                        const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon);
+
+template <typename T>
+void fuse_batch_normalization_conv_layer(const SimpleTensor<T> &w, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, SimpleTensor<T> &w_fused, SimpleTensor<T> &b_fused,
+                                         const SimpleTensor<T> &b,
+                                         const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // __ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_H__
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 9a7e409..97d0532 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -95,6 +95,34 @@
         (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
     }
 }
+
+template <typename T>
+void quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<int16_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                      int32_t min, int32_t max)
+{
+    const int cols_in = in->shape().x();
+
+    for(int i = 0; i < in->num_elements(); ++i)
+    {
+        int32_t result = (*in)[i];
+
+        if(bias != nullptr)
+        {
+            result += (*bias)[i % cols_in];
+        }
+
+        // Fixed point multiplication
+        result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift);
+
+        // Bounded ReLu
+        if(min != max)
+        {
+            result = std::max(min, std::min(max, result));
+        }
+
+        (*dst)[i] = static_cast<int16_t>(std::max(-32768, std::min(32767, result)));
+    }
+}
 } // namespace
 
 template <typename T_out, typename T_in>
@@ -201,10 +229,36 @@
     return dst;
 }
 
+template <typename T>
+SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min,
+                                                                                int32_t max)
+{
+    SimpleTensor<int16_t> dst(in.shape(), DataType::QSYMM16);
+
+    quantize_down_int32_to_int16_scale_by_fixedpoint<T>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, min, max);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t min, int32_t max)
+{
+    SimpleTensor<int16_t> dst(in.shape(), DataType::QSYMM16);
+
+    quantize_down_int32_to_int16_scale_by_fixedpoint<T>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, min, max);
+
+    return dst;
+}
+
 template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
                                                                                          int32_t result_offset_after_shift, int32_t min, int32_t max);
 template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
                                                                                          int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
+template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                         int32_t min, int32_t max);
+template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
+                                                                                         int32_t result_shift, int32_t min, int32_t max);
 template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min,
                                                                            int32_t max);
 template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, int32_t result_mult_int,
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
index 4396155..5581f67 100644
--- a/tests/validation/reference/GEMMLowp.h
+++ b/tests/validation/reference/GEMMLowp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,13 @@
 template <typename T>
 SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
                                                                                 int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0);
+
+template <typename T>
+SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t min, int32_t max);
+template <typename T>
+SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier,
+                                                                                int32_t result_shift, int32_t min, int32_t max);
 } // namespace reference
 } // namespace validation
 } // namespace test
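For reference, the int32 -> QSYMM16 requantization added above follows the same recipe as the existing uint8 path: multiply the accumulator by a fixed-point multiplier, apply a rounding right shift, optionally clamp to [min, max] for a bounded ReLU, then saturate to the int16 range. The snippet below is a minimal standalone sketch of that arithmetic under the usual gemmlowp semantics; rounding_doubling_high_mul and rounding_divide_by_pow2 are hypothetical stand-ins for the library's asymm_int_mult and asymm_rounding_divide_by_pow2 helpers, not the helpers themselves.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

// Saturating rounding doubling high multiply: high 32 bits of 2*a*b, rounded to nearest.
int32_t rounding_doubling_high_mul(int32_t a, int32_t b)
{
    const bool    overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
    const int64_t ab_64    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge    = (ab_64 >= 0) ? (1 << 30) : (1 - (1 << 30));
    const int32_t high32   = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
    return overflow ? std::numeric_limits<int32_t>::max() : high32;
}

// Arithmetic right shift with rounding half away from zero.
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
    const int32_t mask      = (1 << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Requantize one int32 accumulator down to the int16 range.
int16_t requantize_to_int16(int32_t acc, int32_t multiplier, int32_t shift)
{
    const int32_t res = rounding_divide_by_pow2(rounding_doubling_high_mul(acc, multiplier), shift);
    return static_cast<int16_t>(std::max(-32768, std::min(32767, res)));
}

int main()
{
    // A multiplier of 2^30 encodes a real scale of 0.5, so an accumulator of 150 becomes 75.
    std::printf("%d\n", requantize_to_int16(150, 1 << 30, 0));
    return 0;
}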
diff --git a/tests/validation/reference/Im2Col.cpp b/tests/validation/reference/Im2Col.cpp
index 076b2ab..4b41cdb 100644
--- a/tests/validation/reference/Im2Col.cpp
+++ b/tests/validation/reference/Im2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@
     const int src_channels  = src.shape().z();
     const int batches       = src.shape().total_size_upper(3);
     const int dst_height    = dst.shape().y();
-    const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
+    const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().uniform().offset : 0;
     int       dst_idx       = 0;
 
     // Compute width and height of the convolved tensors
@@ -105,7 +105,7 @@
     const int batches       = src.shape().total_size_upper(3);
     const int dst_width     = has_bias ? dst.shape().x() - 1 : dst.shape().x();
     const int dst_height    = dst.shape().y();
-    const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
+    const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().uniform().offset : 0;
 
     // Compute width and height of the convolved tensors
     std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(src_width, src_height, kernel_dims.width, kernel_dims.height, conv_info);
@@ -139,7 +139,7 @@
 }
 
 template <typename T>
-void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const unsigned int num_groups)
+void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int num_groups)
 {
     switch(src.data_layout())
     {
diff --git a/tests/validation/reference/Im2Col.h b/tests/validation/reference/Im2Col.h
index f519d0e..34b8476 100644
--- a/tests/validation/reference/Im2Col.h
+++ b/tests/validation/reference/Im2Col.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,7 +35,7 @@
 namespace reference
 {
 template <typename T>
-void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const unsigned int num_groups);
+void im2col(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int num_groups);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
new file mode 100644
index 0000000..c44c983
--- /dev/null
+++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "MeanStdDevNormalizationLayer.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float epsilon)
+{
+    // Create reference
+    SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
+
+    const int cols       = src.shape()[0];
+    const int batch_size = src.shape()[1];
+
+    for(int i = 0; i < batch_size; ++i)
+    {
+        T sum    = static_cast<T>(0.f);
+        T sum_sq = static_cast<T>(0.f);
+        for(int j = 0; j < cols; ++j)
+        {
+            const T value = src[j + i * cols];
+            sum += value;
+            sum_sq += value * value;
+        }
+        const T mean       = sum / static_cast<T>(cols);
+        const T var        = ((sum_sq / static_cast<T>(cols)) - (mean * mean)) + static_cast<T>(epsilon);
+        const T stddev_inv = static_cast<T>(1.f) / static_cast<T>(std::sqrt(var));
+        for(int j = 0; j < cols; ++j)
+        {
+            dst[j + i * cols] = (src[j + i * cols] - mean) * stddev_inv;
+        }
+    }
+    return dst;
+}
+
+template SimpleTensor<float> mean_std_normalization_layer(const SimpleTensor<float> &src, float epsilon);
+template SimpleTensor<half> mean_std_normalization_layer(const SimpleTensor<half> &src, float epsilon);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
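The new reference normalizes each row of a 2D input to zero mean and unit variance, with epsilon added to the variance for numerical stability. A minimal standalone sketch of the same computation on a plain std::vector (the names here are illustrative, not part of the library):

#include <cmath>
#include <cstdio>
#include <vector>

std::vector<float> mean_std_normalize(const std::vector<float> &src, int cols, float epsilon = 1e-8f)
{
    std::vector<float> dst(src.size());
    const int batch_size = static_cast<int>(src.size()) / cols;
    for(int i = 0; i < batch_size; ++i)
    {
        // Accumulate sum and sum of squares over the row.
        float sum = 0.f, sum_sq = 0.f;
        for(int j = 0; j < cols; ++j)
        {
            const float v = src[i * cols + j];
            sum += v;
            sum_sq += v * v;
        }
        const float mean       = sum / cols;
        const float var        = (sum_sq / cols) - (mean * mean) + epsilon;
        const float stddev_inv = 1.f / std::sqrt(var);
        for(int j = 0; j < cols; ++j)
        {
            dst[i * cols + j] = (src[i * cols + j] - mean) * stddev_inv;
        }
    }
    return dst;
}

int main()
{
    // One row {1, 2, 3, 4}: mean 2.5, output roughly {-1.342, -0.447, 0.447, 1.342}.
    const auto out = mean_std_normalize({ 1.f, 2.f, 3.f, 4.f }, 4);
    for(float v : out)
    {
        std::printf("%.3f ", v);
    }
    std::printf("\n");
    return 0;
}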
diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.h b/tests/validation/reference/MeanStdDevNormalizationLayer.h
new file mode 100644
index 0000000..b97f285
--- /dev/null
+++ b/tests/validation/reference/MeanStdDevNormalizationLayer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_H__
+#define __ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float epsilon = 1e-8f);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_H__ */
diff --git a/tests/validation/reference/NonMaxSuppression.cpp b/tests/validation/reference/NonMaxSuppression.cpp
index 5b7980d..8fc370b 100644
--- a/tests/validation/reference/NonMaxSuppression.cpp
+++ b/tests/validation/reference/NonMaxSuppression.cpp
@@ -76,10 +76,10 @@
 inline float compute_intersection(const std::pair<float, float> &b0_min, const std::pair<float, float> &b0_max,
                                   const std::pair<float, float> &b1_min, const std::pair<float, float> &b1_max, float b0_size, float b1_size)
 {
-    const float inter = std::max<float>(std::min<float>(b0_max.first, b1_max.first) - std::max<float>(b0_min.first, b1_min.first), 0.0) * std::max<float>(std::min<float>(b0_max.second,
+    const float inter = std::max<float>(std::min<float>(b0_max.first, b1_max.first) - std::max<float>(b0_min.first, b1_min.first), 0.0f) * std::max<float>(std::min<float>(b0_max.second,
                         b1_max.second)
                         - std::max<float>(b0_min.second, b1_min.second),
-                        0.0);
+                        0.0f);
     return inter / (b0_size + b1_size - inter);
 }
 
@@ -107,7 +107,7 @@
     std::vector<CandidateBox> candidates_vector;
     for(int i = 0; i < scores.num_elements(); ++i)
     {
-        if(scores[i] > threshold)
+        if(scores[i] >= threshold)
         {
             const auto cb = CandidateBox({ i, scores[i] });
             candidates_vector.push_back(cb);
@@ -115,7 +115,7 @@
     }
     std::stable_sort(candidates_vector.begin(), candidates_vector.end(), [](const CandidateBox bb0, const CandidateBox bb1)
     {
-        return bb0.second >= bb1.second;
+        return bb0.second > bb1.second;
     });
     return candidates_vector;
 }
@@ -155,6 +155,12 @@
         }
     }
     std::copy_n(selected.begin(), selected.size(), indices.data());
+
+    for(unsigned int i = selected.size(); i < max_output_size; ++i)
+    {
+        indices[i] = -1;
+    }
+
     return indices;
 }
 } // namespace reference
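Beyond the float-literal cleanup, this hunk changes behaviour slightly: candidates whose score equals the threshold are now admitted (>= instead of >), the sort comparator is made strict (std::stable_sort requires a strict weak ordering), and unused output slots are padded with -1. The overlap test itself is the usual intersection over union; a minimal standalone sketch, with an illustrative Box type that is not part of the library:

#include <algorithm>
#include <cstdio>

struct Box
{
    float xmin, ymin, xmax, ymax;
};

// IoU of two axis-aligned boxes: intersection area over union area.
float iou(const Box &a, const Box &b)
{
    const float inter_w = std::max(0.f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
    const float inter_h = std::max(0.f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
    const float inter   = inter_w * inter_h;
    const float area_a  = (a.xmax - a.xmin) * (a.ymax - a.ymin);
    const float area_b  = (b.xmax - b.xmin) * (b.ymax - b.ymin);
    return inter / (area_a + area_b - inter);
}

int main()
{
    // Two unit boxes overlapping by half: IoU = 0.5 / 1.5 = 0.333.
    std::printf("%.3f\n", iou({ 0.f, 0.f, 1.f, 1.f }, { 0.5f, 0.f, 1.5f, 1.f }));
    return 0;
}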
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index ea058ec..41a9192 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -128,7 +128,8 @@
 } // namespace
 
 template <typename T1, typename T2>
-SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout)
+SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+                                           const QuantizationInfo &qout)
 {
     ARM_COMPUTE_UNUSED(qout);
 
@@ -150,7 +151,7 @@
 
 template <>
 SimpleTensor<uint8_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
-                                                QuantizationInfo qout)
+                                                const QuantizationInfo &qout)
 {
     SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), src2.data_type(), 1, qout);
 
@@ -175,12 +176,39 @@
     }
     return dst;
 }
+
+template <>
+SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+                                                const QuantizationInfo &qout)
+{
+    SimpleTensor<int16_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), src2.data_type(), 1, qout);
+
+    if(src1.data_type() == DataType::QSYMM16 && src2.data_type() == DataType::QSYMM16)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_symmetric<int16_t>(src1);
+        SimpleTensor<float> src2_tmp = convert_from_symmetric<int16_t>(src2);
+        SimpleTensor<float> dst_tmp  = pixel_wise_multiplication<float>(src1_tmp, src2_tmp, scale, convert_policy, rounding_policy, qout);
+        dst                          = convert_to_symmetric<int16_t>(dst_tmp, qout);
+    }
+    else
+    {
+        if(scale < 0)
+        {
+            ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
+        }
+
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
+    }
+    return dst;
+}
 // *INDENT-OFF*
 // clang-format off
-template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout);
-template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout);
-template SimpleTensor<float> pixel_wise_multiplication(const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout);
-template SimpleTensor<half_float::half> pixel_wise_multiplication(const SimpleTensor<half_float::half> &src1, const SimpleTensor<half_float::half> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout);
+template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, const QuantizationInfo &qout);
+template SimpleTensor<float> pixel_wise_multiplication(const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, const QuantizationInfo &qout);
+template SimpleTensor<half_float::half> pixel_wise_multiplication(const SimpleTensor<half_float::half> &src1, const SimpleTensor<half_float::half> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, const QuantizationInfo &qout);
 // clang-format on
 // *INDENT-ON*
 } // namespace reference
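The int16 specialization above converts QSYMM16 tensors to float, runs the float reference, and converts the result back with the output quantization info. A minimal sketch of symmetric 16-bit quantization as commonly defined (scale only, no offset); the function names are illustrative, not the library's convert_to_symmetric helpers:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int16_t quantize_qsymm16_sketch(float v, float scale)
{
    // q = clamp(round(v / scale), -32768, 32767)
    const int32_t q = static_cast<int32_t>(std::lround(v / scale));
    return static_cast<int16_t>(std::max(-32768, std::min(32767, q)));
}

float dequantize_qsymm16_sketch(int16_t q, float scale)
{
    return static_cast<float>(q) * scale;
}

int main()
{
    const float   scale = 1.f / 32768.f; // typical scale covering roughly [-1, 1)
    const int16_t q     = quantize_qsymm16_sketch(0.25f, scale);
    std::printf("%d -> %.4f\n", q, dequantize_qsymm16_sketch(q, scale)); // 8192 -> 0.2500
    return 0;
}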
diff --git a/tests/validation/reference/PixelWiseMultiplication.h b/tests/validation/reference/PixelWiseMultiplication.h
index 787a7b2..39d2bc7 100644
--- a/tests/validation/reference/PixelWiseMultiplication.h
+++ b/tests/validation/reference/PixelWiseMultiplication.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 {
 template <typename T1, typename T2>
 SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale,
-                                           ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qout = QuantizationInfo());
+                                           ConvertPolicy convert_policy, RoundingPolicy rounding_policy, const QuantizationInfo &qout = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index 2f33481..182585a 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -34,24 +34,25 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info)
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo &quantization_info)
 {
     // Create reference
     SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
 
+    const UniformQuantizationInfo qinfo = quantization_info.uniform();
     for(int i = 0; i < src.num_elements(); ++i)
     {
 #ifdef __aarch64__
-        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_NEAREST_EVEN);
+        dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_NEAREST_EVEN);
 #else  // __aarch64__
-        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_ZERO);
+        dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_ZERO);
 #endif // __aarch64__
     }
     return dst;
 }
 
-template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, const QuantizationInfo quantization_info);
-template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, const QuantizationInfo quantization_info);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, const QuantizationInfo &quantization_info);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/QuantizationLayer.h b/tests/validation/reference/QuantizationLayer.h
index 2d13690..462396f 100644
--- a/tests/validation/reference/QuantizationLayer.h
+++ b/tests/validation/reference/QuantizationLayer.h
@@ -36,7 +36,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info);
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo &quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
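The reference now extracts a UniformQuantizationInfo and calls the free quantize_qasymm8 helper rather than a member function on QuantizationInfo. The underlying arithmetic is q = clamp(round(value / scale) + offset, 0, 255), with the rounding mode differing by platform (to-nearest-even on aarch64, to-zero elsewhere). A minimal sketch, using std::lround purely for illustration; quantize_u8 is a hypothetical stand-in, not the library function:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

uint8_t quantize_u8(float value, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::lround(value / scale)) + offset;
    return static_cast<uint8_t>(std::max(0, std::min(255, q)));
}

int main()
{
    // With scale = 0.1 and offset = 128, a value of 1.0 maps to 138.
    std::printf("%d\n", static_cast<int>(quantize_u8(1.0f, 0.1f, 128)));
    return 0;
}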
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index fb7a6d6..fe128cc 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -42,7 +42,25 @@
 OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
 {
     using type = typename std::remove_cv<OT>::type;
-    auto res   = (op == ReductionOperation::PROD) ? type(1) : type(0);
+    T res;
+    switch(op)
+    {
+        case ReductionOperation::PROD:
+        {
+            res = type(1);
+        }
+        break;
+        case ReductionOperation::MIN:
+        case ReductionOperation::MAX:
+        {
+            res = *ptr;
+        }
+        break;
+        default:
+        {
+            res = type(0);
+        }
+    }
 
     if(std::is_integral<type>::value)
     {
@@ -53,16 +71,16 @@
 
             switch(op)
             {
-                case ReductionOperation::ARG_IDX_MIN:
-                    if(*(ptr + stride * static_cast<uint32_t>(int_res)) > elem)
+                case ReductionOperation::MIN:
+                    if(static_cast<T>(int_res) > elem)
                     {
-                        int_res = static_cast<uint32_t>(i);
+                        int_res = elem;
                     }
                     break;
-                case ReductionOperation::ARG_IDX_MAX:
-                    if(*(ptr + stride * static_cast<uint32_t>(int_res)) < elem)
+                case ReductionOperation::MAX:
+                    if(static_cast<T>(int_res) < elem)
                     {
-                        int_res = static_cast<uint32_t>(i);
+                        int_res = elem;
                     }
                     break;
                 case ReductionOperation::SUM_SQUARE:
@@ -92,16 +110,16 @@
             auto elem = *(ptr + stride * i);
             switch(op)
             {
-                case ReductionOperation::ARG_IDX_MIN:
-                    if(*(ptr + stride * static_cast<uint32_t>(res)) > elem)
+                case ReductionOperation::MIN:
+                    if(res > elem)
                     {
-                        res = static_cast<uint32_t>(i);
+                        res = elem;
                     }
                     break;
-                case ReductionOperation::ARG_IDX_MAX:
-                    if(*(ptr + stride * static_cast<uint32_t>(res)) < elem)
+                case ReductionOperation::MAX:
+                    if(res < elem)
                     {
-                        res = static_cast<uint32_t>(i);
+                        res = elem;
                     }
                     break;
                 case ReductionOperation::SUM_SQUARE:
@@ -125,6 +143,35 @@
     }
     return res;
 }
+
+template <typename T, typename OT>
+OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
+{
+    uint32_t res = 0;
+    for(int i = 0; i < reduce_elements; ++i)
+    {
+        auto elem = *(ptr + stride * i);
+        switch(op)
+        {
+            case ReductionOperation::ARG_IDX_MIN:
+                if(*(ptr + stride * res) > elem)
+                {
+                    res = static_cast<uint32_t>(i);
+                }
+                break;
+            case ReductionOperation::ARG_IDX_MAX:
+                if(*(ptr + stride * res) < elem)
+                {
+                    res = static_cast<uint32_t>(i);
+                }
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Operation not supported");
+        }
+    }
+    return static_cast<OT>(res);
+}
+
 } // namespace
 
 template <typename T, typename OT>
@@ -148,7 +195,9 @@
             for(unsigned int du = 0; du < upper_dims; ++du)
             {
                 const T *src_row_ptr = src.data() + du * reduce_elems;
-                dst[du]              = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
+                dst[du]              = is_arg_min_max ?
+                                       reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
+                                       reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
             }
         }
         break;
@@ -162,7 +211,9 @@
                     const int in_offset   = du * src_height * src_width + x;
                     const int out_offset  = du * src_width + x;
                     const T *src_row_ptr = src.data() + in_offset;
-                    dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
+                    dst[out_offset]       = is_arg_min_max ?
+                                            reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
+                                            reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
                 }
             }
         }
@@ -179,7 +230,9 @@
                         const int in_offset   = du * src_depth * src_height * src_width + y * src_width + x;
                         const int out_offset  = du * src_width * src_height + y * src_width + x;
                         const T *src_row_ptr = src.data() + in_offset;
-                        dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_height * src_width);
+                        dst[out_offset]       = is_arg_min_max ?
+                                                reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
+                                                reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height);
                     }
                 }
             }
@@ -199,7 +252,9 @@
                             const int in_offset   = du * src_batch * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                             const int out_offset  = du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                             const T *src_row_ptr = src.data() + in_offset;
-                            dst[out_offset]       = reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
+                            dst[out_offset]       = is_arg_min_max ?
+                                                    reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
+                                                    reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
                         }
                     }
                 }
@@ -238,6 +293,7 @@
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 
 template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
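This patch splits ARG_IDX_MIN / ARG_IDX_MAX out of reduce_operation into a dedicated reduce_operation_arg_min_max, so MIN / MAX can return values while the ARG_IDX_* operations return the index of the extremum within the strided row. A minimal standalone sketch of the strided arg-max, assuming the same pointer-plus-stride walk as the reference:

#include <cstdint>
#include <cstdio>

uint32_t arg_max(const float *ptr, int reduce_elements, int stride)
{
    uint32_t best = 0;
    for(int i = 1; i < reduce_elements; ++i)
    {
        if(ptr[stride * i] > ptr[stride * best])
        {
            best = static_cast<uint32_t>(i);
        }
    }
    return best;
}

int main()
{
    // Column-wise arg-max over a 2x3 row-major matrix (stride = row length = 3).
    const float data[] = { 1.f, 5.f, 2.f,
                           4.f, 0.f, 7.f };
    std::printf("%u %u %u\n",
                static_cast<unsigned int>(arg_max(data, 2, 3)),
                static_cast<unsigned int>(arg_max(data + 1, 2, 3)),
                static_cast<unsigned int>(arg_max(data + 2, 2, 3))); // 1 0 1
    return 0;
}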
 
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 2f7bf2d..63a2853 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -71,28 +71,25 @@
         float       x_src = 0;
         float       y_src = 0;
 
-        switch(sampling_policy)
-        {
-            case SamplingPolicy::TOP_LEFT:
-                x_src = idx * wr;
-                y_src = idy * hr;
-                break;
-            case SamplingPolicy::CENTER:
-                x_src = (idx + 0.5f) * wr - 0.5f;
-                y_src = (idy + 0.5f) * hr - 0.5f;
-                break;
-            default:
-                ARM_COMPUTE_ERROR("Unsupported sampling policy.");
-                break;
-        }
-
         switch(policy)
         {
             case InterpolationPolicy::NEAREST_NEIGHBOR:
             {
-                //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords
-                x_src = (idx + 0.5f) * wr;
-                y_src = (idy + 0.5f) * hr;
+                switch(sampling_policy)
+                {
+                    case SamplingPolicy::TOP_LEFT:
+                        x_src = std::floor(idx * wr);
+                        y_src = std::floor(idy * hr);
+                        break;
+                    case SamplingPolicy::CENTER:
+                        // Calculating the source coords without -0.5f is equivalent to rounding the x_src/y_src coords
+                        x_src = (idx + 0.5f) * wr;
+                        y_src = (idy + 0.5f) * hr;
+                        break;
+                    default:
+                        ARM_COMPUTE_ERROR("Unsupported sampling policy.");
+                }
+
                 id.set(0, x_src);
                 id.set(1, y_src);
 
@@ -105,6 +102,20 @@
             }
             case InterpolationPolicy::BILINEAR:
             {
+                switch(sampling_policy)
+                {
+                    case SamplingPolicy::TOP_LEFT:
+                        x_src = idx * wr;
+                        y_src = idy * hr;
+                        break;
+                    case SamplingPolicy::CENTER:
+                        x_src = (idx + 0.5f) * wr - 0.5f;
+                        y_src = (idy + 0.5f) * hr - 0.5f;
+                        break;
+                    default:
+                        ARM_COMPUTE_ERROR("Unsupported sampling policy.");
+                }
+
                 id.set(0, std::floor(x_src));
                 id.set(1, std::floor(y_src));
                 if(is_valid_pixel_index(x_src, y_src, width, height, border_size))
@@ -180,10 +191,10 @@
                             SamplingPolicy sampling_policy, bool ceil_policy_scale)
 {
     SimpleTensor<uint8_t> dst;
-    if(src.quantization_info().scale != 0.f)
+    if(src.quantization_info().uniform().scale != 0.f)
     {
         SimpleTensor<float> src_tmp                 = convert_from_asymmetric(src);
-        float               constant_border_value_f = scvt_f32_qasymm8(constant_border_value, src.quantization_info().scale, src.quantization_info().offset);
+        float               constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info());
         SimpleTensor<float> dst_tmp                 = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale);
         dst                                         = convert_to_asymmetric(dst_tmp, src.quantization_info());
     }
diff --git a/tests/validation/reference/SpaceToBatch.cpp b/tests/validation/reference/SpaceToBatch.cpp
index c635d4a..8c25bb7 100644
--- a/tests/validation/reference/SpaceToBatch.cpp
+++ b/tests/validation/reference/SpaceToBatch.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 template <typename T>
 SimpleTensor<T> space_to_batch(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const SimpleTensor<int32_t> &paddings, const TensorShape &dst_shape)
 {
-    SimpleTensor<T> result(dst_shape, src.data_type());
+    SimpleTensor<T> result(dst_shape, src.data_type(), 1, src.quantization_info());
 
     const auto width_out  = static_cast<int>(dst_shape[0]);
     const auto height_out = static_cast<int>(dst_shape[1]);
@@ -55,6 +55,9 @@
     const auto padding_left = paddings[0];
     const auto padding_top  = paddings[2];
 
+    // Pad value must represent logical zero (the quantization offset for quantized types)
+    const auto pad_value = is_data_type_quantized(src.data_type()) ? src.quantization_info().uniform().offset : 0;
+
     int out_pos = 0;
     for(int outB = 0; outB < batch_out; ++outB)
     {
@@ -74,7 +77,7 @@
                     if(outH * block_height + shift_h < padding_top || outH * block_height + shift_h >= padding_top + height_in || outW * block_width + shift_w < padding_left
                        || outW * block_width + shift_w >= padding_left + width_in)
                     {
-                        result[out_pos] = 0;
+                        result[out_pos] = pad_value;
                     }
                     else
                     {
@@ -90,6 +93,7 @@
 
 template SimpleTensor<float> space_to_batch(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const SimpleTensor<int32_t> &paddings, const TensorShape &dst_shape);
 template SimpleTensor<half> space_to_batch(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const SimpleTensor<int32_t> &paddings, const TensorShape &dst_shape);
+template SimpleTensor<uint8_t> space_to_batch(const SimpleTensor<uint8_t> &src, const SimpleTensor<int32_t> &block_shape, const SimpleTensor<int32_t> &paddings, const TensorShape &dst_shape);
 } // namespace reference
 } // namespace validation
 } // namespace test
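Padding with the quantization offset instead of a raw 0 matters because, for asymmetric quantization, the offset is the encoding of real 0.0; a raw 0 would decode to a negative value. A tiny illustration with an assumed scale of 0.1 and offset of 128:

#include <cstdio>

int main()
{
    const float scale  = 0.1f;
    const int   offset = 128;
    // raw 0 decodes to -12.8, raw 128 (the offset) decodes to 0.0
    std::printf("%.1f %.1f\n", (0 - offset) * scale, (offset - offset) * scale);
    return 0;
}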
diff --git a/tests/validation/reference/SpaceToDepth.cpp b/tests/validation/reference/SpaceToDepth.cpp
new file mode 100644
index 0000000..bd8e37a8b
--- /dev/null
+++ b/tests/validation/reference/SpaceToDepth.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "SpaceToDepth.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+// Space to Depth
+template <typename T>
+SimpleTensor<T> space_to_depth(const SimpleTensor<T> &src, const TensorShape &dst_shape, const int block_shape)
+{
+    SimpleTensor<T> result(dst_shape, src.data_type());
+
+    const auto width_out   = static_cast<int>(dst_shape[0]);
+    const auto height_out  = static_cast<int>(dst_shape[1]);
+    const auto channel_out = static_cast<int>(dst_shape[2]);
+
+    const auto width_in   = static_cast<int>(src.shape()[0]);
+    const auto height_in  = static_cast<int>(src.shape()[1]);
+    const auto channel_in = static_cast<int>(src.shape()[2]);
+
+    const auto batch = static_cast<int>(src.shape()[3]);
+
+    const auto block_width  = block_shape;
+    const auto block_height = block_shape;
+
+    int out_pos = 0;
+    for(int ba = 0; ba < batch; ++ba)
+    {
+        for(int outC = 0; outC < channel_out; ++outC)
+        {
+            unsigned int inC = outC % channel_in;
+
+            int shift_w = (outC / channel_in) % block_width;
+            int shift_h = (outC / channel_in) / block_width;
+
+            for(int outH = 0; outH < height_out; ++outH)
+            {
+                for(int outW = 0; outW < width_out; ++outW)
+                {
+                    const auto in_pos = ((ba * channel_in + inC) * height_in + ((outH * block_height + shift_h))) * width_in + (outW * block_width + shift_w);
+                    result[out_pos]   = src[in_pos];
+                    ++out_pos;
+                }
+            }
+        }
+    }
+    return result;
+}
+
+template SimpleTensor<float> space_to_depth(const SimpleTensor<float> &src, const TensorShape &dst_shape, const int block_shape);
+template SimpleTensor<half> space_to_depth(const SimpleTensor<half> &src, const TensorShape &dst_shape, const int block_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
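The new reference rearranges block_shape x block_shape spatial patches into the channel dimension, reading shape()[0..3] as width, height, channel, batch (row-major with width fastest, i.e. NCHW). A minimal standalone sketch of the same index mapping on a flat buffer; the free function below is illustrative, not the library API:

#include <cstdio>
#include <vector>

std::vector<int> space_to_depth_sketch(const std::vector<int> &src, int width_in, int height_in, int channel_in, int block)
{
    const int width_out   = width_in / block;
    const int height_out  = height_in / block;
    const int channel_out = channel_in * block * block;
    std::vector<int> dst(src.size());

    int out_pos = 0;
    for(int outC = 0; outC < channel_out; ++outC)
    {
        const int inC     = outC % channel_in;
        const int shift_w = (outC / channel_in) % block;
        const int shift_h = (outC / channel_in) / block;
        for(int outH = 0; outH < height_out; ++outH)
        {
            for(int outW = 0; outW < width_out; ++outW)
            {
                const int in_pos = (inC * height_in + outH * block + shift_h) * width_in + outW * block + shift_w;
                dst[out_pos++]   = src[in_pos];
            }
        }
    }
    return dst;
}

int main()
{
    // 4x4 single-channel input 0..15, block 2 -> four 2x2 channels:
    // {0,2,8,10}, {1,3,9,11}, {4,6,12,14}, {5,7,13,15}
    std::vector<int> src(16);
    for(int i = 0; i < 16; ++i)
    {
        src[i] = i;
    }
    for(int v : space_to_depth_sketch(src, 4, 4, 1, 2))
    {
        std::printf("%d ", v);
    }
    std::printf("\n");
    return 0;
}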
diff --git a/tests/validation/reference/SpaceToDepth.h b/tests/validation/reference/SpaceToDepth.h
new file mode 100644
index 0000000..885c615
--- /dev/null
+++ b/tests/validation/reference/SpaceToDepth.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_H__
+#define __ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> space_to_depth(const SimpleTensor<T> &src, const TensorShape &dst_shape, const int block_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_H__ */
diff --git a/tests/validation/reference/UpsampleLayer.cpp b/tests/validation/reference/UpsampleLayer.cpp
index 876f6d7..8e36ee8 100644
--- a/tests/validation/reference/UpsampleLayer.cpp
+++ b/tests/validation/reference/UpsampleLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,9 +33,10 @@
 {
 namespace reference
 {
+namespace
+{
 template <typename T>
-SimpleTensor<T> upsample_layer(const SimpleTensor<T> &src,
-                               const Size2D &info, const InterpolationPolicy policy)
+SimpleTensor<T> upsample_function(const SimpleTensor<T> &src, const Size2D &info, const InterpolationPolicy policy)
 {
     ARM_COMPUTE_ERROR_ON(policy != InterpolationPolicy::NEAREST_NEIGHBOR);
     ARM_COMPUTE_UNUSED(policy);
@@ -72,16 +73,39 @@
             }
         }
     }
-
     return out;
 }
 
+} // namespace
+
+template <typename T>
+SimpleTensor<T> upsample_layer(const SimpleTensor<T> &src, const Size2D &info, const InterpolationPolicy policy)
+{
+    return upsample_function<T>(src, info, policy);
+}
+
+template <>
+SimpleTensor<uint8_t> upsample_layer(const SimpleTensor<uint8_t> &src, const Size2D &info, const InterpolationPolicy policy)
+{
+    SimpleTensor<uint8_t> dst(src.shape(), src.data_type(), 1, src.quantization_info());
+
+    if(is_data_type_quantized_asymmetric(src.data_type()))
+    {
+        SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+        SimpleTensor<float> dst_tmp = upsample_function<float>(src_tmp, info, policy);
+        dst                         = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    }
+    else
+    {
+        dst = upsample_function<uint8_t>(src, info, policy);
+    }
+    return dst;
+}
+
 template SimpleTensor<float> upsample_layer(const SimpleTensor<float> &src,
                                             const Size2D &info, const InterpolationPolicy policy);
 template SimpleTensor<half> upsample_layer(const SimpleTensor<half> &src,
                                            const Size2D &info, const InterpolationPolicy policy);
-template SimpleTensor<uint8_t> upsample_layer(const SimpleTensor<uint8_t> &src,
-                                              const Size2D &info, const InterpolationPolicy policy);
 } // namespace reference
 } // namespace validation
 } // namespace test
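The quantized uint8 specialization added here follows the pattern used throughout these references: dequantize to float, run the float reference, requantize with the source quantization info. A minimal sketch of that round trip with a trivial identity op and an assumed scale/offset pair; QInfo, dequantize and quantize below are illustrative, not the library's types:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

struct QInfo
{
    float   scale;
    int32_t offset;
};

float dequantize(uint8_t q, const QInfo &qi)
{
    return (static_cast<int32_t>(q) - qi.offset) * qi.scale;
}

uint8_t quantize(float v, const QInfo &qi)
{
    const int32_t q = static_cast<int32_t>(std::lround(v / qi.scale)) + qi.offset;
    return static_cast<uint8_t>(std::max(0, std::min(255, q)));
}

int main()
{
    const QInfo qi{ 0.5f, 10 };
    const std::vector<uint8_t> src{ 10, 12, 20 }; // represents { 0.0, 1.0, 5.0 }
    std::vector<uint8_t> dst(src.size());
    for(size_t i = 0; i < src.size(); ++i)
    {
        const float f = dequantize(src[i], qi); // to float
        dst[i]        = quantize(f, qi);        // identity "op", back to QASYMM8
    }
    for(uint8_t v : dst)
    {
        std::printf("%d ", static_cast<int>(v)); // 10 12 20
    }
    std::printf("\n");
    return 0;
}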