arm_compute v19.05
diff --git a/tests/validation/reference/ArithmeticDivision.cpp b/tests/validation/reference/ArithmeticDivision.cpp
index 0102231..0ced439 100644
--- a/tests/validation/reference/ArithmeticDivision.cpp
+++ b/tests/validation/reference/ArithmeticDivision.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,7 +77,9 @@
 {
     SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), data_type);
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, id_src1, id_src2, id_dst);
 
diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp
index 062be93..a6205af 100644
--- a/tests/validation/reference/ArithmeticOperations.cpp
+++ b/tests/validation/reference/ArithmeticOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,7 +88,9 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy)
 {
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
@@ -98,14 +100,16 @@
 template <>
 SimpleTensor<uint8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, SimpleTensor<uint8_t> &dst, ConvertPolicy convert_policy)
 {
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
     if(dst.data_type() == DataType::QASYMM8)
     {
         SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
         SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
 
-        Coordinates id_src1, id_src2, id_dst;
-
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
 
         dst = convert_to_asymmetric(dst_tmp, dst.quantization_info());
@@ -114,8 +118,6 @@
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
-
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
         return dst;
diff --git a/tests/validation/reference/CannyEdgeDetector.cpp b/tests/validation/reference/CannyEdgeDetector.cpp
index 92a11db..a952dde 100644
--- a/tests/validation/reference/CannyEdgeDetector.cpp
+++ b/tests/validation/reference/CannyEdgeDetector.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -117,7 +117,8 @@
     ValidRegion     valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(gradient_size / 2 + 1));
 
     // Sobel computation: U == int16_t or int32_t
-    SimpleTensor<U> gx, gy;
+    SimpleTensor<U> gx{};
+    SimpleTensor<U> gy{};
     std::tie(gx, gy) = sobel<U>(src, gradient_size, border_mode, constant_border_value, GradientDimension::GRAD_XY);
 
     using unsigned_U = typename traits::make_unsigned_conditional_t<U>::type;
@@ -178,7 +179,8 @@
             continue;
         }
 
-        unsigned_U mag_90, mag90;
+        unsigned_U mag_90;
+        unsigned_U mag90;
         switch(grad_dir[i])
         {
             case 0: // North/South edge direction, compare against East/West pixels (left & right)
diff --git a/tests/validation/reference/Comparisons.cpp b/tests/validation/reference/Comparisons.cpp
index a83c365..6d08daf 100644
--- a/tests/validation/reference/Comparisons.cpp
+++ b/tests/validation/reference/Comparisons.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -109,7 +109,9 @@
 {
     SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
     return dst;
 }
@@ -119,18 +121,19 @@
 {
     SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
 
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
     if(src1.data_type() == DataType::QASYMM8)
     {
         SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
-
-        Coordinates id_src1, id_src2, id_dst;
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst, id_src1, id_src2, id_dst);
     }
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
     }
     return dst;
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp
similarity index 69%
rename from tests/validation/reference/WidthConcatenateLayer.cpp
rename to tests/validation/reference/ConcatenateLayer.cpp
index 3854339..af818a5 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/ConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,9 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "WidthConcatenateLayer.h"
+#include "ConcatenateLayer.h"
 
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Permute.h"
 
 namespace arm_compute
 {
@@ -33,24 +34,23 @@
 {
 namespace reference
 {
+namespace
+{
 template <typename T>
 SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst)
 {
     // Create reference
     std::vector<TensorShape> shapes;
-
+    shapes.reserve(srcs.size());
     for(const auto &src : srcs)
     {
         shapes.emplace_back(src.shape());
     }
-
     // Compute reference
     int       width_offset = 0;
     const int width_out    = dst.shape().x();
-
     // Set output tensor to 0
     std::fill_n(dst.data(), dst.num_elements(), 0);
-
     for(const auto &src : srcs)
     {
         ARM_COMPUTE_ERROR_ON(width_offset >= width_out);
@@ -89,13 +89,52 @@
         }
         width_offset += width;
     }
-
     return dst;
 }
 
 template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst);
 template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst);
 template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst);
+} // namespace
+
+template <typename T>
+SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis)
+{
+    switch(axis)
+    {
+        case Window::DimX:
+        {
+            return widthconcatenate_layer(srcs, dst);
+        }
+        case Window::DimY:
+        {
+            for(auto &t : srcs)
+            {
+                t = reference::permute<T>(t, PermutationVector(1U, 0U));
+            }
+            dst = reference::permute<T>(dst, PermutationVector(1U, 0U));
+            return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(1U, 0U));
+        }
+        case Window::DimZ:
+        {
+            for(auto &t : srcs)
+            {
+                t = reference::permute<T>(t, PermutationVector(2U, 1U, 0U));
+            }
+            dst = reference::permute<T>(dst, PermutationVector(2U, 1U, 0U));
+            return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(2U, 1U, 0U));
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Not supported");
+            return dst;
+        }
+    }
+}
+
+template SimpleTensor<float> concatenate_layer(std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst, unsigned int axis);
+template SimpleTensor<half> concatenate_layer(std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst, unsigned int axis);
+template SimpleTensor<uint8_t> concatenate_layer(std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
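Note: the new concatenate_layer entry point reduces concatenation along Y or Z to the existing
width (DimX) path by permuting the target axis into X, width-concatenating, and permuting the
result back. A minimal usage sketch, assuming the SimpleTensor/TensorShape test API shown in this
patch; the shapes and the helper name are illustrative only:

    #include "arm_compute/core/Window.h"
    #include "tests/SimpleTensor.h"
    #include "tests/validation/reference/ConcatenateLayer.h"

    #include <vector>

    using namespace arm_compute;
    using namespace arm_compute::test::validation;

    // Illustrative only: concatenate two 3-D float tensors along the Y axis.
    SimpleTensor<float> concat_along_y_example()
    {
        std::vector<SimpleTensor<float>> srcs;
        srcs.emplace_back(TensorShape(4U, 2U, 3U), DataType::F32);
        srcs.emplace_back(TensorShape(4U, 5U, 3U), DataType::F32);

        // The destination Y extent is the sum of the source Y extents (2 + 5 = 7).
        SimpleTensor<float> dst(TensorShape(4U, 7U, 3U), DataType::F32);

        // Internally: permute (1, 0) so Y becomes X, width-concatenate, permute back.
        return reference::concatenate_layer(srcs, dst, Window::DimY);
    }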
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/ConcatenateLayer.h
similarity index 81%
rename from tests/validation/reference/WidthConcatenateLayer.h
rename to tests/validation/reference/ConcatenateLayer.h
index 0f1f428..14fd097 100644
--- a/tests/validation/reference/WidthConcatenateLayer.h
+++ b/tests/validation/reference/ConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
-#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+#ifndef __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__
+#define __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -37,9 +37,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst);
+SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */
+#endif /* __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__ */
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index f41a6fc..6909011 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -111,10 +111,15 @@
 }
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation, unsigned int num_groups)
+                                  const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info)
 {
+    // If no explicit quantization info has been set, use the same as the source tensor
+    if(out_quant_info == QuantizationInfo())
+    {
+        out_quant_info = src.quantization_info();
+    }
     // Create reference
-    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.quantization_info() };
+    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, out_quant_info };
 
     if(src.data_layout() == DataLayout::NHWC)
     {
@@ -131,11 +136,11 @@
 }
 
 template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
-                                               const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                               const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
-                                              const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                              const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
-                                                 const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                                 const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h
index ccce53a..c51a9b3 100644
--- a/tests/validation/reference/ConvolutionLayer.h
+++ b/tests/validation/reference/ConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
+                                  const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1, QuantizationInfo out_quant_info = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
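Note: the added out_quant_info parameter lets quantized convolution tests request an output
quantization different from the input; when it is left at the default QuantizationInfo(), the
reference keeps using the source quantization as before. A hedged usage sketch, assuming the
reference API above; the tensors passed in and the scale/offset values are illustrative only:

    #include "tests/SimpleTensor.h"
    #include "tests/validation/reference/ConvolutionLayer.h"

    using namespace arm_compute;
    using namespace arm_compute::test::validation;

    // Illustrative only: run the reference convolution with an explicit output quantization.
    SimpleTensor<uint8_t> quantized_conv_example(const SimpleTensor<uint8_t> &src,
                                                 const SimpleTensor<uint8_t> &weights,
                                                 const SimpleTensor<int32_t> &bias,
                                                 const TensorShape           &output_shape)
    {
        const QuantizationInfo out_qinfo(0.5f, 10); // hypothetical scale and offset
        return reference::convolution_layer(src, weights, bias, output_shape,
                                            PadStrideInfo(1, 1, 0, 0),
                                            Size2D(1U, 1U), 1 /* num_groups */, out_qinfo);
    }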
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
new file mode 100644
index 0000000..8cfce97
--- /dev/null
+++ b/tests/validation/reference/CropResize.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CropResize.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+SimpleTensor<float> scale_image(const SimpleTensor<float> &in, const TensorShape &out_shape, InterpolationPolicy policy, float extrapolation_value)
+{
+    ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NHWC);
+
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+    // Compute the ratio between source width/height and destination width/height
+    const auto wr = static_cast<float>(in.shape()[1]) / static_cast<float>(out_shape[1]);
+    const auto hr = static_cast<float>(in.shape()[2]) / static_cast<float>(out_shape[2]);
+
+    const auto width  = static_cast<int>(in.shape().y());
+    const auto height = static_cast<int>(in.shape().z());
+
+    Window win;
+    win.use_tensor_dimensions(out_shape);
+    execute_window_loop(win, [&](const Coordinates & out_id)
+    {
+        Coordinates in_id(out_id);
+        int         idw = in_id.y();
+        int         idh = in_id.z();
+
+        switch(policy)
+        {
+            case InterpolationPolicy::NEAREST_NEIGHBOR:
+            {
+                // Calculating the source coords without -0.5f is equivalent to rounding the x_src/y_src coords
+                float x_src = (idw + 0.5f) * wr;
+                float y_src = (idh + 0.5f) * hr;
+                in_id.set(1, x_src);
+                in_id.set(2, y_src);
+
+                // If coordinates in range of tensor's width or height
+                if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                }
+                else
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+                }
+                break;
+            }
+            case InterpolationPolicy::BILINEAR:
+            {
+                float x_src = idw * wr;
+                float y_src = idh * hr;
+                in_id.set(1, std::floor(x_src));
+                in_id.set(2, std::floor(y_src));
+                if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+                {
+                    const int id_w = in_id[1];
+                    const int id_h = in_id[2];
+
+                    const float dx   = x_src - id_w;
+                    const float dy   = y_src - id_h;
+                    const float dx_1 = 1.0f - dx;
+                    const float dy_1 = 1.0f - dy;
+
+                    in_id.set(1, id_w);
+                    in_id.set(2, id_h);
+                    const float tl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w + 1);
+                    in_id.set(2, id_h);
+                    const float tr = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w);
+                    in_id.set(2, id_h + 1);
+                    const float bl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w + 1);
+                    in_id.set(2, id_h + 1);
+                    const float br = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+
+                    *reinterpret_cast<float *>(out(out_id)) = tl * (dx_1 * dy_1) + tr * (dx * dy_1) + bl * (dx_1 * dy) + br * (dx * dy);
+                }
+                else
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+                }
+                break;
+            }
+            default:
+                ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+        }
+    });
+
+    return out;
+}
+
+template <typename T>
+SimpleTensor<float> crop_image(const SimpleTensor<T> &src, Coordinates start, Coordinates end, int32_t batch_index, float extrapolation_value)
+{
+    TensorShape out_shape(src.shape()[0], abs(end[0] - start[0]) + 1, abs(end[1] - start[1]) + 1);
+
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+    Window win;
+    win.use_tensor_dimensions(out_shape);
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        bool        out_of_bounds = false;
+        Coordinates offset(id[0], 0, 0, batch_index);
+        for(uint32_t i = 1; i < 3; ++i)
+        {
+            offset.set(i, end[i - 1] < start[i - 1] ? start[i - 1] - id[i] : start[i - 1] + id[i]);
+            if(offset[i] < 0 || static_cast<uint32_t>(offset[i]) > src.shape()[i] - 1)
+            {
+                out_of_bounds = true;
+                break;
+            }
+        }
+        if(!out_of_bounds)
+        {
+            *reinterpret_cast<float *>(out(id)) = static_cast<float>(*reinterpret_cast<const T *>(src(offset)));
+        }
+        else
+        {
+            *reinterpret_cast<float *>(out(id)) = extrapolation_value;
+        }
+    });
+    return out;
+}
+
+} // namespace
+
+template <typename T>
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                    Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+{
+    ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
+    ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NHWC);
+
+    const TensorShape   out_shape(src.shape()[0], crop_size.x, crop_size.y, boxes.shape()[1]);
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+    const TensorShape scaled_image_shape(src.shape()[0], crop_size.x, crop_size.y);
+
+    for(uint32_t i = 0; i < boxes.shape()[1]; ++i)
+    {
+        Coordinates start = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(1, i)))) * (src.shape()[1] - 1) + 0.5f),
+                                        std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(0, i)))) * (src.shape()[2] - 1) + 0.5f));
+        Coordinates end = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(3, i)))) * (src.shape()[1] - 1) + 0.5f),
+                                      std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(2, i)))) * (src.shape()[2] - 1) + 0.5f));
+        SimpleTensor<float> cropped = crop_image(src, start, end, box_ind[i], extrapolation_value);
+        SimpleTensor<float> scaled  = scale_image(cropped, scaled_image_shape, method, extrapolation_value);
+        std::copy_n(reinterpret_cast<float *>(scaled.data()), scaled.num_elements(), reinterpret_cast<float *>(out(Coordinates(0, 0, 0, i))));
+    }
+    return out;
+}
+
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<float> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<half> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
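Note: in this reference src is NHWC (shape[0] = channels, shape[1] = width, shape[2] = height,
shape[3] = batch), boxes has shape (4, num_boxes) with each box stored as normalised
[y1, x1, y2, x2], and box_ind selects the batch image each box is cropped from. A minimal,
illustrative call; every shape and value below is made up for the sketch:

    #include "tests/SimpleTensor.h"
    #include "tests/validation/reference/CropResize.h"

    #include <numeric>

    using namespace arm_compute;
    using namespace arm_compute::test::validation;

    // Illustrative only: crop the centre of a 1-channel 8x8 image and resize it to 4x4.
    SimpleTensor<float> crop_resize_example()
    {
        SimpleTensor<float>   src(TensorShape(1U, 8U, 8U, 1U), DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
        SimpleTensor<float>   boxes(TensorShape(4U, 1U), DataType::F32);
        SimpleTensor<int32_t> box_ind(TensorShape(1U), DataType::S32);

        // Fill the image with a ramp so the crop is visible.
        std::iota(src.data(), src.data() + src.num_elements(), 0.f);

        // One box covering the central half of the image, in normalised [y1, x1, y2, x2] order.
        boxes[0]   = 0.25f;
        boxes[1]   = 0.25f;
        boxes[2]   = 0.75f;
        boxes[3]   = 0.75f;
        box_ind[0] = 0; // crop from the first (and only) batch image

        return reference::crop_and_resize(src, boxes, box_ind, Coordinates2D{ 4, 4 },
                                          InterpolationPolicy::BILINEAR, 0.0f);
    }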
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/CropResize.h
similarity index 76%
copy from tests/validation/reference/WidthConcatenateLayer.h
copy to tests/validation/reference/CropResize.h
index 0f1f428..517c24b 100644
--- a/tests/validation/reference/WidthConcatenateLayer.h
+++ b/tests/validation/reference/CropResize.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,13 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
-#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+#ifndef __ARM_COMPUTE_TEST_CROP_RESIZE_H__
+#define __ARM_COMPUTE_TEST_CROP_RESIZE_H__
 
 #include "tests/SimpleTensor.h"
 
-#include <vector>
-
 namespace arm_compute
 {
 namespace test
@@ -37,9 +35,10 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst);
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                    Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */
+#endif /* __ARM_COMPUTE_TEST_CROP_RESIZE_H__ */
diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp
new file mode 100644
index 0000000..6ad1b9e
--- /dev/null
+++ b/tests/validation/reference/DFT.cpp
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "DFT.h"
+
+#include "PadLayer.h"
+#include "Permute.h"
+#include "Reverse.h"
+#include "SliceOperations.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+/** Performs a one-dimensional DFT on a given real sequence.
+ *
+ * @param[in]  src_ptr Pointer to the real input sequence.
+ * @param[in]  N       Size of input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[in]  K       Size of the output sequence.
+ */
+template <typename T>
+void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
+{
+    for(unsigned int k = 0; k < K; ++k)
+    {
+        float Xr = 0;
+        float Xi = 0;
+        for(unsigned int n = 0; n < N; ++n)
+        {
+            const float alpha = (2 * M_PI * k * n) / N;
+            const float val_r = src_ptr[n];
+            // The input is real-valued, so the imaginary part of the input contributes nothing
+            Xr += val_r * cos(alpha);
+            Xi -= val_r * sin(alpha);
+        }
+
+        dst_ptr[k * 2]     = Xr;
+        dst_ptr[k * 2 + 1] = Xi;
+    }
+}
+
+/** Performs a one-dimensional DFT on a given complex sequence.
+ *
+ * @param[in]  src_ptr Pointer to the complex input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[in]  N       Size of the sequences
+ */
+template <typename T>
+void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
+{
+    for(unsigned int k = 0; k < N; ++k)
+    {
+        float Xr = 0;
+        float Xi = 0;
+        for(unsigned int n = 0; n < N; ++n)
+        {
+            const float alpha     = (2 * M_PI * k * n) / N;
+            const float val_r     = src_ptr[2 * n];
+            const float val_i     = src_ptr[2 * n + 1];
+            const float cos_alpha = cos(alpha);
+            const float sin_alpha = sin(alpha);
+
+            Xr += val_r * cos_alpha + val_i * sin_alpha;
+            Xi += val_i * cos_alpha - val_r * sin_alpha;
+        }
+
+        dst_ptr[k * 2]     = Xr;
+        dst_ptr[k * 2 + 1] = Xi;
+    }
+}
+
+/** Performs a one-dimensional inverse DFT that reconstructs a real sequence from a given complex sequence.
+ *
+ * @param[in]  src_ptr Pointer to the complex input sequence.
+ * @param[in]  K       Size of the input sequence.
+ * @param[out] dst_ptr Pointer to the real output sequence.
+ * @param[in]  N       Size of the output sequence.
+ */
+template <typename T>
+void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
+{
+    const bool         is_odd     = N % 2;
+    const unsigned int Nleft      = N - K;
+    const int          tail_start = is_odd ? K - 1 : K - 2;
+
+    for(unsigned int n = 0; n < N; ++n)
+    {
+        float xr = 0;
+        for(unsigned int k = 0; k < K; ++k)
+        {
+            const float alpha = (2 * M_PI * k * n) / N;
+            xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
+        }
+
+        unsigned int j = tail_start;
+        for(unsigned int k = 0; k < Nleft; ++k)
+        {
+            const float alpha = (2 * M_PI * (k + K) * n) / N;
+            xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
+            --j;
+        }
+
+        dst_ptr[n] = xr;
+    }
+}
+
+/** Performs a one-dimensional inverse DFT on a given complex sequence.
+ *
+ * @param[in]  src_ptr Pointer to the complex input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[in]  N       Size of the sequences
+ */
+template <typename T>
+void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
+{
+    for(unsigned int n = 0; n < N; ++n)
+    {
+        float xr = 0;
+        float xi = 0;
+        for(unsigned int k = 0; k < N; ++k)
+        {
+            const float alpha     = (2 * M_PI * k * n) / N;
+            const float cos_alpha = cos(alpha);
+            const float sin_alpha = sin(alpha);
+            const float val_r     = src_ptr[2 * k];
+            const float val_i     = src_ptr[2 * k + 1];
+
+            xr += val_r * cos_alpha - val_i * sin_alpha;
+            xi += val_i * cos_alpha + val_r * sin_alpha;
+        }
+
+        dst_ptr[2 * n]     = xr;
+        dst_ptr[2 * n + 1] = xi;
+    }
+}
+
+template <typename T>
+SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
+{
+    // Performs only rdft
+    ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
+    ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
+
+    const unsigned int inverse_tail = is_odd ? 1 : 0;
+    const unsigned int N            = src.shape()[0];
+    const unsigned int K            = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
+    const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
+
+    TensorShape dst_shape = src.shape();
+    dst_shape.set(0, K);
+
+    SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
+
+    const unsigned int upper_dims = src.shape().total_size_upper(1);
+    for(unsigned int du = 0; du < upper_dims; ++du)
+    {
+        const T *src_row_ptr = src.data() + du * N * src.num_channels();
+        T       *dst_row_ptr = dst.data() + du * K * dst.num_channels();
+        direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
+    }
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+
+    const unsigned int N = src.shape()[0];
+
+    SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
+
+    const unsigned int upper_dims = src.shape().total_size_upper(1);
+    for(unsigned int du = 0; du < upper_dims; ++du)
+    {
+        const T *src_row_ptr = src.data() + du * N * src.num_channels();
+        T       *dst_row_ptr = dst.data() + du * N * dst.num_channels();
+        direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
+    }
+
+    return dst;
+}
+
+/** Scales a tensor by dividing every element by a given scaling factor.
+ *
+ * @param[in,out] tensor         Tensor to scale.
+ * @param[in]     scaling_factor Factor to divide the tensor data by.
+ */
+template <typename T>
+void scale(SimpleTensor<T> &tensor, T scaling_factor)
+{
+    const int total_elements = tensor.num_elements() * tensor.num_channels();
+    T        *data_ptr       = tensor.data();
+    for(int i = 0; i < total_elements; ++i)
+    {
+        data_ptr[i] /= scaling_factor;
+    }
+}
+
+/** Performs a complex element-wise multiplication with reduction across the channels axis.
+ *
+ * @param[in] input   Input tensor.
+ * @param[in] weights Weights tensor.
+ *
+ * @return Output tensor.
+ */
+template <typename T>
+SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
+{
+    const int W  = input.shape().x();
+    const int H  = input.shape().y();
+    const int Ci = input.shape().z();
+    const int Co = weights.shape()[3];
+    const int N  = input.shape().total_size() / (W * H * Ci);
+
+    TensorShape output_shape = input.shape();
+    output_shape.set(2, Co);
+    SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
+
+    // Zero-initialize the destination memory
+    std::memset(dst.data(), 0, dst.size());
+
+    for(int b = 0; b < N; ++b)
+    {
+        for(int co = 0; co < Co; ++co)
+        {
+            for(int ci = 0; ci < Ci; ++ci)
+            {
+                for(int h = 0; h < H; ++h)
+                {
+                    for(int w = 0; w < W; ++w)
+                    {
+                        size_t            i_index  = w + h * W + ci * H * W + b * H * W * Ci;
+                        size_t            w_index  = w + h * W + ci * H * W + co * H * W * Ci;
+                        size_t            o_index  = w + h * W + co * H * W + b * H * W * Co;
+                        const Coordinates i_coords = index2coords(input.shape(), i_index);
+                        const Coordinates w_coords = index2coords(weights.shape(), w_index);
+                        const Coordinates o_coords = index2coords(dst.shape(), o_index);
+
+                        auto i_ptr = static_cast<const T *>(input(i_coords));
+                        auto w_ptr = static_cast<const T *>(weights(w_coords));
+                        auto o_ptr = static_cast<T *>(dst(o_coords));
+
+                        const T Rin = i_ptr[0];
+                        const T Iin = i_ptr[1];
+                        const T Rw  = w_ptr[0];
+                        const T Iw  = w_ptr[1];
+
+                        o_ptr[0] += Rin * Rw - Iin * Iw;
+                        o_ptr[1] += Rin * Iw + Rw * Iin;
+                    }
+                }
+            }
+        }
+    }
+    return dst;
+}
+} // namespace
+
+template <typename T>
+SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
+{
+    return rdft_1d_core(src, FFTDirection::Forward, false);
+}
+
+template <typename T>
+SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
+{
+    auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
+
+    const T scaling_factor = dst.shape()[0];
+    scale(dst, scaling_factor);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    auto dst = dft_1d_core(src, direction);
+    if(direction == FFTDirection::Inverse)
+    {
+        const T scaling_factor = dst.shape()[0];
+        scale(dst, scaling_factor);
+    }
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
+    constexpr FFTDirection direction = FFTDirection::Forward;
+
+    auto first_pass  = rdft_1d_core(src, direction, false);
+    auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
+    auto second_pass = dft_1d_core(transposed, direction);
+    return permute(second_pass, PermutationVector(1U, 0U));
+}
+
+template <typename T>
+SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+    constexpr FFTDirection direction = FFTDirection::Inverse;
+
+    auto transposed   = permute(src, PermutationVector(1U, 0U));
+    auto first_pass   = dft_1d_core(transposed, direction);
+    auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
+    auto dst          = rdft_1d_core(transposed_2, direction, is_odd);
+
+    const T scaling_factor = dst.shape()[0] * dst.shape()[1];
+    scale(dst, scaling_factor);
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+
+    if(direction == FFTDirection::Forward)
+    {
+        auto first_pass  = dft_1d_core(src, direction);
+        auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
+        auto second_pass = dft_1d_core(transposed, direction);
+        return permute(second_pass, PermutationVector(1U, 0U));
+    }
+    else
+    {
+        auto transposed   = permute(src, PermutationVector(1U, 0U));
+        auto first_pass   = dft_1d_core(transposed, direction);
+        auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
+        auto dst          = dft_1d_core(transposed_2, direction);
+
+        const T scaling_factor = dst.shape()[0] * dst.shape()[1];
+        scale(dst, scaling_factor);
+
+        return dst;
+    }
+}
+
+template <typename T>
+SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
+{
+    // Pad input to full padding
+    const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
+    auto              padded_src = pad_layer(src, padding_in);
+
+    // Flip weights
+    std::vector<uint32_t>  axis_v = { 0, 1 };
+    SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
+    std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
+    auto flipped_w = reverse(w, axis);
+
+    // Pad weights to have the same size as input
+    const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
+    auto              padded_w   = pad_layer(flipped_w, paddings_w);
+
+    // Transform input and weights to frequency domain
+    auto Fsrc = rdft_2d(padded_src);
+    auto Fw   = rdft_2d(padded_w);
+
+    // Perform dot product
+    auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
+
+    // Transform the result back to the spatial domain
+    auto conv_res = ridft_2d(Fdst);
+
+    // Slice output
+    const int start_left = w.shape().x() - conv_info.pad_left() - 1;
+    const int start_top  = w.shape().y() - conv_info.pad_top() - 1;
+    const int end_right  = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
+    const int end_bottom = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
+    return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_bottom));
+}
+
+template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
+template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
+template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
+
+template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
+template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
+template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
+
+template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
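Note: rdft_1d_step, dft_1d_step and their inverses above are direct O(N^2) evaluations of the
textbook DFT rather than a fast transform, and conv2d_dft relies on the convolution theorem
(element-wise complex products computed by complex_mul_and_reduce in the frequency domain,
followed by an inverse transform and a slice). In the notation of the code, with x_n the input
samples and X_k the complex outputs:

    X_k = \sum_{n=0}^{N-1} x_n \left( \cos\tfrac{2\pi k n}{N} - i \sin\tfrac{2\pi k n}{N} \right), \qquad k = 0, \dots, K - 1

    x_n = \frac{1}{N} \sum_{k=0}^{N-1} X_k \left( \cos\tfrac{2\pi k n}{N} + i \sin\tfrac{2\pi k n}{N} \right)

The 1/N factor (1/(N_x N_y) in the 2-D case) is not applied inside the *_core helpers; it is the
division performed afterwards by the scale() helper in dft_1d, ridft_1d, dft_2d and ridft_2d.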
diff --git a/tests/validation/reference/DFT.h b/tests/validation/reference/DFT.h
new file mode 100644
index 0000000..a3a10ab
--- /dev/null
+++ b/tests/validation/reference/DFT.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_FFT_H__
+#define __ARM_COMPUTE_TEST_FFT_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+enum class FFTDirection
+{
+    Forward,
+    Inverse
+};
+
+/** Performs a one-dimensional DFT on a real input.
+ *
+ * @param[in] src Source tensor.
+ *
+ * @return Complex output of length n/2 + 1 due to symmetry.
+ */
+template <typename T>
+SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src);
+
+/** Performs a one-dimensional inverse DFT on a complex input, reconstructing the original real sequence.
+ *
+ * @param[in] src    Source tensor.
+ * @param[in] is_odd (Optional) Specifies if the output has odd dimensions.
+ *                   Used to reconstruct sequences of odd length.
+ *
+ * @return Real output holding the reconstructed sequence.
+ */
+template <typename T>
+SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd = false);
+
+/** Performs a one-dimensional DFT on a complex input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] direction Direction of the DFT.
+ *
+ * @return Complex output of same length as input.
+ */
+template <typename T>
+SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction);
+
+/** Performs a two-dimensional DFT on a real input.
+ *
+ * @param[in] src Source tensor.
+ *
+ * @return Complex output of length n/2 + 1 across width due to symmetry and height of same size as the input.
+ */
+template <typename T>
+SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src);
+
+/** Performs a two-dimensional inverse DFT on a complex input, reconstructing the original real data.
+ *
+ * @param[in] src    Source tensor.
+ * @param[in] is_odd (Optional) Specifies if the output has odd dimensions across width.
+ *                   Used to reconstruct sequences of odd width.
+ *
+ * @return Real output with the reconstructed width and the same height as the input.
+ */
+template <typename T>
+SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd = false);
+
+/** Performs a two-dimensional DFT on a complex input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] direction Direction of the DFT.
+ *
+ * @return Complex output of same length as input.
+ */
+template <typename T>
+SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction);
+
+/** Performs a DFT-based convolution on a real input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] w         Weights tensor.
+ * @param[in] conv_info Convolution related metadata.
+ *
+ * @return The output tensor.
+ */
+template <typename T>
+SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_FFT_H__ */
diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
index 139675d..22271a0 100644
--- a/tests/validation/reference/DepthConcatenateLayer.cpp
+++ b/tests/validation/reference/DepthConcatenateLayer.cpp
@@ -38,7 +38,7 @@
 {
     // Create reference
     std::vector<TensorShape> shapes;
-
+    shapes.reserve(srcs.size());
     for(const auto &src : srcs)
     {
         shapes.emplace_back(src.shape());
@@ -66,7 +66,7 @@
             {
                 auto       ptr_slice = static_cast<T *>(dst(Coordinates(0, 0, slice, b)));
                 const auto num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z());
-                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T t)
+                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T)
                 {
                     return dst.quantization_info().quantize(src.quantization_info().dequantize(0), RoundingPolicy::TO_NEAREST_UP);
                 });
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 39429e2..90ecffb 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,8 +50,10 @@
  */
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier)
+                                      unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
 {
+    ARM_COMPUTE_UNUSED(out_quant_info);
+
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
 
     // Compute reference
@@ -63,18 +65,24 @@
     const int input_depth   = src.shape().z();
     const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
-    const int filter_half_width  = filter_width / 2;
-    const int filter_half_height = filter_height / 2;
-
     const int pad_left   = conv_info.pad_left();
     const int pad_top    = conv_info.pad_top();
     const int pad_right  = conv_info.pad_right();
     const int pad_bottom = conv_info.pad_bottom();
 
-    const int minimum_x = -pad_left + filter_half_width;
-    const int minimum_y = -pad_top + filter_half_height;
-    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
-    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+    const float patch_width  = (filter_width + (dilation.x() - 1) * (filter_width - 1));
+    const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
+
+    const int patch_half_width_floor  = patch_width / 2;
+    const int patch_half_height_floor = patch_height / 2;
+
+    const auto patch_half_width_ceil  = static_cast<int>(std::ceil(patch_width / 2));
+    const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
+
+    const int minimum_x = -pad_left + patch_half_width_floor;
+    const int minimum_y = -pad_top + patch_half_height_floor;
+    const int maximum_x = input_width + pad_left + pad_right - static_cast<int>(patch_width);
+    const int maximum_y = input_height + pad_top + pad_bottom - static_cast<int>(patch_height);
 
     const T border_value(0);
 
@@ -87,21 +95,20 @@
             {
                 const int out_z = z * depth_multiplier + m;
 
-                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
                 {
-                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+                    for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
                     {
                         Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
                         size_t      filter_offset = filter_plane * out_z;
 
                         T val(0);
-                        for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
+                        for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
                         {
-                            for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+                            for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
                             {
                                 coords.set(0, i);
                                 coords.set(1, j);
-
                                 val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
                                 ++filter_offset;
                             }
@@ -119,9 +126,14 @@
 
 template <>
 SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
 {
-    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.quantization_info() };
+    // If no explicit quantization info has been set, use the same as the source tensor
+    if(out_quant_info == QuantizationInfo(0.0f, 0))
+    {
+        out_quant_info = src.quantization_info();
+    }
+    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, out_quant_info };
 
     // Create reference
     const int   input_offset   = -src.quantization_info().offset;
@@ -145,18 +157,24 @@
     const int input_depth   = src.shape().z();
     const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
-    const int filter_half_width  = filter_width / 2;
-    const int filter_half_height = filter_height / 2;
-
     const int pad_left   = conv_info.pad_left();
     const int pad_top    = conv_info.pad_top();
     const int pad_right  = conv_info.pad_right();
     const int pad_bottom = conv_info.pad_bottom();
 
-    const int minimum_x = -pad_left + filter_half_width;
-    const int minimum_y = -pad_top + filter_half_height;
-    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
-    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+    const float patch_width  = (filter_width + (dilation.x() - 1) * (filter_width - 1));
+    const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
+
+    const int patch_half_width_floor  = patch_width / 2;
+    const int patch_half_height_floor = patch_height / 2;
+
+    const auto patch_half_width_ceil  = static_cast<int>(std::ceil(patch_width / 2));
+    const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
+
+    const int minimum_x = -pad_left + patch_half_width_floor;
+    const int minimum_y = -pad_top + patch_half_height_floor;
+    const int maximum_x = input_width + pad_left + pad_right - static_cast<int>(patch_width);
+    const int maximum_y = input_height + pad_top + pad_bottom - static_cast<int>(patch_height);
 
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
@@ -168,17 +186,17 @@
                 const int     out_z    = z * depth_multiplier + m;
                 const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
 
-                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
                 {
-                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+                    for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
                     {
                         Coordinates coords(x, y, z, r);
                         int         filter_offset = filter_plane * out_z;
 
                         int32_t val = 0;
-                        for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
+                        for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
                         {
-                            for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
+                            for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
                             {
                                 coords.set(0, i);
                                 coords.set(1, j);
@@ -206,10 +224,10 @@
 }
 
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
-                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
 
 template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
-                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
index bab3387..ac70de0 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier);
+                                      unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), QuantizationInfo out_quant_info = QuantizationInfo(0.0f, 0));
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 33096a1..df50c14 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,36 +31,24 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<float> dequantization_layer(const SimpleTensor<T> &src, const SimpleTensor<float> &min_max)
+template <typename T>
+SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src)
 {
-    // Create reference
-    SimpleTensor<float> dst{ src.shape(), DataType::F32 };
+    const DataType          dst_data_type     = std::is_same<T, float>::value ? DataType::F32 : DataType::F16;
+    const QuantizationInfo &quantization_info = src.quantization_info();
 
-    // Compute reference
-    const int width       = src.shape().x();
-    const int height      = src.shape().y();
-    const int depth       = src.shape().z();
-    const int stride_w    = width * height * depth;
-    const int num_batches = min_max.shape().total_size_upper(1);
+    SimpleTensor<T> dst{ src.shape(), dst_data_type };
 
-    for(int k = 0; k < num_batches; ++k)
+    for(int i = 0; i < src.num_elements(); ++i)
     {
-        const float min     = min_max[k * 2 + 0];
-        const float max     = min_max[k * 2 + 1];
-        const float range   = max - min;
-        const float scaling = range / 255.0f;
-
-        for(int i = 0; i < stride_w; ++i)
-        {
-            dst[i + k * stride_w] = (static_cast<float>(src[i + k * stride_w]) * scaling) + min;
-        }
+        dst[i] = static_cast<T>(quantization_info.dequantize(src[i]));
     }
 
     return dst;
 }
 
-template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<float> &min_max);
+template SimpleTensor<half> dequantization_layer(const SimpleTensor<uint8_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.h b/tests/validation/reference/DequantizationLayer.h
index 1a8adcf..1d0e54b 100644
--- a/tests/validation/reference/DequantizationLayer.h
+++ b/tests/validation/reference/DequantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<float> dequantization_layer(const SimpleTensor<T> &src, const SimpleTensor<float> &min_max);
+template <typename T>
+SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
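
The rewritten dequantization reference above works element-wise through QuantizationInfo::dequantize instead of the old per-batch min/max rescaling, and the template parameter now only selects the destination type (F32 or F16). Assuming the usual affine QASYMM8 mapping, the per-element arithmetic amounts to the following sketch (illustrative, not part of the patch):

    #include <cstdint>

    // Assumed form of quantization_info.dequantize(src[i]) for QASYMM8:
    // real = scale * (q - offset).
    inline float dequantize_qasymm8(uint8_t q, float scale, int32_t offset)
    {
        return scale * static_cast<float>(static_cast<int32_t>(q) - offset);
    }
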
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index 6d533ed..2ffb0fa 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -124,7 +124,9 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy)
 {
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
@@ -140,7 +142,9 @@
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
         SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
 
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
 
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
 
@@ -150,7 +154,9 @@
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
 
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index d65d0ca..07ddf6d 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,10 +105,16 @@
 } // namespace
 
 template <typename T, typename TB>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape)
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape, QuantizationInfo out_quant_info)
 {
+    // If no explicit quantization has been set, use the same as src
+    if(out_quant_info == QuantizationInfo())
+    {
+        out_quant_info = src.quantization_info();
+    }
+
     // Create reference
-    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, src.quantization_info() };
+    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info };
 
     // Sanity checks
     const int          num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
@@ -145,9 +151,12 @@
     return dst;
 }
 
-template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape);
-template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape);
-template SimpleTensor<uint8_t> fully_connected_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &dst_shape);
+template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape,
+                                                   QuantizationInfo out_quant_info);
+template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape,
+                                                  QuantizationInfo out_quant_info);
+template SimpleTensor<uint8_t> fully_connected_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &dst_shape,
+                                                     QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/FullyConnectedLayer.h b/tests/validation/reference/FullyConnectedLayer.h
index 1dfb496..f474a1c 100644
--- a/tests/validation/reference/FullyConnectedLayer.h
+++ b/tests/validation/reference/FullyConnectedLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,8 @@
 namespace reference
 {
 template <typename T, typename TB>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape);
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape,
+                                      QuantizationInfo out_quant_info = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
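
The fully connected reference gains an optional out_quant_info; as the new branch above shows, passing a default-constructed QuantizationInfo keeps the previous behaviour of reusing the source quantization. A hedged usage sketch (tensor set-up elided and assumed to come from the test fixture; the scale/offset values are illustrative):

    // Previous behaviour: the output inherits the quantization of `src`.
    const auto dst_same = reference::fully_connected_layer<uint8_t>(src, weights, bias, dst_shape);

    // New behaviour: requantize the output with its own scale/offset.
    const auto dst_requant = reference::fully_connected_layer<uint8_t>(src, weights, bias, dst_shape,
                                                                       QuantizationInfo(0.5f, 10));
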
diff --git a/tests/validation/reference/HOGDetector.cpp b/tests/validation/reference/HOGDetector.cpp
index 5a5ae37..8ca1b0c 100644
--- a/tests/validation/reference/HOGDetector.cpp
+++ b/tests/validation/reference/HOGDetector.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,8 +39,8 @@
     const size_t num_block_strides_width  = hog_info.detection_window_size().width / hog_info.block_stride().width;
     const size_t num_block_strides_height = hog_info.detection_window_size().height / hog_info.block_stride().height;
 
-    return Size2D(floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width,
-                  floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height);
+    return Size2D{ floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width,
+                   floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height };
 }
 } // namespace
 
diff --git a/tests/validation/reference/PadLayer.cpp b/tests/validation/reference/PadLayer.cpp
index b9a93dd..d072bc5 100644
--- a/tests/validation/reference/PadLayer.cpp
+++ b/tests/validation/reference/PadLayer.cpp
@@ -36,27 +36,27 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings)
+SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode)
 {
-    DataType dst_data_type = src.data_type();
+    const DataType dst_data_type = src.data_type();
 
-    TensorShape orig_shape = src.shape();
+    const TensorShape orig_shape = src.shape();
 
     std::vector<PaddingInfo> paddings_extended = paddings;
 
-    for(size_t i = paddings.size(); i < TensorShape::num_max_dimensions; i++)
+    for(size_t i = paddings.size(); i < TensorShape::num_max_dimensions; ++i)
     {
         paddings_extended.emplace_back(PaddingInfo{ 0, 0 });
     }
 
-    TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(orig_shape, paddings);
+    const TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(orig_shape, paddings);
 
     SimpleTensor<T> dst(padded_shape, dst_data_type);
 
     // Reference algorithm: loop over the different dimension of the input.
-    for(int idx = 0; idx < dst.num_elements(); idx++)
+    for(int idx = 0; idx < dst.num_elements(); ++idx)
     {
-        Coordinates coord = index2coord(padded_shape, idx);
+        const Coordinates coord = index2coord(padded_shape, idx);
 
         const size_t i = coord.x();
         const size_t j = coord.y();
@@ -65,23 +65,89 @@
         const size_t m = coord[4];
         const size_t n = coord[5];
 
-        std::array<size_t, TensorShape::num_max_dimensions> dims   = { { 0, 1, 2, 3, 4, 5 } };
-        std::array<size_t, TensorShape::num_max_dimensions> coords = { { i, j, k, l, m, n } };
+        const std::array<size_t, TensorShape::num_max_dimensions> dims   = { { 0, 1, 2, 3, 4, 5 } };
+        const std::array<size_t, TensorShape::num_max_dimensions> coords = { { i, j, k, l, m, n } };
         auto is_padding_area = [&](size_t i)
         {
             return (coords[i] < paddings_extended[i].first || coords[i] > orig_shape[i] + paddings_extended[i].first - 1);
         };
 
-        // If the tuple [i,j,k,l,m] is in the padding area, then seimply set the value
+        auto orig_coord_reflect = [&](size_t i)
+        {
+            if(is_padding_area(i))
+            {
+                if(coords[i] < paddings_extended[i].first)
+                {
+                    return paddings_extended[i].first - coords[i];
+                }
+                else
+                {
+                    return 2 * orig_shape[i] + paddings_extended[i].first - 2 - coords[i];
+                }
+            }
+            return coords[i] - paddings_extended[i].first;
+        };
+
+        auto orig_coord_symm = [&](size_t i)
+        {
+            if(is_padding_area(i))
+            {
+                if(coords[i] < paddings_extended[i].first)
+                {
+                    return paddings_extended[i].first - coords[i] - 1;
+                }
+                else
+                {
+                    return 2 * orig_shape[i] + paddings_extended[i].first - 1 - coords[i];
+                }
+            }
+            return coords[i] - paddings_extended[i].first;
+        };
+
+        // If the tuple [i,j,k,l,m] is in the padding area, then simply set the value
         if(std::any_of(dims.begin(), dims.end(), is_padding_area))
         {
-            dst[idx] = T(0);
+            switch(mode)
+            {
+                case PaddingMode::CONSTANT:
+                    const_value.get(dst[idx]);
+                    break;
+                case PaddingMode::REFLECT:
+                {
+                    const Coordinates orig_coords{ orig_coord_reflect(0),
+                                                   orig_coord_reflect(1),
+                                                   orig_coord_reflect(2),
+                                                   orig_coord_reflect(3),
+                                                   orig_coord_reflect(4),
+                                                   orig_coord_reflect(5) };
+
+                    const size_t idx_src = coord2index(orig_shape, orig_coords);
+                    dst[idx]             = src[idx_src];
+                    break;
+                }
+                case PaddingMode::SYMMETRIC:
+                {
+                    const Coordinates orig_coords{ orig_coord_symm(0),
+                                                   orig_coord_symm(1),
+                                                   orig_coord_symm(2),
+                                                   orig_coord_symm(3),
+                                                   orig_coord_symm(4),
+                                                   orig_coord_symm(5) };
+
+                    const size_t idx_src = coord2index(orig_shape, orig_coords);
+                    dst[idx]             = src[idx_src];
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Padding mode not supported.");
+                    break;
+            }
         }
         else
         {
             // If the tuple[i,j,k,l,m] is not in the padding area, then copy the input into the output
 
-            Coordinates orig_coords{ i - paddings_extended[0].first,
+            const Coordinates orig_coords{ i - paddings_extended[0].first,
                                      j - paddings_extended[1].first,
                                      k - paddings_extended[2].first,
                                      l - paddings_extended[3].first,
@@ -96,13 +162,13 @@
     return dst;
 }
 
-template SimpleTensor<float> pad_layer(const SimpleTensor<float> &src, const PaddingList &paddings);
-template SimpleTensor<half> pad_layer(const SimpleTensor<half> &src, const PaddingList &paddings);
-template SimpleTensor<uint32_t> pad_layer(const SimpleTensor<uint32_t> &src, const PaddingList &paddings);
-template SimpleTensor<uint8_t> pad_layer(const SimpleTensor<uint8_t> &src, const PaddingList &paddings);
-template SimpleTensor<int8_t> pad_layer(const SimpleTensor<int8_t> &src, const PaddingList &paddings);
-template SimpleTensor<uint16_t> pad_layer(const SimpleTensor<uint16_t> &src, const PaddingList &paddings);
-template SimpleTensor<int16_t> pad_layer(const SimpleTensor<int16_t> &src, const PaddingList &paddings);
+template SimpleTensor<float> pad_layer(const SimpleTensor<float> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<half> pad_layer(const SimpleTensor<half> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<uint32_t> pad_layer(const SimpleTensor<uint32_t> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<uint8_t> pad_layer(const SimpleTensor<uint8_t> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<int8_t> pad_layer(const SimpleTensor<int8_t> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<uint16_t> pad_layer(const SimpleTensor<uint16_t> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
+template SimpleTensor<int16_t> pad_layer(const SimpleTensor<int16_t> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PadLayer.h b/tests/validation/reference/PadLayer.h
index 9406b05..5ebb5ed 100644
--- a/tests/validation/reference/PadLayer.h
+++ b/tests/validation/reference/PadLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,13 +39,15 @@
 /** Reference function to pad an ND tensor. This function is not supposed to be optimized, but to
  * clearly and naively execute the padding of a tensor
  *
- * @param[in] src      Tensor to pad
- * @param[in] paddings Padding size in each dimension
+ * @param[in] src         Tensor to pad
+ * @param[in] paddings    Padding size in each dimension
+ * @param[in] const_value (Optional) Constant value to fill padding with
+ * @param[in] mode        (Optional) Padding mode to use
  *
  * @return The padded Tensor
  */
 template <typename T>
-SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings);
+SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
 } // namespace reference
 } // namespace validation
 } // namespace test
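
With the additions above, pad_layer supports three modes. For a 1-D row {a, b, c, d} padded by 2 on each side, the index mappings implemented by orig_coord_reflect and orig_coord_symm give: CONSTANT -> v v a b c d v v (v = const_value), REFLECT -> c b a b c d c b (edge not repeated), SYMMETRIC -> b a a b c d d c (edge repeated). A standalone 1-D sketch of the same arithmetic (illustrative, not part of the patch):

    #include <cstddef>

    // 1-D version of the padding index math used by the reference above.
    // Returns the source index that destination index `dst_i` reads from,
    // for a source of length `len` padded with `pad_before` elements in front.
    inline size_t reflect_index(size_t dst_i, size_t len, size_t pad_before)
    {
        if(dst_i < pad_before)
        {
            return pad_before - dst_i;               // mirror around the first element
        }
        if(dst_i > len + pad_before - 1)
        {
            return 2 * len + pad_before - 2 - dst_i; // mirror around the last element
        }
        return dst_i - pad_before;                   // interior: plain copy
    }

    inline size_t symmetric_index(size_t dst_i, size_t len, size_t pad_before)
    {
        if(dst_i < pad_before)
        {
            return pad_before - dst_i - 1;           // edge element is repeated
        }
        if(dst_i > len + pad_before - 1)
        {
            return 2 * len + pad_before - 1 - dst_i;
        }
        return dst_i - pad_before;
    }
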
diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp
index 29c3c5c..619a787 100644
--- a/tests/validation/reference/Permute.cpp
+++ b/tests/validation/reference/Permute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,11 +47,11 @@
     // Compute reference
     for(int i = 0; i < src.num_elements(); ++i)
     {
-        Coordinates coord = index2coord(src.shape(), i);
-        permute(coord, perm);
-        const size_t dst_index = coord2index(dst.shape(), coord);
+        const Coordinates src_coords = index2coord(src.shape(), i);
+        Coordinates       dst_coords = src_coords;
+        permute(dst_coords, perm);
 
-        dst[dst_index] = src[i];
+        std::copy_n(static_cast<const T *>(src(src_coords)), src.num_channels(), static_cast<T *>(dst(dst_coords)));
     }
 
     return dst;
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index d86f8aa..ea058ec 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * dst OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "PixelWiseMultiplication.h"
@@ -139,7 +139,9 @@
         ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
     }
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
 
@@ -166,7 +168,9 @@
             ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
         }
 
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
     }
     return dst;
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index e617c93..f4112a4 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,8 +38,9 @@
 using namespace arm_compute::misc::shape_calculator;
 
 template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
 {
+    ARM_COMPUTE_UNUSED(output_qinfo); // requantization occurs in pooling_layer<uint8_t>
     ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
 
     // Create reference
@@ -152,16 +153,16 @@
 }
 
 template <>
-SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info)
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
 {
     SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
-    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info);
-    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info, output_qinfo);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, output_qinfo);
     return dst;
 }
 
-template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info);
-template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info);
+template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
+template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PoolingLayer.h b/tests/validation/reference/PoolingLayer.h
index 0097789..1c0b7ff 100644
--- a/tests/validation/reference/PoolingLayer.h
+++ b/tests/validation/reference/PoolingLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info);
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
 } // namespace reference
 } // namespace validation
 } // namespace test
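
For the quantized pooling path the new output_qinfo is what the result is requantized with (see the uint8_t specialisation above: dequantize, pool in float, convert back); for float and half it is deliberately unused. A hedged usage sketch (input tensors assumed to be prepared by the caller; the pooling parameters and quantization values are illustrative):

    const PoolingLayerInfo pool_info(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1));

    // Float path: output_qinfo is ignored by the reference.
    const auto dst_f32 = reference::pooling_layer<float>(src_f32, pool_info, QuantizationInfo());

    // QASYMM8 path: the result is requantized with the given output quantization.
    const auto dst_q8 = reference::pooling_layer<uint8_t>(src_q8, pool_info, QuantizationInfo(0.25f, 128));
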
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index d7ce490..2f33481 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,54 +33,25 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src)
+template <typename T>
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info)
 {
     // Create reference
-    SimpleTensor<uint8_t> dst{ src.shape(), DataType::U8 };
+    SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
 
-    const int width       = src.shape().x();
-    const int height      = src.shape().y();
-    const int depth       = src.shape().z();
-    const int stride_w    = width * height * depth;
-    const int num_batches = src.shape().total_size_upper(3);
-
-    for(int k = 0; k < num_batches; ++k)
+    for(int i = 0; i < src.num_elements(); ++i)
     {
-        // Compute min and max of the 3D tensor
-        float min = src[k * stride_w];
-        float max = src[k * stride_w];
-
-        // Look for min and max values
-        for(int i = 1; i < stride_w; ++i)
-        {
-            float val = src[i + k * stride_w];
-            min       = std::min(min, val);
-            max       = std::max(max, val);
-        }
-
-        // Saturate the result in case min = max
-        if(min == max)
-        {
-            min = 0.0f;
-            max = 1.0f;
-        }
-
-        const float range = max - min;
-
-        for(int i = 0; i < stride_w; ++i)
-        {
-            // map values to range [0.0, 1.0]
-            float       val        = src[i + k * stride_w];
-            const float normalized = (val - min) / range;
-            dst[i + k * stride_w]  = static_cast<uint8_t>(std::min(255.0f, normalized * 256.0f));
-        }
+#ifdef __aarch64__
+        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_NEAREST_EVEN);
+#else  // __aarch64__
+        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_ZERO);
+#endif // __aarch64__
     }
-
     return dst;
 }
 
-template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, const QuantizationInfo quantization_info);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, const QuantizationInfo quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/QuantizationLayer.h b/tests/validation/reference/QuantizationLayer.h
index 7c5572c..2d13690 100644
--- a/tests/validation/reference/QuantizationLayer.h
+++ b/tests/validation/reference/QuantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src);
+template <typename T>
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
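
The quantization reference now writes QASYMM8 using the caller-supplied QuantizationInfo, rounding to nearest-even on AArch64 and toward zero elsewhere, presumably to mirror the rounding of the corresponding NEON kernels. Assuming the usual affine quantization with clamping to [0, 255], the per-element arithmetic is sketched below (illustrative, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Assumed form of QuantizationInfo::quantize for QASYMM8:
    // q = clamp(round(value / scale) + offset, 0, 255).
    // std::nearbyint rounds half-to-even under the default FE_TONEAREST mode (TO_NEAREST_EVEN);
    // std::trunc rounds toward zero (TO_ZERO).
    inline uint8_t quantize_qasymm8(float value, float scale, int32_t offset, bool nearest_even)
    {
        const float   rounded = nearest_even ? std::nearbyint(value / scale) : std::trunc(value / scale);
        const int32_t q       = static_cast<int32_t>(rounded) + offset;
        return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));
    }
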
diff --git a/tests/validation/reference/SliceOperations.cpp b/tests/validation/reference/SliceOperations.cpp
index 40ca9de..b34afdc 100644
--- a/tests/validation/reference/SliceOperations.cpp
+++ b/tests/validation/reference/SliceOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -103,7 +103,9 @@
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
 
     // Get coordinates
-    Coordinates starts_abs, ends_abs, final_strides;
+    Coordinates starts_abs{};
+    Coordinates ends_abs{};
+    Coordinates final_strides{};
     std::tie(starts_abs, ends_abs, final_strides) = calculate_strided_slice_coords(src_shape,
                                                                                    starts, ends, strides,
                                                                                    begin_mask, end_mask, shrink_axis_mask);
diff --git a/tests/validation/reference/Sobel.cpp b/tests/validation/reference/Sobel.cpp
index ff0e11d..233f1ad 100644
--- a/tests/validation/reference/Sobel.cpp
+++ b/tests/validation/reference/Sobel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/reference/StackLayer.cpp b/tests/validation/reference/StackLayer.cpp
index 50e440c..9e9e434 100644
--- a/tests/validation/reference/StackLayer.cpp
+++ b/tests/validation/reference/StackLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,11 +56,11 @@
     // i_coordinates[0] = xi, i_coordinates[1] = yi, i_coordinates[2] = zi
     // i_coordinates[3] = bi, i_coordinates[4] = i, i_coordinates[5] = 0
     // i_coordinates[5] will be always zero and used for not incrementing the output when the input has less than 4 dimensions
-    int i_coordinates[6] = { 0 };
+    std::array<int, 6> i_coordinates{ 0 };
 
     // Array of pointers used to map the output coordinates to the input ones accordingly with the axis
     // This array is initialized with &i_coordinates[5] since this will be always zero
-    int *o_coordinates[5] = { &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5] };
+    std::array<int *, 5> o_coordinates = { &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5] };
 
     // Set the axis coordinate
     o_coordinates[axis] = &i_coordinates[4];
diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
index 294993b..47f5ac7 100644
--- a/tests/validation/reference/Winograd.cpp
+++ b/tests/validation/reference/Winograd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,7 @@
 void initialize_matrix_transform(SimpleTensor<T> &src, const Size2D &output_tile_size, const Size2D &kernel_size, WinogradTransformType winograd_transform_type)
 {
     // Winograd input transform matrices
-    static const float imatrix2x2_3x3[] =
+    static const std::array<float, 16> imatrix2x2_3x3 =
     {
         1.0f, 0.0f, -1.0f, 0.0f,
         0.0f, 1.0f, 1.0f, 0.0f,
@@ -53,7 +53,7 @@
         0.0f, 1.0f, 0.0f, -1.0f
     };
 
-    static const float imatrix4x4_3x3[] =
+    static const std::array<float, 36> imatrix4x4_3x3 =
     {
         4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,
         0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,
@@ -63,7 +63,7 @@
         0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,
     };
 
-    static const float imatrix4x4_5x5[] =
+    static const std::array<float, 64> imatrix4x4_5x5 =
     {
         1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,
         0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,
@@ -75,7 +75,7 @@
         0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f
     };
 
-    static const float imatrix2x1_7x7[] =
+    static const std::array<float, 64> imatrix2x1_7x7 =
     {
         -36.0f, 0.0f, 49.0f, 0.0f, -14.0f, 0.0f, 1.0f, 0.0f,
         0.0f, -36.0f, 36.0f, 13.0f, -13.0f, -1.0f, 1.0f, 0.0f,
@@ -90,7 +90,7 @@
     // ------------------------------------------
 
     // Winograd filter transform matrices
-    static const float fmatrix2x2_3x3[] =
+    static const std::array<float, 12> fmatrix2x2_3x3 =
     {
         1.0f, 0.0f, 0.0f,
         0.5f, 0.5f, 0.5f,
@@ -98,7 +98,7 @@
         0.0f, 0.0f, 1.0f
     };
 
-    static const float fmatrix4x4_3x3[] =
+    static const std::array<float, 18> fmatrix4x4_3x3 =
     {
         0.25f, 0.0f, 0.0f,
         -1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,
@@ -108,7 +108,7 @@
         0.0f, 0.0f, 1.0f
     };
 
-    static const float fmatrix4x4_5x5[] =
+    static const std::array<float, 40> fmatrix4x4_5x5 =
     {
         1.0f, 0.0f, 0.0f, 0.0f, 0.0f,
         -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,
@@ -121,7 +121,7 @@
 
     };
 
-    static const float fmatrix2x1_7x7[] =
+    static const std::array<float, 56> fmatrix2x1_7x7 =
     {
         -1.0f / 36.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
         1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f,
@@ -136,13 +136,13 @@
     // ------------------------------------------
 
     // Winograd output transform matrices
-    static const float omatrix2x2_3x3[] =
+    static const std::array<float, 8> omatrix2x2_3x3 =
     {
         1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, 1.0f, -1.0f, -1.0f
     };
 
-    static const float omatrix4x4_3x3[] =
+    static const std::array<float, 24> omatrix4x4_3x3 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,
@@ -150,7 +150,7 @@
         0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f
     };
 
-    static const float omatrix4x4_5x5[] =
+    static const std::array<float, 36> omatrix4x4_5x5 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 8.0f, 0.0f,
         0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f, 0.0f,
@@ -158,7 +158,7 @@
         0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f, -1.0f, 1.0f
     };
 
-    static const float omatrix2x1_7x7[] =
+    static const std::array<float, 16> omatrix2x1_7x7 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, -1.0f, 1.0f, -2.0f, 2.0f, -3.0f, 3.0f, 1.0f
@@ -171,39 +171,42 @@
     // Key = (Output tile size, Kernel size, Winograd transform type)
     static std::map<WinogradKey, const float *> matrix_map =
     {
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::FILTER), fmatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
     };
 
     // Find transformation matrix